New upstream version 1.0.0+dfsg

author: Picca Frédéric-Emmanuel <picca@debian.org> 2022-02-02 14:19:58 +0100
committer: Picca Frédéric-Emmanuel <picca@debian.org> 2022-02-02 14:19:58 +0100
commit: 4e774db12d5ebe7a20eded6dd434a289e27999e5 (patch)
tree: a9822974ba45196f1e3740995ab157d6eb214a04 /src/silx/opencl/test
parent: d3194b1a9c4404ba93afac43d97172ab24c57098 (diff)
13 files changed, 2216 insertions, 0 deletions
diff --git a/src/silx/opencl/test/__init__.py b/src/silx/opencl/test/__init__.py
new file mode 100644
index 0000000..92cda4a
--- /dev/null
+++ b/src/silx/opencl/test/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+#    Project: silx
+#             https://github.com/silx-kit/silx
+#
+#    Copyright (C) 2012-2016  European Synchrotron Radiation Facility, Grenoble, France
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
diff --git a/src/silx/opencl/test/test_addition.py b/src/silx/opencl/test/test_addition.py
new file mode 100644
index 0000000..3b668bf
--- /dev/null
+++ b/src/silx/opencl/test/test_addition.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#    Project: Sift implementation in Python + OpenCL
+#             https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of an addition
+"""
+
+__authors__ = ["Henri Payno, Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "30/11/2020"
+
+import logging
+import numpy
+import pytest
+
+import unittest
+from ..common import ocl, _measure_workgroup_size, query_kernel_info
+if ocl:
+    import pyopencl
+    import pyopencl.array
+from ..utils import get_opencl_code
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl, "PyOpenCl is missing")
+class TestAddition(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestAddition, cls).setUpClass()
+        if ocl:
+            cls.ctx = ocl.create_context()
+            if logger.getEffectiveLevel() <= logging.INFO:
+                cls.PROFILE = True
+                cls.queue = pyopencl.CommandQueue(
+                                cls.ctx,
+                                properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+            else:
+                cls.PROFILE = False
+                cls.queue = pyopencl.CommandQueue(cls.ctx)
+            cls.max_valid_wg = 0
+
+    @classmethod
+    def tearDownClass(cls):
+        super(TestAddition, cls).tearDownClass()
+        print("Maximum valid workgroup size %s on device %s" % (cls.max_valid_wg, cls.ctx.devices[0]))
+        cls.ctx = None
+        cls.queue = None
+
+    def setUp(self):
+        if ocl is None:
+            return
+        self.shape = 4096
+        self.data = numpy.random.random(self.shape).astype(numpy.float32)
+        self.d_array_img = pyopencl.array.to_device(self.queue, self.data)
+        self.d_array_5 = pyopencl.array.empty_like(self.d_array_img)
+        self.d_array_5.fill(-5)
+        self.program = pyopencl.Program(self.ctx, get_opencl_code("addition")).build()
+
+    def tearDown(self):
+        self.img = self.data = None
+        self.d_array_img = self.d_array_5 = self.program = None
+
+    def test_add(self):
+        """
+        tests the addition  kernel
+        """
+        maxi = int(round(numpy.log2(self.shape)))
+        for i in range(maxi):
+            d_array_result = pyopencl.array.empty_like(self.d_array_img)
+            wg = 1 << i
+            try:
+                evt = self.program.addition(self.queue, (self.shape,), (wg,),
+                       self.d_array_img.data, self.d_array_5.data, d_array_result.data, numpy.int32(self.shape))
+                evt.wait()
+            except Exception as error:
+                max_valid_wg = self.program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0])
+                msg = "Error %s on WG=%s: %s" % (error, wg, max_valid_wg)
+                self.assertLess(max_valid_wg, wg, msg)
+                break
+            else:
+                res = d_array_result.get()
+                good = numpy.allclose(res, self.data - 5)
+                if good and wg > self.max_valid_wg:
+                    self.__class__.max_valid_wg = wg
+                self.assertTrue(good, "calculation is correct for WG=%s" % wg)
+
+    def test_measurement(self):
+        """
+        tests that all devices are working properly ... lengthy and error prone
+        """
+        for platform in ocl.platforms:
+            for did, device in enumerate(platform.devices):
+                meas = _measure_workgroup_size((platform.id, device.id))
+                self.assertEqual(meas, device.max_work_group_size,
+                                 "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size))
+
+    def test_query(self):
+        """
+        tests that all devices are working properly ... lengthy and error prone
+        """
+        for what in ("COMPILE_WORK_GROUP_SIZE",
+                     "LOCAL_MEM_SIZE",
+                     "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
+                     "PRIVATE_MEM_SIZE",
+                     "WORK_GROUP_SIZE"):
+            logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what))
+
+        # Not all ICD work properly ....    
+        #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
+
+        min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
+        max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE")
+        self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg")
diff --git a/src/silx/opencl/test/test_array_utils.py b/src/silx/opencl/test/test_array_utils.py
new file mode 100644
index 0000000..325a6c3
--- /dev/null
+++ b/src/silx/opencl/test/test_array_utils.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the OpenCL array_utils"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "14/06/2017"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+    import mako
+except ImportError:
+    mako = None
+from ..common import ocl
+if ocl:
+    import pyopencl as cl
+    import pyopencl.array as parray
+    from .. import linalg
+from ..utils import get_opencl_code
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+try:
+    from scipy.ndimage.filters import laplace
+    _has_scipy = True
+except ImportError:
+    _has_scipy = False
+
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestCpy2d(unittest.TestCase):
+
+    def setUp(self):
+        if ocl is None:
+            return
+        self.ctx = ocl.create_context()
+        if logger.getEffectiveLevel() <= logging.INFO:
+            self.PROFILE = True
+            self.queue = cl.CommandQueue(
+                            self.ctx,
+                            properties=cl.command_queue_properties.PROFILING_ENABLE)
+        else:
+            self.PROFILE = False
+            self.queue = cl.CommandQueue(self.ctx)
+        self.allocate_arrays()
+        self.program = cl.Program(self.ctx, get_opencl_code("array_utils")).build()
+
+    def allocate_arrays(self):
+        """
+        Allocate various types of arrays for the tests
+        """
+        self.prng_state = np.random.get_state()
+        # Generate arrays of random shape
+        self.shape1 = np.random.randint(20, high=512, size=(2,))
+        self.shape2 = np.random.randint(20, high=512, size=(2,))
+        self.array1 = np.random.rand(*self.shape1).astype(np.float32)
+        self.array2 = np.random.rand(*self.shape2).astype(np.float32)
+        self.d_array1 = parray.to_device(self.queue, self.array1)
+        self.d_array2 = parray.to_device(self.queue, self.array2)
+        # Generate random offsets
+        offset1_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
+        offset1_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
+        offset2_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
+        offset2_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
+        self.offset1 = (offset1_y, offset1_x)
+        self.offset2 = (offset2_y, offset2_x)
+        # Compute the size of the rectangle to transfer
+        size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1)
+        size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1)
+        self.transfer_shape = (size_y, size_x)
+
+    def tearDown(self):
+        self.array1 = None
+        self.array2 = None
+        self.d_array1.data.release()
+        self.d_array2.data.release()
+        self.d_array1 = None
+        self.d_array2 = None
+        self.ctx = None
+        self.queue = None
+
+    def compare(self, result, reference):
+        errmax = np.max(np.abs(result - reference))
+        logger.info("Max error = %e" % (errmax))
+        self.assertTrue(errmax == 0, str("Max error is too high"))#. PRNG state was %s" % str(self.prng_state)))
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_cpy2d(self):
+        """
+        Test rectangular transfer of self.d_array1 to self.d_array2
+        """
+        # Reference
+        o1 = self.offset1
+        o2 = self.offset2
+        T = self.transfer_shape
+        logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s,
+                    (N2_y, N2_x) = %s:
+                    array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" %
+                        (
+                            str(self.shape1), str(self.shape2),
+                            o2[0], o2[0] + T[0],
+                            o2[1], o2[1] + T[1],
+                            o1[0], o1[0] + T[0],
+                            o1[1], o1[1] + T[1]
+                        )
+                    )
+        self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]]
+        kernel_args = (
+            self.d_array2.data,
+            self.d_array1.data,
+            np.int32(self.shape2[1]),
+            np.int32(self.shape1[1]),
+            np.int32(self.offset2[::-1]),
+            np.int32(self.offset1[::-1]),
+            np.int32(self.transfer_shape[::-1])
+        )
+        wg = None
+        ndrange = self.transfer_shape[::-1]
+        self.program.cpy2d(self.queue, ndrange, wg, *kernel_args)
+        res = self.d_array2.get()
+        self.compare(res, self.array2)
diff --git a/src/silx/opencl/test/test_backprojection.py b/src/silx/opencl/test/test_backprojection.py
new file mode 100644
index 0000000..96d56fa
--- /dev/null
+++ b/src/silx/opencl/test/test_backprojection.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the filtered backprojection module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "19/01/2018"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+from math import pi
+try:
+    import mako
+except ImportError:
+    mako = None
+from ..common import ocl
+if ocl:
+    from .. import backprojection
+    from ...image.tomography import compute_fourier_filter
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+
+
+def generate_coords(img_shp, center=None):
+    """
+    Return two 2D arrays containing the indexes of an image.
+    The zero is at the center of the image.
+    """
+    l_r, l_c = float(img_shp[0]), float(img_shp[1])
+    R, C = np.mgrid[:l_r, :l_c]
+    if center is None:
+        center0, center1 = l_r / 2., l_c / 2.
+    else:
+        center0, center1 = center
+    R = R + 0.5 - center0
+    C = C + 0.5 - center1
+    return R, C
+
+
+def clip_circle(img, center=None, radius=None):
+    """
+    Puts zeros outside the inscribed circle of the image support.
+    """
+    R, C = generate_coords(img.shape, center)
+    M = R * R + C * C
+    res = np.zeros_like(img)
+    if radius is None:
+        radius = img.shape[0] / 2. - 1
+    mask = M < radius * radius
+    res[mask] = img[mask]
+    return res
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestFBP(unittest.TestCase):
+
+    def setUp(self):
+        if ocl is None:
+            return
+        self.getfiles()
+        self.fbp = backprojection.Backprojection(self.sino.shape, profile=True)
+        if self.fbp.compiletime_workgroup_size < 16 * 16:
+            self.skipTest("Current implementation of OpenCL backprojection is "
+                          "not supported on this platform yet")
+        # Astra does not use the same backprojector implementation.
+        # Therefore, we cannot expect results to be the "same" (up to float32
+        # numerical error)
+        self.tol = 5e-2
+        if not(self.fbp._use_textures) or self.fbp.device.type == "CPU":
+            # Precision is less when using CPU
+            # (either CPU textures or "manual" linear interpolation)
+            self.tol *= 2
+
+    def tearDown(self):
+        self.sino = None
+        # self.fbp.log_profile()
+        self.fbp = None
+
+    def getfiles(self):
+        # load sinogram of 512x512 MRI phantom
+        self.sino = np.load(utilstest.getfile("sino500.npz"))["data"]
+        # load reconstruction made with ASTRA FBP (with filter designed in spatial domain)
+        self.reference_rec = np.load(utilstest.getfile("rec_astra_500.npz"))["data"]
+
+    def measure(self):
+        "Common measurement of timings"
+        t1 = time.time()
+        try:
+            result = self.fbp.filtered_backprojection(self.sino)
+        except RuntimeError as msg:
+            logger.error(msg)
+            return
+        t2 = time.time()
+        return t2 - t1, result
+
+    def compare(self, res):
+        """
+        Compare a result with the reference reconstruction.
+        Only the valid reconstruction zone (inscribed circle) is taken into
+        account
+        """
+        res_clipped = clip_circle(res)
+        ref_clipped = clip_circle(self.reference_rec)
+        delta = abs(res_clipped - ref_clipped)
+        bad = delta > 1
+        logger.debug("Absolute difference: %s with %s outlier pixels out of %s"
+                     "", delta.max(), bad.sum(), np.prod(bad.shape))
+        return delta.max()
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_fbp(self):
+        """
+        tests FBP
+        """
+        # Test single reconstruction
+        # --------------------------
+        t, res = self.measure()
+        if t is None:
+            logger.info("test_fp: skipped")
+        else:
+            logger.info("test_backproj: time = %.3fs" % t)
+            err = self.compare(res)
+            msg = str("Max error = %e" % err)
+            logger.info(msg)
+            self.assertTrue(err < self.tol, "Max error is too high")
+
+        # Test multiple reconstructions
+        # -----------------------------
+        res0 = np.copy(res)
+        for i in range(10):
+            res = self.fbp.filtered_backprojection(self.sino)
+            errmax = np.max(np.abs(res - res0))
+            self.assertTrue(errmax < 1.e-6, "Max error is too high")
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_fbp_filters(self):
+        """
+        Test the different available filters of silx FBP.
+        """
+        avail_filters = [
+            "ramlak", "shepp-logan", "cosine", "hamming",
+            "hann"
+        ]
+        # Create a Dirac delta function at a single angle view.
+        # As the filters are radially invarant:
+        #   - backprojection yields an image where each line is a Dirac.
+        #   - FBP yields an image where each line is the spatial filter
+        # One can simply filter "dirac" without backprojecting it, but this
+        # test will also ensure that backprojection behaves well.
+        dirac = np.zeros_like(self.sino)
+        na, dw = dirac.shape
+        dirac[0, dw//2] = na / pi * 2
+
+        for filter_name in avail_filters:
+            B = backprojection.Backprojection(dirac.shape, filter_name=filter_name)
+            r = B(dirac)
+            # Check that radial invariance is kept
+            std0 = np.max(np.abs(np.std(r, axis=0)))
+            self.assertTrue(
+                std0 < 5.e-6,
+                "Something wrong with FBP(filter=%s)" % filter_name
+            )
+            # Check that the filter is retrieved
+            r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2.  # filter factor
+            ref_filter_f = compute_fourier_filter(dw, filter_name)
+            errmax = np.max(np.abs(r_f - ref_filter_f))
+            logger.info("FBP filter %s: max error=%e" % (filter_name, errmax))
+            self.assertTrue(
+                errmax < 1.e-3,
+                "Something wrong with FBP(filter=%s)" % filter_name
+            )
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_fbp_oddsize(self):
+        # Generate a 513-sinogram.
+        # The padded width will be nextpow(513*2).
+        # silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail.
+        sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge')
+        B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1)
+        res = B(sino)
+        # Compare with self.reference_rec. Tolerance is high as backprojector
+        # is not fully shift-invariant.
+        errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec)))
+        self.assertLess(
+            errmax, 1.e-1,
+            "Something wrong with FBP on odd-sized sinogram"
+        )
diff --git a/src/silx/opencl/test/test_convolution.py b/src/silx/opencl/test/test_convolution.py
new file mode 100644
index 0000000..6a2759d
--- /dev/null
+++ b/src/silx/opencl/test/test_convolution.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+
+"""
+Test of the Convolution class.
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre Paleo"]
+__contact__ = "pierre.paleo@esrf.fr"
+__license__ = "MIT"
+__copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "01/08/2019"
+
+import pytest
+import logging
+from itertools import product
+import numpy as np
+from silx.image.utils import gaussian_kernel
+
+try:
+    from scipy.ndimage import convolve, convolve1d
+    from scipy.misc import ascent
+
+    scipy_convolve = convolve
+    scipy_convolve1d = convolve1d
+except ImportError:
+    scipy_convolve = None
+import unittest
+from ..common import ocl, check_textures_availability
+
+if ocl:
+    import pyopencl as cl
+    import pyopencl.array as parray
+    from silx.opencl.convolution import Convolution
+logger = logging.getLogger(__name__)
+
+
+class ConvolutionData:
+
+    def __init__(self, param):
+        self.param = param
+        self.mode = param["boundary_handling"]
+        logger.debug(
+            """
+            Testing convolution with boundary_handling=%s,
+            use_textures=%s, input_device=%s, output_device=%s
+            """
+            % (
+                self.mode,
+                param["use_textures"],
+                param["input_on_device"],
+                param["output_on_device"],
+            )
+        )
+
+    @classmethod
+    def setUpClass(cls):
+        cls.image = np.ascontiguousarray(ascent()[:, :511], dtype="f")
+        cls.data1d = cls.image[0]
+        cls.data2d = cls.image
+        cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1))
+        cls.kernel1d = gaussian_kernel(1.0)
+        cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d)
+        cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d)
+        cls.ctx = ocl.create_context()
+        cls.tol = {
+            "1D": 1e-4,
+            "2D": 1e-3,
+            "3D": 1e-3,
+        }
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.data1d = cls.data2d = cls.data3d = cls.image = None
+        cls.kernel1d = cls.kernel2d = cls.kernel3d = None
+
+    @staticmethod
+    def compare(arr1, arr2):
+        return np.max(np.abs(arr1 - arr2))
+
+    @staticmethod
+    def print_err(conv):
+        errmsg = str(
+            """
+            Something wrong with %s
+            mode=%s, texture=%s
+            """
+            % (conv.use_case_desc, conv.mode, conv.use_textures)
+        )
+        return errmsg
+
+    def instantiate_convol(self, shape, kernel, axes=None):
+        if self.mode == "constant":
+            if not (self.param["use_textures"]) or (
+                self.param["use_textures"]
+                and not (check_textures_availability(self.ctx))
+            ):
+                pytest.skip("mode=constant not implemented without textures")
+        C = Convolution(
+            shape,
+            kernel,
+            mode=self.mode,
+            ctx=self.ctx,
+            axes=axes,
+            extra_options={"dont_use_textures": not (self.param["use_textures"])},
+        )
+        return C
+
+    def get_data_and_kernel(self, test_name):
+        dims = {
+            "test_1D": (1, 1),
+            "test_separable_2D": (2, 1),
+            "test_separable_3D": (3, 1),
+            "test_nonseparable_2D": (2, 2),
+            "test_nonseparable_3D": (3, 3),
+        }
+        dim_data = {1: self.data1d, 2: self.data2d, 3: self.data3d}
+        dim_kernel = {
+            1: self.kernel1d,
+            2: self.kernel2d,
+            3: self.kernel3d,
+        }
+        dd, kd = dims[test_name]
+        return dim_data[dd], dim_kernel[kd]
+
+    def get_reference_function(self, test_name):
+        ref_func = {
+            "test_1D": lambda x, y: scipy_convolve1d(x, y, mode=self.mode),
+            "test_separable_2D": lambda x, y: scipy_convolve1d(
+                scipy_convolve1d(x, y, mode=self.mode, axis=1),
+                y,
+                mode=self.mode,
+                axis=0,
+            ),
+            "test_separable_3D": lambda x, y: scipy_convolve1d(
+                scipy_convolve1d(
+                    scipy_convolve1d(x, y, mode=self.mode, axis=2),
+                    y,
+                    mode=self.mode,
+                    axis=1,
+                ),
+                y,
+                mode=self.mode,
+                axis=0,
+            ),
+            "test_nonseparable_2D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
+            "test_nonseparable_3D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
+        }
+        return ref_func[test_name]
+
+    def template_test(self, test_name):
+        data, kernel = self.get_data_and_kernel(test_name)
+        conv = self.instantiate_convol(data.shape, kernel)
+        if self.param["input_on_device"]:
+            data_ref = parray.to_device(conv.queue, data)
+        else:
+            data_ref = data
+        if self.param["output_on_device"]:
+            d_res = parray.empty_like(conv.data_out)
+            d_res.fill(0)
+            res = d_res
+        else:
+            res = None
+        res = conv(data_ref, output=res)
+        if self.param["output_on_device"]:
+            res = res.get()
+        ref_func = self.get_reference_function(test_name)
+        ref = ref_func(data, kernel)
+        metric = self.compare(res, ref)
+        logger.info("%s: max error = %.2e" % (test_name, metric))
+        tol = self.tol[str("%dD" % kernel.ndim)]
+        assert metric < tol, self.print_err(conv)
+
+
+def convolution_data_params():
+    boundary_handlings = ["reflect", "nearest", "wrap", "constant"]
+    use_textures = [True, False]
+    input_on_devices = [True, False]
+    output_on_devices = [True, False]
+    param_vals = list(
+        product(boundary_handlings, use_textures, input_on_devices, output_on_devices)
+    )
+    params = []
+    for boundary_handling, use_texture, input_dev, output_dev in param_vals:
+        param={
+            "boundary_handling": boundary_handling,
+            "input_on_device": input_dev,
+            "output_on_device": output_dev,
+            "use_textures": use_texture,
+        }
+        params.append(param)
+
+    return params
+
+
+@pytest.fixture(scope="module", params=convolution_data_params())
+def convolution_data(request):
+    """Provide a set of convolution data
+
+    The module scope allows to test each function during a single setup of each
+    convolution data
+    """
+    cdata = None
+    try:
+        cdata = ConvolutionData(request.param)
+        cdata.setUpClass()
+        yield cdata
+    finally:
+        cdata.tearDownClass()
+
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_1D(convolution_data):
+    convolution_data.template_test("test_1D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_separable_2D(convolution_data):
+    convolution_data.template_test("test_separable_2D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_separable_3D(convolution_data):
+    convolution_data.template_test("test_separable_3D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_nonseparable_2D(convolution_data):
+    convolution_data.template_test("test_nonseparable_2D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_nonseparable_3D(convolution_data):
+    convolution_data.template_test("test_nonseparable_3D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_batched_2D(convolution_data):
+    """
+    Test batched (nonseparable) 2D convolution on 3D data.
+    In this test: batch along "z" (axis 0)
+    """
+    data = convolution_data.data3d
+    kernel = convolution_data.kernel2d
+    conv = convolution_data.instantiate_convol(data.shape, kernel, axes=(0,))
+    res = conv(data)  # 3D
+    ref = scipy_convolve(data[0], kernel, mode=convolution_data.mode)  # 2D
+
+    std = np.std(res, axis=0)
+    std_max = np.max(np.abs(std))
+    assert std_max < convolution_data.tol["2D"], convolution_data.print_err(conv)
+    metric = convolution_data.compare(res[0], ref)
+    logger.info("test_nonseparable_3D: max error = %.2e" % metric)
+    assert metric < convolution_data.tol["2D"], convolution_data.print_err(conv)
diff --git a/src/silx/opencl/test/test_doubleword.py b/src/silx/opencl/test/test_doubleword.py
new file mode 100644
index 0000000..a33cf5a
--- /dev/null
+++ b/src/silx/opencl/test/test_doubleword.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+# coding: utf-8
+#
+#    Project: The silx project
+#             https://github.com/silx-kit/silx
+#
+#    Copyright (C) 2021-2021 European Synchrotron Radiation Facility, Grenoble, France
+#
+#    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"test suite for OpenCL code"
+
+__author__ = "Jérôme Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "31/05/2021"
+
+import unittest
+import numpy
+import logging
+import platform
+
+logger = logging.getLogger(__name__)
+try:
+    import pyopencl
+except ImportError as error:
+    logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
+    pyopencl = None
+
+from .. import ocl
+if ocl is not None:
+    from ..utils import read_cl_file
+    from .. import pyopencl
+    import pyopencl.array
+    from pyopencl.elementwise import ElementwiseKernel
+
+EPS32 = numpy.finfo("float32").eps
+EPS64 = numpy.finfo("float64").eps
+
+
+@unittest.skipUnless(ocl, "PyOpenCl is missing")
+class TestDoubleWord(unittest.TestCase):
+    """
+    Test the kernels for compensated math in OpenCL
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        if pyopencl is None or ocl is None:
+            raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+
+        cls.ctx = ocl.create_context(devicetype="GPU")
+        cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+
+        # this is running 32 bits OpenCL woth POCL
+        if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
+                cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+            cls.args = "-DX87_VOLATILE=volatile"
+        else:
+            cls.args = ""
+        size = 1024
+        cls.a = 1.0 + numpy.random.random(size)
+        cls.b = 1.0 + numpy.random.random(size)
+        cls.ah = cls.a.astype(numpy.float32)
+        cls.bh = cls.b.astype(numpy.float32)
+        cls.al = (cls.a - cls.ah).astype(numpy.float32)
+        cls.bl = (cls.b - cls.bh).astype(numpy.float32)
+        cls.doubleword = read_cl_file("doubleword.cl")
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.queue = None
+        cls.ctx = None
+        cls.a = cls.al = cls.ah = None
+        cls.b = cls.bl = cls.bh = None
+        cls.doubleword = None
+
+    def test_fast_sum2(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                      "float *a, float *b, float *res_h, float *res_l",
+                      "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
+                      preamble=self.doubleword)
+        a_g = pyopencl.array.to_device(self.queue, self.ah)
+        b_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(a_g)
+        res_h = pyopencl.array.empty_like(a_g)
+        test_kernel(a_g, b_g, res_h, res_l)
+        self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches")
+        self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches")
+        self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+    def test_sum2(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *a, float *b, float *res_h, float *res_l",
+                    "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        a_g = pyopencl.array.to_device(self.queue, self.ah)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(a_g)
+        res_h = pyopencl.array.empty_like(a_g)
+        test_kernel(a_g, b_g, res_h, res_l)
+        self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches")
+        self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches")
+        self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+    def test_prod2(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *a, float *b, float *res_h, float *res_l",
+                    "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        a_g = pyopencl.array.to_device(self.queue, self.ah)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(a_g)
+        res_h = pyopencl.array.empty_like(a_g)
+        test_kernel(a_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches")
+        self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches")
+        self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches")
+
+    def test_dw_plus_fp(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *b, float *res_h, float *res_l",
+                    "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(b_g)
+        res_h = pyopencl.array.empty_like(b_g)
+        test_kernel(ah_g, al_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_plus_dw(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+                    "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        bh_g = pyopencl.array.to_device(self.queue, self.bh)
+        bl_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(bh_g)
+        res_h = pyopencl.array.empty_like(bh_g)
+        test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_times_fp(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *b, float *res_h, float *res_l",
+                    "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(b_g)
+        res_h = pyopencl.array.empty_like(b_g)
+        test_kernel(ah_g, al_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_times_dw(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+                    "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        bh_g = pyopencl.array.to_device(self.queue, self.bh)
+        bl_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(bh_g)
+        res_h = pyopencl.array.empty_like(bh_g)
+        test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_div_fp(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *b, float *res_h, float *res_l",
+                    "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(b_g)
+        res_h = pyopencl.array.empty_like(b_g)
+        test_kernel(ah_g, al_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_div_dw(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+                    "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        bh_g = pyopencl.array.to_device(self.queue, self.bh)
+        bl_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(bh_g)
+        res_h = pyopencl.array.empty_like(bh_g)
+        test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches")
diff --git a/src/silx/opencl/test/test_image.py b/src/silx/opencl/test/test_image.py
new file mode 100644
index 0000000..73c771b
--- /dev/null
+++ b/src/silx/opencl/test/test_image.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#    Project: image manipulation in  OpenCL
+#             https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of image manipulation
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "13/02/2018"
+
+import logging
+import numpy
+
+import unittest
+from ..common import ocl, _measure_workgroup_size
+if ocl:
+    import pyopencl
+    import pyopencl.array
+from ...test.utils import utilstest
+from ..image import ImageProcessing
+logger = logging.getLogger(__name__)
+try:
+    from PIL import Image
+except ImportError:
+    Image = None
+
+
+@unittest.skipUnless(ocl and Image, "PyOpenCl/Image is missing")
+class TestImage(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestImage, cls).setUpClass()
+        if ocl:
+            cls.ctx = ocl.create_context()
+            cls.lena = utilstest.getfile("lena.png")
+            cls.data = numpy.asarray(Image.open(cls.lena))
+            cls.ip = ImageProcessing(ctx=cls.ctx, template=cls.data, profile=True)
+
+    @classmethod
+    def tearDownClass(cls):
+        super(TestImage, cls).tearDownClass()
+        cls.ctx = None
+        cls.lena = None
+        cls.data = None
+        if logger.level <= logging.INFO:
+            logger.warning("\n".join(cls.ip.log_profile()))
+        cls.ip = None
+
+    def setUp(self):
+        if ocl is None:
+            return
+        self.data = numpy.asarray(Image.open(self.lena))
+
+    def tearDown(self):
+        self.img = self.data = None
+
+    @unittest.skipUnless(ocl, "pyopencl is missing")
+    def test_cast(self):
+        """
+        tests the cast kernel
+        """
+        res = self.ip.to_float(self.data)
+        self.assertEqual(res.shape, self.data.shape, "shape")
+        self.assertEqual(res.dtype, numpy.float32, "dtype")
+        self.assertEqual(abs(res - self.data).max(), 0, "content")
+
+    @unittest.skipUnless(ocl, "pyopencl is missing")
+    def test_normalize(self):
+        """
+        tests that all devices are working properly ...
+        """
+        tmp = pyopencl.array.empty(self.ip.ctx, self.data.shape, "float32")
+        res = self.ip.to_float(self.data, out=tmp)
+        res2 = self.ip.normalize(tmp, -100, 100, copy=False)
+        norm = (self.data.astype(numpy.float32) - self.data.min()) / (self.data.max() - self.data.min())
+        ref2 = 200 * norm - 100
+        self.assertLess(abs(res2 - ref2).max(), 3e-5, "content")
+
+    @unittest.skipUnless(ocl, "pyopencl is missing")
+    def test_histogram(self):
+        """
+        Test on a greyscaled image ... of Lena :)
+        """
+        lena_bw = (0.2126 * self.data[:, :, 0] +
+                   0.7152 * self.data[:, :, 1] +
+                   0.0722 * self.data[:, :, 2]).astype("int32")
+        ref = numpy.histogram(lena_bw, 255)
+        ip = ImageProcessing(ctx=self.ctx, template=lena_bw, profile=True)
+        res = ip.histogram(lena_bw, 255)
+        ip.log_profile()
+        delta = (ref[0] - res[0])
+        deltap = (ref[1] - res[1])
+        self.assertEqual(delta.sum(), 0, "errors are self-compensated")
+        self.assertLessEqual(abs(delta).max(), 1, "errors are small")
+        self.assertLessEqual(abs(deltap).max(), 3e-5, "errors on position are small: %s" % (abs(deltap).max()))
diff --git a/src/silx/opencl/test/test_kahan.py b/src/silx/opencl/test/test_kahan.py
new file mode 100644
index 0000000..9e4a1e3
--- /dev/null
+++ b/src/silx/opencl/test/test_kahan.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+# coding: utf-8
+#
+#    Project: OpenCL numerical library
+#             https://github.com/silx-kit/silx
+#
+#    Copyright (C) 2015-2021 European Synchrotron Radiation Facility, Grenoble, France
+#
+#    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"test suite for OpenCL code"
+
+__author__ = "Jérôme Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "17/05/2021"
+
+
+import unittest
+import numpy
+import logging
+import platform
+
+logger = logging.getLogger(__name__)
+try:
+    import pyopencl
+except ImportError as error:
+    logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
+    pyopencl = None
+
+from .. import ocl
+if ocl is not None:
+    from ..utils import read_cl_file
+    from .. import pyopencl
+    import pyopencl.array
+
+
+class TestKahan(unittest.TestCase):
+    """
+    Test the kernels for compensated math in OpenCL
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        if pyopencl is None or ocl is None:
+            raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+
+        cls.ctx = ocl.create_context(devicetype="GPU")
+        cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+
+        # this is running 32 bits OpenCL woth POCL
+        if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
+                cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+            cls.args = "-DX87_VOLATILE=volatile"
+        else:
+            cls.args = ""
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.queue = None
+        cls.ctx = None
+
+    @staticmethod
+    def dummy_sum(ary, dtype=None):
+        "perform the actual sum in a dummy way "
+        if dtype is None:
+            dtype = ary.dtype.type
+        sum_ = dtype(0)
+        for i in ary:
+            sum_ += i
+        return sum_
+
+    def test_kahan(self):
+        # simple test
+        N = 26
+        data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
+
+        ref64 = numpy.sum(data, dtype=numpy.float64)
+        ref32 = self.dummy_sum(data)
+        if (ref64 == ref32):
+            logger.warning("Kahan: invalid tests as float32 provides the same result as float64")
+        # Dummy kernel to evaluate
+        src = """
+        kernel void summation(global float* data,
+                                           int size,
+                                    global float* result)
+        {
+            float2 acc = (float2)(0.0f, 0.0f);
+            for (int i=0; i<size; i++)
+            {
+                acc = kahan_sum(acc, data[i]);
+            }
+            result[0] = acc.s0;
+            result[1] = acc.s1;
+        }
+        """
+        prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
+        ones_d = pyopencl.array.to_device(self.queue, data)
+        res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
+        res_d.fill(0)
+        evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+        evt.wait()
+        res = res_d.get().sum(dtype=numpy.float64)
+        self.assertEqual(ref64, res, "test_kahan")
+
+    def test_dot16(self):
+        # simple test
+        N = 16
+        data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
+
+        ref64 = numpy.dot(data.astype(numpy.float64), data.astype(numpy.float64))
+        ref32 = numpy.dot(data, data)
+        if (ref64 == ref32):
+            logger.warning("dot16: invalid tests as float32 provides the same result as float64")
+        # Dummy kernel to evaluate
+        src = """
+        kernel void test_dot16(global float* data,
+                                           int size,
+                               global float* result)
+        {
+            float2 acc = (float2)(0.0f, 0.0f);
+            float16 data16 = (float16) (data[0],data[1],data[2],data[3],data[4],
+                                        data[5],data[6],data[7],data[8],data[9],
+                         data[10],data[11],data[12],data[13],data[14],data[15]);
+            acc = comp_dot16(data16, data16);
+            result[0] = acc.s0;
+            result[1] = acc.s1;
+        }
+
+        kernel void test_dot8(global float* data,
+                                           int size,
+                               global float* result)
+        {
+            float2 acc = (float2)(0.0f, 0.0f);
+            float8 data0 = (float8) (data[0],data[2],data[4],data[6],data[8],data[10],data[12],data[14]);
+            float8 data1 = (float8) (data[1],data[3],data[5],data[7],data[9],data[11],data[13],data[15]);
+            acc = comp_dot8(data0, data1);
+            result[0] = acc.s0;
+            result[1] = acc.s1;
+        }
+
+        kernel void test_dot4(global float* data,
+                                           int size,
+                               global float* result)
+        {
+            float2 acc = (float2)(0.0f, 0.0f);
+            float4 data0 = (float4) (data[0],data[4],data[8],data[12]);
+            float4 data1 = (float4) (data[3],data[7],data[11],data[15]);
+            acc = comp_dot4(data0, data1);
+            result[0] = acc.s0;
+            result[1] = acc.s1;
+        }
+
+        kernel void test_dot3(global float* data,
+                                           int size,
+                               global float* result)
+        {
+            float2 acc = (float2)(0.0f, 0.0f);
+            float3 data0 = (float3) (data[0],data[4],data[12]);
+            float3 data1 = (float3) (data[3],data[11],data[15]);
+            acc = comp_dot3(data0, data1);
+            result[0] = acc.s0;
+            result[1] = acc.s1;
+        }
+
+        kernel void test_dot2(global float* data,
+                                           int size,
+                               global float* result)
+        {
+            float2 acc = (float2)(0.0f, 0.0f);
+            float2 data0 = (float2) (data[0],data[14]);
+            float2 data1 = (float2) (data[1],data[15]);
+            acc = comp_dot2(data0, data1);
+            result[0] = acc.s0;
+            result[1] = acc.s1;
+        }
+
+        """
+
+        prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
+        ones_d = pyopencl.array.to_device(self.queue, data)
+        res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
+        res_d.fill(0)
+        evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+        evt.wait()
+        res = res_d.get().sum(dtype="float64")
+        self.assertEqual(ref64, res, "test_dot16")
+
+        res_d.fill(0)
+        data0 = data[0::2]
+        data1 = data[1::2]
+        ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+        ref32 = numpy.dot(data0, data1)
+        if (ref64 == ref32):
+            logger.warning("dot8: invalid tests as float32 provides the same result as float64")
+        evt = prg.test_dot8(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+        evt.wait()
+        res = res_d.get().sum(dtype="float64")
+        self.assertEqual(ref64, res, "test_dot8")
+
+        res_d.fill(0)
+        data0 = data[0::4]
+        data1 = data[3::4]
+        ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+        ref32 = numpy.dot(data0, data1)
+        if (ref64 == ref32):
+            logger.warning("dot4: invalid tests as float32 provides the same result as float64")
+        evt = prg.test_dot4(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+        evt.wait()
+        res = res_d.get().sum(dtype="float64")
+        self.assertEqual(ref64, res, "test_dot4")
+
+        res_d.fill(0)
+        data0 = numpy.array([data[0], data[4], data[12]])
+        data1 = numpy.array([data[3], data[11], data[15]])
+        ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+        ref32 = numpy.dot(data0, data1)
+        if (ref64 == ref32):
+            logger.warning("dot3: invalid tests as float32 provides the same result as float64")
+        evt = prg.test_dot3(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+        evt.wait()
+        res = res_d.get().sum(dtype="float64")
+        self.assertEqual(ref64, res, "test_dot3")
+
+        res_d.fill(0)
+        data0 = numpy.array([data[0], data[14]])
+        data1 = numpy.array([data[1], data[15]])
+        ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+        ref32 = numpy.dot(data0, data1)
+        if (ref64 == ref32):
+            logger.warning("dot2: invalid tests as float32 provides the same result as float64")
+        evt = prg.test_dot2(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+        evt.wait()
+        res = res_d.get().sum(dtype="float64")
+        self.assertEqual(ref64, res, "test_dot2")
diff --git a/src/silx/opencl/test/test_linalg.py b/src/silx/opencl/test/test_linalg.py
new file mode 100644
index 0000000..a997a36
--- /dev/null
+++ b/src/silx/opencl/test/test_linalg.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the linalg module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "01/08/2019"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+    import mako
+except ImportError:
+    mako = None
+from ..common import ocl
+if ocl:
+    import pyopencl as cl
+    import pyopencl.array as parray
+    from .. import linalg
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+try:
+    from scipy.ndimage.filters import laplace
+    _has_scipy = True
+except ImportError:
+    _has_scipy = False
+
+
+# TODO move this function in math or image ?
+def gradient(img):
+    '''
+    Compute the gradient of an image as a numpy array
+    Code from https://github.com/emmanuelle/tomo-tv/
+    '''
+    shape = [img.ndim, ] + list(img.shape)
+    gradient = np.zeros(shape, dtype=img.dtype)
+    slice_all = [0, slice(None, -1),]
+    for d in range(img.ndim):
+        gradient[tuple(slice_all)] = np.diff(img, axis=d)
+        slice_all[0] = d + 1
+        slice_all.insert(1, slice(None))
+    return gradient
+
+
+# TODO move this function in math or image ?
+def divergence(grad):
+    '''
+    Compute the divergence of a gradient
+    Code from https://github.com/emmanuelle/tomo-tv/
+    '''
+    res = np.zeros(grad.shape[1:])
+    for d in range(grad.shape[0]):
+        this_grad = np.rollaxis(grad[d], d)
+        this_res = np.rollaxis(res, d)
+        this_res[:-1] += this_grad[:-1]
+        this_res[1:-1] -= this_grad[:-2]
+        this_res[-1] -= this_grad[-2]
+    return res
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestLinAlg(unittest.TestCase):
+
+    def setUp(self):
+        if ocl is None:
+            return
+        self.getfiles()
+        self.la = linalg.LinAlg(self.image.shape)
+        self.allocate_arrays()
+
+    def allocate_arrays(self):
+        """
+        Allocate various types of arrays for the tests
+        """
+        # numpy images
+        self.grad = np.zeros(self.image.shape, dtype=np.complex64)
+        self.grad2 = np.zeros((2,) + self.image.shape, dtype=np.float32)
+        self.grad_ref = gradient(self.image)
+        self.div_ref = divergence(self.grad_ref)
+        self.image2 = np.zeros_like(self.image)
+        # Device images
+        self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64)
+        self.gradient_parray.fill(0)
+        # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
+        # but platforms not suporting openCL 1.2 have a problem with enqueue_fill_buffer,
+        # so we use the parray "fill" utility
+        self.gradient_buffer = self.gradient_parray.data
+        # Do the same for image
+        self.image_parray = parray.to_device(self.la.queue, self.image)
+        self.image_buffer = self.image_parray.data
+        # Refs
+        tmp = np.zeros(self.image.shape, dtype=np.complex64)
+        tmp.real = np.copy(self.grad_ref[0])
+        tmp.imag = np.copy(self.grad_ref[1])
+        self.grad_ref_parray = parray.to_device(self.la.queue, tmp)
+        self.grad_ref_buffer = self.grad_ref_parray.data
+
+    def tearDown(self):
+        self.image = None
+        self.image2 = None
+        self.grad = None
+        self.grad2 = None
+        self.grad_ref = None
+        self.div_ref = None
+        self.gradient_parray.data.release()
+        self.gradient_parray = None
+        self.gradient_buffer = None
+        self.image_parray.data.release()
+        self.image_parray = None
+        self.image_buffer = None
+        self.grad_ref_parray.data.release()
+        self.grad_ref_parray = None
+        self.grad_ref_buffer = None
+
+    def getfiles(self):
+        # load 512x512 MRI phantom - TODO include Lena or ascent once a .npz is available
+        self.image = np.load(utilstest.getfile("Brain512.npz"))["data"]
+
+    def compare(self, result, reference, abstol, name):
+        errmax = np.max(np.abs(result - reference))
+        logger.info("%s: Max error = %e" % (name, errmax))
+        self.assertTrue(errmax < abstol, str("%s: Max error is too high" % name))
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_gradient(self):
+        arrays = {
+            "numpy.ndarray": self.image,
+            "buffer": self.image_buffer,
+            "parray": self.image_parray
+        }
+        for desc, image in arrays.items():
+            # Test with dst on host (numpy.ndarray)
+            res = self.la.gradient(image, return_to_host=True)
+            self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc))
+            # Test with dst on device (pyopencl.Buffer)
+            self.la.gradient(image, dst=self.gradient_buffer)
+            cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer)
+            self.grad2[0] = self.grad.real
+            self.grad2[1] = self.grad.imag
+            self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc))
+            # Test with dst on device (pyopencl.Array)
+            self.la.gradient(image, dst=self.gradient_parray)
+            self.grad = self.gradient_parray.get()
+            self.grad2[0] = self.grad.real
+            self.grad2[1] = self.grad.imag
+            self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc))
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_divergence(self):
+        arrays = {
+            "numpy.ndarray": self.grad_ref,
+            "buffer": self.grad_ref_buffer,
+            "parray": self.grad_ref_parray
+        }
+        for desc, grad in arrays.items():
+            # Test with dst on host (numpy.ndarray)
+            res = self.la.divergence(grad, return_to_host=True)
+            self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc))
+            # Test with dst on device (pyopencl.Buffer)
+            self.la.divergence(grad, dst=self.image_buffer)
+            cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer)
+            self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc))
+            # Test with dst on device (pyopencl.Array)
+            self.la.divergence(grad, dst=self.image_parray)
+            self.image2 = self.image_parray.get()
+            self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc))
+
+    @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing")
+    def test_laplacian(self):
+        laplacian_ref = laplace(self.image)
+        # Laplacian = div(grad)
+        self.la.gradient(self.image)
+        laplacian_ocl = self.la.divergence(self.la.d_gradient, return_to_host=True)
+        self.compare(laplacian_ocl, laplacian_ref, 1e-6, "laplacian")
diff --git a/src/silx/opencl/test/test_medfilt.py b/src/silx/opencl/test/test_medfilt.py
new file mode 100644
index 0000000..339e0f2
--- /dev/null
+++ b/src/silx/opencl/test/test_medfilt.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#    Project: Median filter of images + OpenCL
+#             https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of the median filter
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "05/07/2018"
+
+
+import sys
+import time
+import logging
+import numpy
+import unittest
+from collections import namedtuple
+try:
+    import mako
+except ImportError:
+    mako = None
+from ..common import ocl
+if ocl:
+    import pyopencl
+    import pyopencl.array
+    from .. import medfilt
+
+logger = logging.getLogger(__name__)
+
+Result = namedtuple("Result", ["size", "error", "sp_time", "oc_time"])
+
+try:
+    from scipy.misc import ascent
+except:
+    def ascent():
+        """Dummy image from random data"""
+        return numpy.random.random((512, 512))
+try:
+    from scipy.ndimage import filters
+    median_filter = filters.median_filter
+    HAS_SCIPY = True
+except:
+    HAS_SCIPY = False
+    from silx.math import medfilt2d as median_filter
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestMedianFilter(unittest.TestCase):
+
+    def setUp(self):
+        if ocl is None:
+            return
+        self.data = ascent().astype(numpy.float32)
+        self.medianfilter = medfilt.MedianFilter2D(self.data.shape, devicetype="gpu")
+
+    def tearDown(self):
+        self.data = None
+        self.medianfilter = None
+
+    def measure(self, size):
+        "Common measurement of accuracy and timings"
+        t0 = time.time()
+        if HAS_SCIPY:
+            ref = median_filter(self.data, size, mode="nearest")
+        else:
+            ref = median_filter(self.data, size)
+        t1 = time.time()
+        try:
+            got = self.medianfilter.medfilt2d(self.data, size)
+        except RuntimeError as msg:
+            logger.error(msg)
+            return
+        t2 = time.time()
+        delta = abs(got - ref).max()
+        return Result(size, delta, t1 - t0, t2 - t1)
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_medfilt(self):
+        """
+        tests the median filter kernel
+        """
+        r = self.measure(size=11)
+        if r is None:
+            logger.info("test_medfilt: size: %s: skipped")
+        else:
+            logger.info("test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r)
+            self.assertEqual(r.error, 0, 'Results are correct')
+
+    def benchmark(self, limit=36):
+        "Run some benchmarking"
+        try:
+            import PyQt5
+            from ...gui.matplotlib import pylab
+            from ...gui.utils import update_fig
+        except:
+            pylab = None
+
+            def update_fig(*ag, **kwarg):
+                pass
+
+        fig = pylab.figure()
+        fig.suptitle("Median filter of an image 512x512")
+        sp = fig.add_subplot(1, 1, 1)
+        sp.set_title(self.medianfilter.ctx.devices[0].name)
+        sp.set_xlabel("Window width & height")
+        sp.set_ylabel("Execution time (s)")
+        sp.set_xlim(2, limit + 1)
+        sp.set_ylim(0, 4)
+        data_size = []
+        data_scipy = []
+        data_opencl = []
+        plot_sp = sp.plot(data_size, data_scipy, "-or", label="scipy")[0]
+        plot_opencl = sp.plot(data_size, data_opencl, "-ob", label="opencl")[0]
+        sp.legend(loc=2)
+        fig.show()
+        update_fig(fig)
+        for s in range(3, limit, 2):
+            r = self.measure(s)
+            print(r)
+            if r.error == 0:
+                data_size.append(s)
+                data_scipy.append(r.sp_time)
+                data_opencl.append(r.oc_time)
+                plot_sp.set_data(data_size, data_scipy)
+                plot_opencl.set_data(data_size, data_opencl)
+                update_fig(fig)
+        fig.show()
+        input()
+
+
+def benchmark():
+    testSuite = unittest.TestSuite()
+    testSuite.addTest(TestMedianFilter("benchmark"))
+    return testSuite
diff --git a/src/silx/opencl/test/test_projection.py b/src/silx/opencl/test/test_projection.py
new file mode 100644
index 0000000..13db5f4
--- /dev/null
+++ b/src/silx/opencl/test/test_projection.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the forward projection module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "19/01/2018"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+    import mako
+except ImportError:
+    mako = None
+from ..common import ocl
+if ocl:
+    from .. import projection
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestProj(unittest.TestCase):
+
+    def setUp(self):
+        if ocl is None:
+            return
+        # ~ if sys.platform.startswith('darwin'):
+            # ~ self.skipTest("Projection is not implemented on CPU for OS X yet")
+        self.getfiles()
+        n_angles = self.sino.shape[0]
+        self.proj = projection.Projection(self.phantom.shape, n_angles)
+        if self.proj.compiletime_workgroup_size < 16 * 16:
+            self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet")
+
+    def tearDown(self):
+        self.phantom = None
+        self.sino = None
+        self.proj = None
+
+    def getfiles(self):
+        # load 512x512 MRI phantom
+        self.phantom = np.load(utilstest.getfile("Brain512.npz"))["data"]
+        # load sinogram computed with PyHST
+        self.sino = np.load(utilstest.getfile("sino500_pyhst.npz"))["data"]
+
+    def measure(self):
+        "Common measurement of timings"
+        t1 = time.time()
+        try:
+            result = self.proj.projection(self.phantom)
+        except RuntimeError as msg:
+            logger.error(msg)
+            return
+        t2 = time.time()
+        return t2 - t1, result
+
+    def compare(self, res):
+        """
+        Compare a result with the reference reconstruction.
+        Only the valid reconstruction zone (inscribed circle) is taken into account
+        """
+        # Compare with the original phantom.
+        # TODO: compare a standard projection
+        ref = self.sino
+        return np.max(np.abs(res - ref))
+
+    @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+    def test_proj(self):
+        """
+        tests Projection
+        """
+        # Test single reconstruction
+        # --------------------------
+        t, res = self.measure()
+        if t is None:
+            logger.info("test_proj: skipped")
+        else:
+            logger.info("test_proj: time = %.3fs" % t)
+            err = self.compare(res)
+            msg = str("Max error = %e" % err)
+            logger.info(msg)
+            # Interpolation differs at some lines, giving relative error of 10/50000
+            self.assertTrue(err < 20., "Max error is too high")
+        # Test multiple reconstructions
+        # -----------------------------
+        res0 = np.copy(res)
+        for i in range(10):
+            res = self.proj.projection(self.phantom)
+            errmax = np.max(np.abs(res - res0))
+            self.assertTrue(errmax < 1.e-6, "Max error is too high")
diff --git a/src/silx/opencl/test/test_sparse.py b/src/silx/opencl/test/test_sparse.py
new file mode 100644
index 0000000..1d26b36
--- /dev/null
+++ b/src/silx/opencl/test/test_sparse.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2018-2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the sparse module"""
+
+import numpy as np
+import unittest
+import logging
+from itertools import product
+from ..common import ocl
+if ocl:
+    import pyopencl.array as parray
+    from silx.opencl.sparse import CSR
+try:
+    import scipy.sparse as sp
+except ImportError:
+    sp = None
+logger = logging.getLogger(__name__)
+
+
+
+def generate_sparse_random_data(
+    shape=(1000,),
+    data_min=0, data_max=100,
+    density=0.1,
+    use_only_integers=True,
+    dtype="f"):
+    """
+    Generate random sparse data where.
+
+    Parameters
+    ------------
+    shape: tuple
+        Output data shape.
+    data_min: int or float
+        Minimum value of data
+    data_max: int or float
+        Maximum value of data
+    density: float
+        Density of non-zero elements in the output data.
+        Low value of density mean low number of non-zero elements.
+    use_only_integers: bool
+        If set to True, the output data items will be primarily integers,
+        possibly casted to float if dtype is a floating-point type.
+        This can be used for ease of debugging.
+    dtype: str or numpy.dtype
+        Output data type
+    """
+    mask = np.random.binomial(1, density, size=shape)
+    if use_only_integers:
+        d = np.random.randint(data_min, high=data_max, size=shape)
+    else:
+        d = data_min + (data_max - data_min) * np.random.rand(*shape)
+    return (d * mask).astype(dtype)
+
+
+
+@unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing")
+class TestCSR(unittest.TestCase):
+    """Test CSR format"""
+
+    def setUp(self):
+        # Test possible configurations
+        input_on_device = [False, True]
+        output_on_device = [False, True]
+        dtypes = [np.float32, np.int32, np.uint16]
+        self._test_configs = list(product(input_on_device, output_on_device, dtypes))
+
+
+    def compute_ref_sparsification(self, array):
+        ref_sparse = sp.csr_matrix(array)
+        return ref_sparse
+
+
+    def test_sparsification(self):
+        for input_on_device, output_on_device, dtype in self._test_configs:
+            self._test_sparsification(input_on_device, output_on_device, dtype)
+
+
+    def _test_sparsification(self, input_on_device, output_on_device, dtype):
+        current_config = "input on device: %s, output on device: %s, dtype: %s" % (
+            str(input_on_device), str(output_on_device), str(dtype)
+        )
+        logger.debug("CSR: %s" % current_config)
+        # Generate data and reference CSR
+        array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
+        ref_sparse = self.compute_ref_sparsification(array)
+        # Sparsify on device
+        csr = CSR(array.shape, dtype=dtype)
+        if input_on_device:
+            # The array has to be flattened
+            arr = parray.to_device(csr.queue, array.ravel())
+        else:
+            arr = array
+        if output_on_device:
+            d_data = parray.empty_like(csr.data)
+            d_indices = parray.empty_like(csr.indices)
+            d_indptr = parray.empty_like(csr.indptr)
+            d_data.fill(0)
+            d_indices.fill(0)
+            d_indptr.fill(0)
+            output = (d_data, d_indices, d_indptr)
+        else:
+            output = None
+        data, indices, indptr = csr.sparsify(arr, output=output)
+        if output_on_device:
+            data = data.get()
+            indices = indices.get()
+            indptr = indptr.get()
+        # Compare
+        nnz = ref_sparse.nnz
+        self.assertTrue(
+            np.allclose(data[:nnz], ref_sparse.data),
+            "something wrong with sparsified data (%s)"
+            % current_config
+        )
+        self.assertTrue(
+            np.allclose(indices[:nnz], ref_sparse.indices),
+            "something wrong with sparsified indices (%s)"
+            % current_config
+        )
+        self.assertTrue(
+            np.allclose(indptr, ref_sparse.indptr),
+            "something wrong with sparsified indices pointers (indptr) (%s)"
+            % current_config
+        )
+
+
+    def test_desparsification(self):
+        for input_on_device, output_on_device, dtype in self._test_configs:
+            self._test_desparsification(input_on_device, output_on_device, dtype)
+
+
+    def _test_desparsification(self, input_on_device, output_on_device, dtype):
+        current_config = "input on device: %s, output on device: %s, dtype: %s" % (
+            str(input_on_device), str(output_on_device), str(dtype)
+        )
+        logger.debug("CSR: %s" % current_config)
+        # Generate data and reference CSR
+        array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
+        ref_sparse = self.compute_ref_sparsification(array)
+        # De-sparsify on device
+        csr = CSR(array.shape, dtype=dtype, max_nnz=ref_sparse.nnz)
+        if input_on_device:
+            data = parray.to_device(csr.queue, ref_sparse.data)
+            indices = parray.to_device(csr.queue, ref_sparse.indices)
+            indptr = parray.to_device(csr.queue, ref_sparse.indptr)
+        else:
+            data = ref_sparse.data
+            indices = ref_sparse.indices
+            indptr = ref_sparse.indptr
+        if output_on_device:
+            d_arr = parray.empty_like(csr.array)
+            d_arr.fill(0)
+            output = d_arr
+        else:
+            output = None
+        arr = csr.densify(data, indices, indptr, output=output)
+        if output_on_device:
+            arr = arr.get()
+        # Compare
+        self.assertTrue(
+            np.allclose(arr.reshape(array.shape), array),
+            "something wrong with densified data (%s)"
+            % current_config
+        )
diff --git a/src/silx/opencl/test/test_stats.py b/src/silx/opencl/test/test_stats.py
new file mode 100644
index 0000000..859271d
--- /dev/null
+++ b/src/silx/opencl/test/test_stats.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#    Project: Sift implementation in Python + OpenCL
+#             https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of an addition
+"""
+__authors__ = ["Henri Payno, Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "19/05/2021"
+
+import logging
+import time
+import numpy
+
+import unittest
+from ..common import ocl
+if ocl:
+    import pyopencl
+    import pyopencl.array
+    from ..statistics import StatResults, Statistics
+from ..utils import get_opencl_code
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl, "PyOpenCl is missing")
+class TestStatistics(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.size = 1 << 20  # 1 million elements
+        cls.data = numpy.random.randint(0, 65000, cls.size).astype("uint16")
+        fdata = cls.data.astype("float64")
+        t0 = time.perf_counter()
+        std = fdata.std()
+        cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size),
+                              fdata.sum(), fdata.mean(), std ** 2,
+                              std)
+        t1 = time.perf_counter()
+        cls.ref_time = t1 - t0
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.size = cls.ref = cls.data = cls.ref_time = None
+
+    @classmethod
+    def validate(cls, res):
+        return (
+            (res.min == cls.ref.min) and
+            (res.max == cls.ref.max) and
+            (res.cnt == cls.ref.cnt) and
+            abs(res.mean - cls.ref.mean) < 0.01 and
+            abs(res.std - cls.ref.std) < 0.1)
+
+    def test_measurement(self):
+        """
+        tests that all devices are working properly ...
+        """
+        logger.info("Reference results: %s", self.ref)
+        for pid, platform in enumerate(ocl.platforms):
+            for did, device in enumerate(platform.devices):
+                try:
+                    s = Statistics(template=self.data, platformid=pid, deviceid=did)
+                except Exception as err:
+                    failed_init = True
+                    res = StatResults(0, 0, 0, 0, 0, 0, 0)
+                    print(err)
+                else:
+                    failed_init = False
+                    for comp in ("single", "double", "comp"):
+                        t0 = time.perf_counter()
+                        res = s(self.data, comp=comp)
+                        t1 = time.perf_counter()
+                        logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
+
+                        if failed_init or not self.validate(res):
+                            logger.error("failed_init %s; Computation modes %s", failed_init, comp)
+                            logger.error("Failed on platform %s device %s", platform, device)
+                            logger.error("Reference results: %s", self.ref)
+                            logger.error("Faulty results: %s", res)
+                            self.assertTrue(False, f"Stat calculation failed on {platform},{device} in mode {comp}")
author	Picca Frédéric-Emmanuel <picca@debian.org>	2022-02-02 14:19:58 +0100
committer	Picca Frédéric-Emmanuel <picca@debian.org>	2022-02-02 14:19:58 +0100
commit	4e774db12d5ebe7a20eded6dd434a289e27999e5 (patch)
tree	a9822974ba45196f1e3740995ab157d6eb214a04 /src/silx/opencl/test
parent	d3194b1a9c4404ba93afac43d97172ab24c57098 (diff)