New upstream version 0.15.2+dfsg

author: Picca Frédéric-Emmanuel <picca@debian.org> 2021-09-07 14:39:36 +0200
committer: Picca Frédéric-Emmanuel <picca@debian.org> 2021-09-07 14:39:36 +0200
commit: d3194b1a9c4404ba93afac43d97172ab24c57098 (patch)
tree: a1604130e1401dc1cbd084518ed72869dc92b86f /silx/opencl
parent: b3bea947efa55d2c0f198b6c6795b3177be27f45 (diff)
8 files changed, 363 insertions, 60 deletions
diff --git a/silx/opencl/codec/test/test_byte_offset.py b/silx/opencl/codec/test/test_byte_offset.py
index e523b0f..d1482ce 100644
--- a/silx/opencl/codec/test/test_byte_offset.py
+++ b/silx/opencl/codec/test/test_byte_offset.py
@@ -37,7 +37,7 @@ __authors__ = ["Jérôme Kieffer"]
 __contact__ = "jerome.kieffer@esrf.eu"
 __license__ = "MIT"
 __copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "10/11/2017"
+__date__ = "02/03/2021"
 
 import sys
 import time
@@ -78,8 +78,8 @@ class TestByteOffset(unittest.TestCase):
         tests the byte offset decompression on GPU
         """
         ref, raw = self._create_test_data(shape=(91, 97), nexcept=229)
-        #ref, raw = self._create_test_data(shape=(7, 9), nexcept=0)
-        
+        # ref, raw = self._create_test_data(shape=(7, 9), nexcept=0)
+
         size = numpy.prod(ref.shape)
 
         try:
@@ -104,8 +104,8 @@ class TestByteOffset(unittest.TestCase):
                      1000.0 * (t1 - t0),
                      1000.0 * (t2 - t1))
         bo.log_profile()
-        #print(ref)
-        #print(res_cl.get())
+        # print(ref)
+        # print(res_cl.get())
         self.assertEqual(delta_cy, 0, "Checks fabio works")
         self.assertEqual(delta_cl, 0, "Checks opencl works")
 
@@ -119,7 +119,7 @@ class TestByteOffset(unittest.TestCase):
         ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
 
         try:
-            bo = byte_offset.ByteOffset(len(raw), size, profile=False)
+            bo = byte_offset.ByteOffset(len(raw), size, profile=True)
         except (RuntimeError, pyopencl.RuntimeError) as err:
             logger.warning(err)
             if sys.platform == "darwin":
@@ -155,6 +155,7 @@ class TestByteOffset(unittest.TestCase):
             logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
                          1000.0 * (t1 - t0),
                          1000.0 * (t2 - t1))
+        bo.log_profile(stats=True)
 
     def test_encode(self):
         """Test byte offset compression"""
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 002c15d..b66b7b7 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -4,7 +4,7 @@
 #    Project: S I L X project
 #             https://github.com/silx-kit/silx
 #
-#    Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
+#    Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France
 #
 #    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
 #
@@ -55,7 +55,7 @@ else:
     try:
         import pyopencl
     except ImportError:
-        logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl")
+        logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl")
         pyopencl = None
     else:
         try:
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index 470b141..8b81f7f 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -34,19 +34,18 @@
 Common OpenCL abstract base classe for different processing
 """
 
-from __future__ import absolute_import, print_function, division
-
 __author__ = "Jerome Kieffer"
 __contact__ = "Jerome.Kieffer@ESRF.eu"
 __license__ = "MIT"
 __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "04/12/2020"
+__date__ = "02/03/2021"
 __status__ = "stable"
 
+import sys
 import os
 import logging
 import gc
-from collections import namedtuple
+from collections import namedtuple, OrderedDict
 import numpy
 import threading
 from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
@@ -342,20 +341,45 @@ class OpenclProcessing(object):
         ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
         self.profile_add(ev, "Transfer to texture")
 
-    def log_profile(self):
+    def log_profile(self, stats=False):
         """If we are in profiling mode, prints out all timing for every single OpenCL call
+        
+        :param stats: if True, prints the statistics on each kernel instead of all execution timings
+        :return: list of lines to print
         """
-        t = 0.0
-        out = ["", "Profiling info for OpenCL %s" % self.__class__.__name__]
+        total_time = 0.0
+        out = [""]
+        if stats:
+            stats = OrderedDict()
+            out.append(f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}")
+            out.append(f"{'Kernel name':>50} (count):      min   median      max     mean      std")
+        else:
+            stats = None
+            out.append(f"Profiling info for OpenCL: {self.__class__.__name__}")
+
         if self.profile:
             for e in self.events:
                 if "__len__" in dir(e) and len(e) >= 2:
-                    et = 1e-6 * (e[1].profile.end - e[1].profile.start)
-                    out.append("%50s:\t%.3fms" % (e[0], et))
-                    t += et
+                    name = e[0]
+                    pr = e[1].profile
+                    t0 = pr.start
+                    t1 = pr.end
+                    et = 1e-6 * (t1 - t0)
+                    total_time += et
+                    if stats is None:
+                        out.append(f"{name:>50}        : {et:.3f}ms")
+                    else:
+                        if name in stats:
+                            stats[name].append(et)
+                        else:
+                            stats[name] = [et]
+            if stats is not None:
+                for k, v in stats.items():
+                    n = numpy.array(v)
+                    out.append(f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}")
+            out.append("_" * 80)
+            out.append(f"{'Total OpenCL execution time':>50}        : {total_time:.3f}ms")
 
-        out.append("_" * 80)
-        out.append("%50s:\t%.3fms" % ("Total execution time", t))
         logger.info(os.linesep.join(out))
         return out
 
diff --git a/silx/opencl/statistics.py b/silx/opencl/statistics.py
index bd8e7b7..a96ee33 100644
--- a/silx/opencl/statistics.py
+++ b/silx/opencl/statistics.py
@@ -29,13 +29,10 @@
 large data where numpy is not very efficient.
 """
 
-from __future__ import absolute_import, print_function, with_statement, division
-
-
 __author__ = "Jerome Kieffer"
 __license__ = "MIT"
-__date__ = "11/01/2019"
-__copyright__ = "2012-2017, ESRF, Grenoble"
+__date__ = "19/05/2021"
+__copyright__ = "2012-2019, ESRF, Grenoble"
 __contact__ = "jerome.kieffer@esrf.fr"
 
 import logging
@@ -128,7 +125,7 @@ class Statistics(OpenclProcessing):
                                          self.kernel_files,
                                          "-D NIMAGE=%i" % self.size)
         compiler_options = self.get_compiler_options(x87_volatile=True)
-        src = concatenate_cl_kernel(("kahan.cl", "statistics.cl"))
+        src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl"))
         self.reduction_comp = ReductionKernel(self.ctx,
                                               dtype_out=float8,
                                               neutral=zero8,
@@ -146,6 +143,19 @@ class Statistics(OpenclProcessing):
                                                 preamble=src,
                                                 options=compiler_options)
 
+        if "cl_khr_fp64" in self.device.extensions:
+            self.reduction_double = ReductionKernel(self.ctx,
+                                                    dtype_out=float8,
+                                                    neutral=zero8,
+                                                    map_expr="map_statistics(data, i)",
+                                                    reduce_expr="reduce_statistics_double(a,b)",
+                                                    arguments="__global float *data",
+                                                    preamble=src,
+                                                    options=compiler_options)
+        else:
+            logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device)
+            self.reduction_double = self.reduction_comp
+
     def send_buffer(self, data, dest):
         """
         Send a numpy array to the device, including the cast on the device if
@@ -154,7 +164,7 @@ class Statistics(OpenclProcessing):
         :param numpy.ndarray data: numpy array with data
         :param dest: name of the buffer as registered in the class
         """
-
+        logger.info("send data to %s", dest)
         dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
         events = []
         if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
@@ -173,8 +183,8 @@ class Statistics(OpenclProcessing):
                                    self.cl_mem["raw"].data,
                                    self.cl_mem[dest].data)
             events += [
-                EventDescription("copy H->D %s" % dest, copy_image),
-                EventDescription("cast to float", cast_to_float)
+                EventDescription("copy H->D raw", copy_image),
+                EventDescription(f"cast to float {dest}", cast_to_float)
             ]
         if self.profile:
             self.events += events
@@ -193,16 +203,24 @@ class Statistics(OpenclProcessing):
         size = data.size
         assert size <= self.size, "size is OK"
         events = []
+        if comp is True:
+            comp = "comp"
+        elif comp is False:
+            comp = "single"
+        else:
+            comp = comp.lower()
         with self.sem:
             self.send_buffer(data, "converted")
-            if comp:
-                reduction = self.reduction_comp
-            else:
+            if comp in ("single", "fp32", "float32"):
                 reduction = self.reduction_simple
+            elif comp in ("double", "fp64", "float64"):
+                reduction = self.reduction_double
+            else:
+                reduction = self.reduction_comp
             res_d, evt = reduction(self.cl_mem["converted"][:self.size],
                                    queue=self.queue,
                                    return_event=True)
-            events.append(EventDescription("statistical reduction %s" % ("comp"if comp else "simple"), evt))
+            events.append(EventDescription(f"statistical reduction {comp}", evt))
             if self.profile:
                 self.events += events
             res_h = res_d.get()
diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py
index 2e90e66..928dbaf 100644
--- a/silx/opencl/test/__init__.py
+++ b/silx/opencl/test/__init__.py
@@ -24,7 +24,7 @@
 
 __authors__ = ["J. Kieffer"]
 __license__ = "MIT"
-__date__ = "11/01/2019"
+__date__ = "17/05/2021"
 
 import os
 import unittest
@@ -37,6 +37,7 @@ from . import test_array_utils
 from ..codec import test as test_codec
 from . import test_image
 from . import test_kahan
+from . import test_doubleword
 from . import test_stats
 from . import test_convolution
 from . import test_sparse
@@ -53,6 +54,7 @@ def suite():
     test_suite.addTests(test_codec.suite())
     test_suite.addTests(test_image.suite())
     test_suite.addTests(test_kahan.suite())
+    test_suite.addTests(test_doubleword.suite())
     test_suite.addTests(test_stats.suite())
     test_suite.addTests(test_convolution.suite())
     test_suite.addTests(test_sparse.suite())
diff --git a/silx/opencl/test/test_doubleword.py b/silx/opencl/test/test_doubleword.py
new file mode 100644
index 0000000..ca947e0
--- /dev/null
+++ b/silx/opencl/test/test_doubleword.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python
+# coding: utf-8
+#
+#    Project: The silx project
+#             https://github.com/silx-kit/silx
+#
+#    Copyright (C) 2021-2021 European Synchrotron Radiation Facility, Grenoble, France
+#
+#    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"test suite for OpenCL code"
+
+__author__ = "Jérôme Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "31/05/2021"
+
+import unittest
+import numpy
+import logging
+import platform
+
+logger = logging.getLogger(__name__)
+try:
+    import pyopencl
+except ImportError as error:
+    logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
+    pyopencl = None
+
+from .. import ocl
+if ocl is not None:
+    from ..utils import read_cl_file
+    from .. import pyopencl
+    import pyopencl.array
+    from pyopencl.elementwise import ElementwiseKernel
+from ...test.utils import test_options
+
+EPS32 = numpy.finfo("float32").eps
+EPS64 = numpy.finfo("float64").eps
+
+
+class TestDoubleWord(unittest.TestCase):
+    """
+    Test the kernels for compensated math in OpenCL
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        if not test_options.WITH_OPENCL_TEST:
+            raise unittest.SkipTest("User request to skip OpenCL tests")
+        if pyopencl is None or ocl is None:
+            raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+
+        cls.ctx = ocl.create_context(devicetype="GPU")
+        cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+
+        # this is running 32-bit OpenCL with POCL
+        if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
+                cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+            cls.args = "-DX87_VOLATILE=volatile"
+        else:
+            cls.args = ""
+        size = 1024
+        cls.a = 1.0 + numpy.random.random(size)
+        cls.b = 1.0 + numpy.random.random(size)
+        cls.ah = cls.a.astype(numpy.float32)
+        cls.bh = cls.b.astype(numpy.float32)
+        cls.al = (cls.a - cls.ah).astype(numpy.float32)
+        cls.bl = (cls.b - cls.bh).astype(numpy.float32)
+        cls.doubleword = read_cl_file("doubleword.cl")
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.queue = None
+        cls.ctx = None
+        cls.a = cls.al = cls.ah = None
+        cls.b = cls.bl = cls.bh = None
+        cls.doubleword = None
+
+    def test_fast_sum2(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                      "float *a, float *b, float *res_h, float *res_l",
+                      "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
+                      preamble=self.doubleword)
+        a_g = pyopencl.array.to_device(self.queue, self.ah)
+        b_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(a_g)
+        res_h = pyopencl.array.empty_like(a_g)
+        test_kernel(a_g, b_g, res_h, res_l)
+        self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches")
+        self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches")
+        self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+    def test_sum2(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *a, float *b, float *res_h, float *res_l",
+                    "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        a_g = pyopencl.array.to_device(self.queue, self.ah)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(a_g)
+        res_h = pyopencl.array.empty_like(a_g)
+        test_kernel(a_g, b_g, res_h, res_l)
+        self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches")
+        self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches")
+        self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+    def test_prod2(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *a, float *b, float *res_h, float *res_l",
+                    "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        a_g = pyopencl.array.to_device(self.queue, self.ah)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(a_g)
+        res_h = pyopencl.array.empty_like(a_g)
+        test_kernel(a_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches")
+        self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches")
+        self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches")
+
+    def test_dw_plus_fp(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *b, float *res_h, float *res_l",
+                    "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(b_g)
+        res_h = pyopencl.array.empty_like(b_g)
+        test_kernel(ah_g, al_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_plus_dw(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+                    "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        bh_g = pyopencl.array.to_device(self.queue, self.bh)
+        bl_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(bh_g)
+        res_h = pyopencl.array.empty_like(bh_g)
+        test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_times_fp(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *b, float *res_h, float *res_l",
+                    "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(b_g)
+        res_h = pyopencl.array.empty_like(b_g)
+        test_kernel(ah_g, al_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_times_dw(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+                    "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        bh_g = pyopencl.array.to_device(self.queue, self.bh)
+        bl_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(bh_g)
+        res_h = pyopencl.array.empty_like(bh_g)
+        test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_div_fp(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *b, float *res_h, float *res_l",
+                    "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        b_g = pyopencl.array.to_device(self.queue, self.bh)
+        res_l = pyopencl.array.empty_like(b_g)
+        res_h = pyopencl.array.empty_like(b_g)
+        test_kernel(ah_g, al_g, b_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+    def test_dw_div_dw(self):
+        test_kernel = ElementwiseKernel(self.ctx,
+                    "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+                    "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+                    preamble=self.doubleword)
+        ah_g = pyopencl.array.to_device(self.queue, self.ah)
+        al_g = pyopencl.array.to_device(self.queue, self.al)
+        bh_g = pyopencl.array.to_device(self.queue, self.bh)
+        bl_g = pyopencl.array.to_device(self.queue, self.bl)
+        res_l = pyopencl.array.empty_like(bh_g)
+        res_h = pyopencl.array.empty_like(bh_g)
+        test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+        res_m = res_h.get()
+        res = res_h.get().astype(numpy.float64) + res_l.get()
+        self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches")
+        self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches")
+        self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches")
+
+
+def suite():
+    testsuite = unittest.TestSuite()
+    loader = unittest.defaultTestLoader.loadTestsFromTestCase
+    testsuite.addTest(loader(TestDoubleWord))
+    return testsuite
+
+
+if __name__ == '__main__':
+    runner = unittest.TextTestRunner()
+    runner.run(suite())
diff --git a/silx/opencl/test/test_kahan.py b/silx/opencl/test/test_kahan.py
index 167640c..6ea599b 100644
--- a/silx/opencl/test/test_kahan.py
+++ b/silx/opencl/test/test_kahan.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
 # coding: utf-8
 #
-#    Project: Azimuthal integration
-#             https://github.com/silx-kit/pyFAI
+#    Project: OpenCL numerical library
+#             https://github.com/silx-kit/silx
 #
-#    Copyright (C) 2015-2019 European Synchrotron Radiation Facility, Grenoble, France
+#    Copyright (C) 2015-2021 European Synchrotron Radiation Facility, Grenoble, France
 #
 #    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
 #
@@ -28,13 +28,11 @@
 
 "test suite for OpenCL code"
 
-from __future__ import absolute_import, division, print_function
-
 __author__ = "Jérôme Kieffer"
 __contact__ = "Jerome.Kieffer@ESRF.eu"
 __license__ = "MIT"
 __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
+__date__ = "17/05/2021"
 
 
 import unittest
diff --git a/silx/opencl/test/test_stats.py b/silx/opencl/test/test_stats.py
index b5127c8..8baf05e 100644
--- a/silx/opencl/test/test_stats.py
+++ b/silx/opencl/test/test_stats.py
@@ -28,14 +28,11 @@
 """
 Simple test of an addition
 """
-
-from __future__ import division, print_function
-
 __authors__ = ["Henri Payno, Jérôme Kieffer"]
 __contact__ = "jerome.kieffer@esrf.eu"
 __license__ = "MIT"
 __copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "13/12/2018"
+__date__ = "19/05/2021"
 
 import logging
 import time
@@ -58,11 +55,13 @@ class TestStatistics(unittest.TestCase):
     def setUpClass(cls):
         cls.size = 1 << 20  # 1 million elements
         cls.data = numpy.random.randint(0, 65000, cls.size).astype("uint16")
-        t0 = time.time()
-        cls.ref = StatResults(cls.data.min(), cls.data.max(), cls.data.size,
-                              cls.data.sum(), cls.data.mean(), cls.data.std() ** 2,
-                              cls.data.std())
-        t1 = time.time()
+        fdata = cls.data.astype("float64")
+        t0 = time.perf_counter()
+        std = fdata.std()
+        cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size),
+                              fdata.sum(), fdata.mean(), std ** 2,
+                              std)
+        t1 = time.perf_counter()
         cls.ref_time = t1 - t0
 
     @classmethod
@@ -89,19 +88,22 @@ class TestStatistics(unittest.TestCase):
                     s = Statistics(template=self.data, platformid=pid, deviceid=did)
                 except Exception as err:
                     failed_init = True
-                    res = StatResults(0,0,0,0,0,0,0)
+                    res = StatResults(0, 0, 0, 0, 0, 0, 0)
+                    print(err)
                 else:
                     failed_init = False
-                    t0 = time.time()
-                    res = s(self.data)
-                    t1 = time.time()
-                logger.warning("failed_init %s", failed_init)
-                if failed_init or not self.validate(res):
-                    logger.error("Failed on platform %s device %s", platform, device)
-                    logger.error("Reference results: %s", self.ref)
-                    logger.error("Faulty results: %s", res)
-                    self.assertTrue(False, "Stat calculation failed on %s %s" % (platform, device))
-                logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
+                    for comp in ("single", "double", "comp"):
+                        t0 = time.perf_counter()
+                        res = s(self.data, comp=comp)
+                        t1 = time.perf_counter()
+                        logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
+
+                        if failed_init or not self.validate(res):
+                            logger.error("failed_init %s; Computation modes %s", failed_init, comp)
+                            logger.error("Failed on platform %s device %s", platform, device)
+                            logger.error("Reference results: %s", self.ref)
+                            logger.error("Faulty results: %s", res)
+                            self.assertTrue(False, f"Stat calculation failed on {platform},{device}  in mode {comp}")
 
 
 def suite():
author	Picca Frédéric-Emmanuel <picca@debian.org>	2021-09-07 14:39:36 +0200
committer	Picca Frédéric-Emmanuel <picca@debian.org>	2021-09-07 14:39:36 +0200
commit	d3194b1a9c4404ba93afac43d97172ab24c57098 (patch)
tree	a1604130e1401dc1cbd084518ed72869dc92b86f /silx/opencl
parent	b3bea947efa55d2c0f198b6c6795b3177be27f45 (diff)