summary | refs | log | tree | commit | diff
path: root/silx/opencl
diff options
context:
space:
mode:
author Picca Frédéric-Emmanuel <picca@debian.org> 2021-09-07 14:39:36 +0200
committer Picca Frédéric-Emmanuel <picca@debian.org> 2021-09-07 14:39:36 +0200
commit d3194b1a9c4404ba93afac43d97172ab24c57098 (patch)
tree a1604130e1401dc1cbd084518ed72869dc92b86f /silx/opencl
parent b3bea947efa55d2c0f198b6c6795b3177be27f45 (diff)
New upstream version 0.15.2+dfsg
Diffstat (limited to 'silx/opencl')
-rw-r--r-- silx/opencl/codec/test/test_byte_offset.py 13
-rw-r--r-- silx/opencl/common.py 4
-rw-r--r-- silx/opencl/processing.py 48
-rw-r--r-- silx/opencl/statistics.py 44
-rw-r--r-- silx/opencl/test/__init__.py 4
-rw-r--r-- silx/opencl/test/test_doubleword.py 258
-rw-r--r-- silx/opencl/test/test_kahan.py 10
-rw-r--r-- silx/opencl/test/test_stats.py 42
8 files changed, 363 insertions, 60 deletions
diff --git a/silx/opencl/codec/test/test_byte_offset.py b/silx/opencl/codec/test/test_byte_offset.py
index e523b0f..d1482ce 100644
--- a/silx/opencl/codec/test/test_byte_offset.py
+++ b/silx/opencl/codec/test/test_byte_offset.py
@@ -37,7 +37,7 @@ __authors__ = ["Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "10/11/2017"
+__date__ = "02/03/2021"
import sys
import time
@@ -78,8 +78,8 @@ class TestByteOffset(unittest.TestCase):
tests the byte offset decompression on GPU
"""
ref, raw = self._create_test_data(shape=(91, 97), nexcept=229)
- #ref, raw = self._create_test_data(shape=(7, 9), nexcept=0)
-
+ # ref, raw = self._create_test_data(shape=(7, 9), nexcept=0)
+
size = numpy.prod(ref.shape)
try:
@@ -104,8 +104,8 @@ class TestByteOffset(unittest.TestCase):
1000.0 * (t1 - t0),
1000.0 * (t2 - t1))
bo.log_profile()
- #print(ref)
- #print(res_cl.get())
+ # print(ref)
+ # print(res_cl.get())
self.assertEqual(delta_cy, 0, "Checks fabio works")
self.assertEqual(delta_cl, 0, "Checks opencl works")
@@ -119,7 +119,7 @@ class TestByteOffset(unittest.TestCase):
ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
try:
- bo = byte_offset.ByteOffset(len(raw), size, profile=False)
+ bo = byte_offset.ByteOffset(len(raw), size, profile=True)
except (RuntimeError, pyopencl.RuntimeError) as err:
logger.warning(err)
if sys.platform == "darwin":
@@ -155,6 +155,7 @@ class TestByteOffset(unittest.TestCase):
logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
1000.0 * (t1 - t0),
1000.0 * (t2 - t1))
+ bo.log_profile(stats=True)
def test_encode(self):
"""Test byte offset compression"""
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 002c15d..b66b7b7 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -4,7 +4,7 @@
# Project: S I L X project
# https://github.com/silx-kit/silx
#
-# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -55,7 +55,7 @@ else:
try:
import pyopencl
except ImportError:
- logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl")
+ logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl")
pyopencl = None
else:
try:
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index 470b141..8b81f7f 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -34,19 +34,18 @@
Common OpenCL abstract base classe for different processing
"""
-from __future__ import absolute_import, print_function, division
-
__author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "04/12/2020"
+__date__ = "02/03/2021"
__status__ = "stable"
+import sys
import os
import logging
import gc
-from collections import namedtuple
+from collections import namedtuple, OrderedDict
import numpy
import threading
from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
@@ -342,20 +341,45 @@ class OpenclProcessing(object):
ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
self.profile_add(ev, "Transfer to texture")
- def log_profile(self):
+ def log_profile(self, stats=False):
"""If we are in profiling mode, prints out all timing for every single OpenCL call
+
+ :param stats: if True, prints the statistics on each kernel instead of all execution timings
+ :return: list of lines to print
"""
- t = 0.0
- out = ["", "Profiling info for OpenCL %s" % self.__class__.__name__]
+ total_time = 0.0
+ out = [""]
+ if stats:
+ stats = OrderedDict()
+ out.append(f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}")
+ out.append(f"{'Kernel name':>50} (count): min median max mean std")
+ else:
+ stats = None
+ out.append(f"Profiling info for OpenCL: {self.__class__.__name__}")
+
if self.profile:
for e in self.events:
if "__len__" in dir(e) and len(e) >= 2:
- et = 1e-6 * (e[1].profile.end - e[1].profile.start)
- out.append("%50s:\t%.3fms" % (e[0], et))
- t += et
+ name = e[0]
+ pr = e[1].profile
+ t0 = pr.start
+ t1 = pr.end
+ et = 1e-6 * (t1 - t0)
+ total_time += et
+ if stats is None:
+ out.append(f"{name:>50} : {et:.3f}ms")
+ else:
+ if name in stats:
+ stats[name].append(et)
+ else:
+ stats[name] = [et]
+ if stats is not None:
+ for k, v in stats.items():
+ n = numpy.array(v)
+ out.append(f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}")
+ out.append("_" * 80)
+ out.append(f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms")
- out.append("_" * 80)
- out.append("%50s:\t%.3fms" % ("Total execution time", t))
logger.info(os.linesep.join(out))
return out
diff --git a/silx/opencl/statistics.py b/silx/opencl/statistics.py
index bd8e7b7..a96ee33 100644
--- a/silx/opencl/statistics.py
+++ b/silx/opencl/statistics.py
@@ -29,13 +29,10 @@
large data where numpy is not very efficient.
"""
-from __future__ import absolute_import, print_function, with_statement, division
-
-
__author__ = "Jerome Kieffer"
__license__ = "MIT"
-__date__ = "11/01/2019"
-__copyright__ = "2012-2017, ESRF, Grenoble"
+__date__ = "19/05/2021"
+__copyright__ = "2012-2019, ESRF, Grenoble"
__contact__ = "jerome.kieffer@esrf.fr"
import logging
@@ -128,7 +125,7 @@ class Statistics(OpenclProcessing):
self.kernel_files,
"-D NIMAGE=%i" % self.size)
compiler_options = self.get_compiler_options(x87_volatile=True)
- src = concatenate_cl_kernel(("kahan.cl", "statistics.cl"))
+ src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl"))
self.reduction_comp = ReductionKernel(self.ctx,
dtype_out=float8,
neutral=zero8,
@@ -146,6 +143,19 @@ class Statistics(OpenclProcessing):
preamble=src,
options=compiler_options)
+ if "cl_khr_fp64" in self.device.extensions:
+ self.reduction_double = ReductionKernel(self.ctx,
+ dtype_out=float8,
+ neutral=zero8,
+ map_expr="map_statistics(data, i)",
+ reduce_expr="reduce_statistics_double(a,b)",
+ arguments="__global float *data",
+ preamble=src,
+ options=compiler_options)
+ else:
+ logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device)
+ self.reduction_double = self.reduction_comp
+
def send_buffer(self, data, dest):
"""
Send a numpy array to the device, including the cast on the device if
@@ -154,7 +164,7 @@ class Statistics(OpenclProcessing):
:param numpy.ndarray data: numpy array with data
:param dest: name of the buffer as registered in the class
"""
-
+ logger.info("send data to %s", dest)
dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
events = []
if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
@@ -173,8 +183,8 @@ class Statistics(OpenclProcessing):
self.cl_mem["raw"].data,
self.cl_mem[dest].data)
events += [
- EventDescription("copy H->D %s" % dest, copy_image),
- EventDescription("cast to float", cast_to_float)
+ EventDescription("copy H->D raw", copy_image),
+ EventDescription(f"cast to float {dest}", cast_to_float)
]
if self.profile:
self.events += events
@@ -193,16 +203,24 @@ class Statistics(OpenclProcessing):
size = data.size
assert size <= self.size, "size is OK"
events = []
+ if comp is True:
+ comp = "comp"
+ elif comp is False:
+ comp = "single"
+ else:
+ comp = comp.lower()
with self.sem:
self.send_buffer(data, "converted")
- if comp:
- reduction = self.reduction_comp
- else:
+ if comp in ("single", "fp32", "float32"):
reduction = self.reduction_simple
+ elif comp in ("double", "fp64", "float64"):
+ reduction = self.reduction_double
+ else:
+ reduction = self.reduction_comp
res_d, evt = reduction(self.cl_mem["converted"][:self.size],
queue=self.queue,
return_event=True)
- events.append(EventDescription("statistical reduction %s" % ("comp"if comp else "simple"), evt))
+ events.append(EventDescription(f"statistical reduction {comp}", evt))
if self.profile:
self.events += events
res_h = res_d.get()
diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py
index 2e90e66..928dbaf 100644
--- a/silx/opencl/test/__init__.py
+++ b/silx/opencl/test/__init__.py
@@ -24,7 +24,7 @@
__authors__ = ["J. Kieffer"]
__license__ = "MIT"
-__date__ = "11/01/2019"
+__date__ = "17/05/2021"
import os
import unittest
@@ -37,6 +37,7 @@ from . import test_array_utils
from ..codec import test as test_codec
from . import test_image
from . import test_kahan
+from . import test_doubleword
from . import test_stats
from . import test_convolution
from . import test_sparse
@@ -53,6 +54,7 @@ def suite():
test_suite.addTests(test_codec.suite())
test_suite.addTests(test_image.suite())
test_suite.addTests(test_kahan.suite())
+ test_suite.addTests(test_doubleword.suite())
test_suite.addTests(test_stats.suite())
test_suite.addTests(test_convolution.suite())
test_suite.addTests(test_sparse.suite())
diff --git a/silx/opencl/test/test_doubleword.py b/silx/opencl/test/test_doubleword.py
new file mode 100644
index 0000000..ca947e0
--- /dev/null
+++ b/silx/opencl/test/test_doubleword.py
@@ -0,0 +1,258 @@
+#!/usr/bin/env python
+# coding: utf-8
+#
+# Project: The silx project
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2021-2021 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"test suite for OpenCL code"
+
+__author__ = "Jérôme Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "31/05/2021"
+
+import unittest
+import numpy
+import logging
+import platform
+
+logger = logging.getLogger(__name__)
+try:
+ import pyopencl
+except ImportError as error:
+ logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
+ pyopencl = None
+
+from .. import ocl
+if ocl is not None:
+ from ..utils import read_cl_file
+ from .. import pyopencl
+ import pyopencl.array
+ from pyopencl.elementwise import ElementwiseKernel
+from ...test.utils import test_options
+
+EPS32 = numpy.finfo("float32").eps
+EPS64 = numpy.finfo("float64").eps
+
+
+class TestDoubleWord(unittest.TestCase):
+ """
+ Test the OpenCL kernels for double-word (compensated) arithmetic
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ if not test_options.WITH_OPENCL_TEST:
+ raise unittest.SkipTest("User request to skip OpenCL tests")
+ if pyopencl is None or ocl is None:
+ raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+
+ cls.ctx = ocl.create_context(devicetype="GPU")
+ cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+
+ # this is running 32-bit OpenCL with POCL
+ if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
+ cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+ cls.args = "-DX87_VOLATILE=volatile"
+ else:
+ cls.args = ""
+ size = 1024
+ cls.a = 1.0 + numpy.random.random(size)
+ cls.b = 1.0 + numpy.random.random(size)
+ cls.ah = cls.a.astype(numpy.float32)
+ cls.bh = cls.b.astype(numpy.float32)
+ cls.al = (cls.a - cls.ah).astype(numpy.float32)
+ cls.bl = (cls.b - cls.bh).astype(numpy.float32)
+ cls.doubleword = read_cl_file("doubleword.cl")
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.queue = None
+ cls.ctx = None
+ cls.a = cls.al = cls.ah = None
+ cls.b = cls.bl = cls.bh = None
+ cls.doubleword = None
+
+ def test_fast_sum2(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
+ preamble=self.doubleword)
+ a_g = pyopencl.array.to_device(self.queue, self.ah)
+ b_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(a_g)
+ res_h = pyopencl.array.empty_like(a_g)
+ test_kernel(a_g, b_g, res_h, res_l)
+ self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches")
+ self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches")
+ self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+ def test_sum2(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ a_g = pyopencl.array.to_device(self.queue, self.ah)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(a_g)
+ res_h = pyopencl.array.empty_like(a_g)
+ test_kernel(a_g, b_g, res_h, res_l)
+ self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches")
+ self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches")
+ self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+ def test_prod2(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ a_g = pyopencl.array.to_device(self.queue, self.ah)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(a_g)
+ res_h = pyopencl.array.empty_like(a_g)
+ test_kernel(a_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches")
+ self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches")
+ self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches")
+
+ def test_dw_plus_fp(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(b_g)
+ res_h = pyopencl.array.empty_like(b_g)
+ test_kernel(ah_g, al_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_plus_dw(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ bh_g = pyopencl.array.to_device(self.queue, self.bh)
+ bl_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(bh_g)
+ res_h = pyopencl.array.empty_like(bh_g)
+ test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_times_fp(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(b_g)
+ res_h = pyopencl.array.empty_like(b_g)
+ test_kernel(ah_g, al_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_times_dw(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ bh_g = pyopencl.array.to_device(self.queue, self.bh)
+ bl_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(bh_g)
+ res_h = pyopencl.array.empty_like(bh_g)
+ test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_div_fp(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(b_g)
+ res_h = pyopencl.array.empty_like(b_g)
+ test_kernel(ah_g, al_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_div_dw(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ bh_g = pyopencl.array.to_device(self.queue, self.bh)
+ bl_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(bh_g)
+ res_h = pyopencl.array.empty_like(bh_g)
+ test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches")
+
+
+def suite():
+ testsuite = unittest.TestSuite()
+ loader = unittest.defaultTestLoader.loadTestsFromTestCase
+ testsuite.addTest(loader(TestDoubleWord))
+ return testsuite
+
+
+if __name__ == '__main__':
+ runner = unittest.TextTestRunner()
+ runner.run(suite())
diff --git a/silx/opencl/test/test_kahan.py b/silx/opencl/test/test_kahan.py
index 167640c..6ea599b 100644
--- a/silx/opencl/test/test_kahan.py
+++ b/silx/opencl/test/test_kahan.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python
# coding: utf-8
#
-# Project: Azimuthal integration
-# https://github.com/silx-kit/pyFAI
+# Project: OpenCL numerical library
+# https://github.com/silx-kit/silx
#
-# Copyright (C) 2015-2019 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2015-2021 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -28,13 +28,11 @@
"test suite for OpenCL code"
-from __future__ import absolute_import, division, print_function
-
__author__ = "Jérôme Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
+__date__ = "17/05/2021"
import unittest
diff --git a/silx/opencl/test/test_stats.py b/silx/opencl/test/test_stats.py
index b5127c8..8baf05e 100644
--- a/silx/opencl/test/test_stats.py
+++ b/silx/opencl/test/test_stats.py
@@ -28,14 +28,11 @@
"""
Simple test of an addition
"""
-
-from __future__ import division, print_function
-
__authors__ = ["Henri Payno, Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "13/12/2018"
+__date__ = "19/05/2021"
import logging
import time
@@ -58,11 +55,13 @@ class TestStatistics(unittest.TestCase):
def setUpClass(cls):
cls.size = 1 << 20 # 1 million elements
cls.data = numpy.random.randint(0, 65000, cls.size).astype("uint16")
- t0 = time.time()
- cls.ref = StatResults(cls.data.min(), cls.data.max(), cls.data.size,
- cls.data.sum(), cls.data.mean(), cls.data.std() ** 2,
- cls.data.std())
- t1 = time.time()
+ fdata = cls.data.astype("float64")
+ t0 = time.perf_counter()
+ std = fdata.std()
+ cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size),
+ fdata.sum(), fdata.mean(), std ** 2,
+ std)
+ t1 = time.perf_counter()
cls.ref_time = t1 - t0
@classmethod
@@ -89,19 +88,22 @@ class TestStatistics(unittest.TestCase):
s = Statistics(template=self.data, platformid=pid, deviceid=did)
except Exception as err:
failed_init = True
- res = StatResults(0,0,0,0,0,0,0)
+ res = StatResults(0, 0, 0, 0, 0, 0, 0)
+ print(err)
else:
failed_init = False
- t0 = time.time()
- res = s(self.data)
- t1 = time.time()
- logger.warning("failed_init %s", failed_init)
- if failed_init or not self.validate(res):
- logger.error("Failed on platform %s device %s", platform, device)
- logger.error("Reference results: %s", self.ref)
- logger.error("Faulty results: %s", res)
- self.assertTrue(False, "Stat calculation failed on %s %s" % (platform, device))
- logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
+ for comp in ("single", "double", "comp"):
+ t0 = time.perf_counter()
+ res = s(self.data, comp=comp)
+ t1 = time.perf_counter()
+ logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
+
+ if failed_init or not self.validate(res):
+ logger.error("failed_init %s; Computation modes %s", failed_init, comp)
+ logger.error("Failed on platform %s device %s", platform, device)
+ logger.error("Reference results: %s", self.ref)
+ logger.error("Faulty results: %s", res)
+ self.assertTrue(False, f"Stat calculation failed on {platform},{device} in mode {comp}")
def suite():