Diffstat (limited to 'src/silx/opencl')
-rw-r--r--   src/silx/opencl/__init__.py                      52
-rw-r--r--   src/silx/opencl/backprojection.py               397
-rw-r--r--   src/silx/opencl/codec/__init__.py                 0
-rw-r--r--   src/silx/opencl/codec/byte_offset.py            439
-rw-r--r--   src/silx/opencl/codec/setup.py                   43
-rw-r--r--   src/silx/opencl/codec/test/__init__.py           23
-rw-r--r--   src/silx/opencl/codec/test/test_byte_offset.py  303
-rw-r--r--   src/silx/opencl/common.py                       694
-rw-r--r--   src/silx/opencl/conftest.py                       5
-rw-r--r--   src/silx/opencl/convolution.py                  442
-rw-r--r--   src/silx/opencl/image.py                        387
-rw-r--r--   src/silx/opencl/linalg.py                       220
-rw-r--r--   src/silx/opencl/medfilt.py                      269
-rw-r--r--   src/silx/opencl/processing.py                   447
-rw-r--r--   src/silx/opencl/projection.py                   428
-rw-r--r--   src/silx/opencl/reconstruction.py               388
-rw-r--r--   src/silx/opencl/setup.py                         48
-rw-r--r--   src/silx/opencl/sinofilter.py                   435
-rw-r--r--   src/silx/opencl/sparse.py                       377
-rw-r--r--   src/silx/opencl/statistics.py                   242
-rw-r--r--   src/silx/opencl/test/__init__.py                 23
-rw-r--r--   src/silx/opencl/test/test_addition.py           140
-rw-r--r--   src/silx/opencl/test/test_array_utils.py        152
-rw-r--r--   src/silx/opencl/test/test_backprojection.py     217
-rw-r--r--   src/silx/opencl/test/test_convolution.py        280
-rw-r--r--   src/silx/opencl/test/test_doubleword.py         244
-rw-r--r--   src/silx/opencl/test/test_image.py              125
-rw-r--r--   src/silx/opencl/test/test_kahan.py              254
-rw-r--r--   src/silx/opencl/test/test_linalg.py             204
-rw-r--r--   src/silx/opencl/test/test_medfilt.py            162
-rw-r--r--   src/silx/opencl/test/test_projection.py         121
-rw-r--r--   src/silx/opencl/test/test_sparse.py             188
-rw-r--r--   src/silx/opencl/test/test_stats.py              106
-rw-r--r--   src/silx/opencl/utils.py                        214
34 files changed, 8069 insertions, 0 deletions
diff --git a/src/silx/opencl/__init__.py b/src/silx/opencl/__init__.py
new file mode 100644
index 0000000..fbd1f88
--- /dev/null
+++ b/src/silx/opencl/__init__.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: S I L X project
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+"""This package provides OpenCl-based optimized processing functions.
+
+For more processing functions, see the silx.math and silx.image packages.
+
+See silx documentation: http://www.silx.org/doc/silx/latest/
+"""
+
+__author__ = "Jerome Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "15/03/2017"
+__status__ = "stable"
+
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+from .common import *
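The package __init__ only re-exports silx.opencl.common, so the device inventory `ocl` defined there is reachable directly from silx.opencl. Below is a minimal usage sketch (an illustration, not part of the commit), assuming pyopencl is installed, at least one OpenCL platform is visible, and that `ocl` evaluates to None when OpenCL is unavailable:

    from silx.opencl import ocl

    if ocl is not None:                          # assumed: ocl is None without a usable OpenCL runtime
        print(ocl)                               # one line per platform with its (platformid, deviceid) pairs
        pick = ocl.select_device(dtype="GPU")    # returns (platformid, deviceid) or None
        if pick is not None:
            ctx = ocl.create_context(platformid=pick[0], deviceid=pick[1])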
diff --git a/src/silx/opencl/backprojection.py b/src/silx/opencl/backprojection.py
new file mode 100644
index 0000000..65a9836
--- /dev/null
+++ b/src/silx/opencl/backprojection.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for (filtered) backprojection on the GPU"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["A. Mirone, P. Paleo"]
+__license__ = "MIT"
+__date__ = "25/01/2019"
+
+import logging
+import numpy as np
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+from .sinofilter import SinoFilter
+from .sinofilter import fourier_filter as fourier_filter_
+from ..utils.deprecation import deprecated
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+ import pyopencl.array as parray
+else:
+ raise ImportError("Please install pyopencl in order to use opencl backprojection")
+logger = logging.getLogger(__name__)
+
+
+def _sizeof(Type):
+ """
+ return the size (in bytes) of a scalar type, like the C behavior
+ """
+ return np.dtype(Type).itemsize
+
+
+def _idivup(a, b):
+ """
+ return the integer division, plus one if `a` is not a multiple of `b`
+ """
+ return (a + (b - 1)) // b
+
+
+class Backprojection(OpenclProcessing):
+ """A class for performing the backprojection using OpenCL"""
+ kernel_files = ["backproj.cl", "array_utils.cl"]
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None,
+ angles=None, filter_name=None, ctx=None, devicetype="all",
+ platformid=None, deviceid=None, profile=False,
+ extra_options=None):
+ """Constructor of the OpenCL (filtered) backprojection
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_a, n_b) where n_a is the number of angles (projections)
+ and n_b is the number of detector bins.
+ :param slice_shape: Optional, shape of the reconstructed slice. By
+ default, it is a square slice where the dimension
+ is the "x dimension" of the sinogram (number of
+ bins).
+ :param axis_position: Optional, axis position. Default is
+ `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param filter_name: Optional, name of the filter for FBP. Default is
+ the Ram-Lak filter.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ :param extra_options: Advanced extra options in the form of a dict.
+ Current options are: cutoff, use_numpy_fft
+ """
+ # OS X enforces a workgroup size of 1 when the kernel has
+ # synchronization barriers.
+ # if sys.platform.startswith('darwin'):  # assuming no discrete GPU
+ #     raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
+
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ self._init_geometry(sino_shape, slice_shape, angles, axis_position,
+ extra_options)
+ self._allocate_memory()
+ self._compute_angles()
+ self._init_kernels()
+ self._init_filter(filter_name)
+
+ def _init_geometry(self, sino_shape, slice_shape, angles, axis_position,
+ extra_options):
+ """Geometry Initialization
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_a, n_b) where n_a is the number of angles (projections)
+ and n_b is the number of detector bins.
+ :param slice_shape: shape of the reconstructed slice. By
+ default, it is a square slice where the dimension
+ is the "x dimension" of the sinogram (number of
+ bins).
+ :param angles: list of projection angles in radian.
+ :param axis_position: axis position
+ :param dict extra_options: Advanced extra options
+ """
+ self.shape = sino_shape
+ self.num_bins = np.int32(sino_shape[1])
+ self.num_projs = np.int32(sino_shape[0])
+ self.angles = angles
+ if slice_shape is None:
+ self.slice_shape = (self.num_bins, self.num_bins)
+ else:
+ self.slice_shape = slice_shape
+ self.dimrec_shape = (
+ _idivup(self.slice_shape[0], 32) * 32,
+ _idivup(self.slice_shape[1], 32) * 32
+ )
+ if axis_position:
+ self.axis_pos = np.float32(axis_position)
+ else:
+ self.axis_pos = np.float32((sino_shape[1] - 1.) / 2)
+ self.axis_array = None # TODO: add axis correction front-end
+ self._init_extra_options(extra_options)
+
+ def _init_extra_options(self, extra_options):
+ """Backprojection extra option initialization
+
+ :param dict extra_options: Advanced extra options
+ """
+ self.extra_options = {
+ "cutoff": 1.,
+ "use_numpy_fft": False,
+ # It is axis_pos - (num_bins-1)/2 in PyHST
+ "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2.,
+ "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2.
+ }
+ if extra_options is not None:
+ self.extra_options.update(extra_options)
+
+ def _allocate_memory(self):
+ # Host memory
+ self.slice = np.zeros(self.dimrec_shape, dtype=np.float32)
+ self._use_textures = self.check_textures_availability()
+
+ # Device memory
+ self.buffers = [
+ BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE),
+ BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available)
+ BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY),
+ BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY),
+ BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY),
+ ]
+ self.allocate_buffers(use_array=True)
+ self.d_sino = self.cl_mem["d_sino"] # shorthand
+
+ # Texture memory (if relevant)
+ if self._use_textures:
+ self._allocate_textures()
+
+ # Local memory
+ self.local_mem = 256 * 3 * _sizeof(np.float32) # constant for all image sizes
+
+ def _compute_angles(self):
+ if self.angles is None:
+ self.angles = np.linspace(0, np.pi, self.num_projs, False)
+ h_cos = np.cos(self.angles).astype(np.float32)
+ h_sin = np.sin(self.angles).astype(np.float32)
+ self.cl_mem["d_cos"][:] = h_cos[:]
+ self.cl_mem["d_sin"][:] = h_sin[:]
+ if self.axis_array:
+ self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:]
+ else:
+ self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos
+
+ def _init_kernels(self):
+ compile_options = None
+ if not(self._use_textures):
+ compile_options = "-DDONT_USE_TEXTURES"
+ OpenclProcessing.compile_kernels(
+ self,
+ self.kernel_files,
+ compile_options=compile_options
+ )
+ # check that workgroup can actually be (16, 16)
+ self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel")
+ # Workgroup and ndrange sizes are always the same
+ self.wg = (16, 16)
+ self.ndrange = (
+ _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0],
+ _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1]
+ )
+ # Prepare arguments for the kernel call
+ if not(self._use_textures):
+ d_sino_ref = self.d_sino.data
+ else:
+ d_sino_ref = self.d_sino_tex
+ self._backproj_kernel_args = (
+ # num of projections (int32)
+ self.num_projs,
+ # num of bins (int32)
+ self.num_bins,
+ # axis position (float32)
+ self.axis_pos,
+ # d_slice (__global float32*)
+ self.cl_mem["_d_slice"].data,
+ # d_sino (__read_only image2d_t or float*)
+ d_sino_ref,
+ # gpu_offset_x (float32)
+ np.float32(self.extra_options["gpu_offset_x"]),
+ # gpu_offset_y (float32)
+ np.float32(self.extra_options["gpu_offset_y"]),
+ # d_cos (__global float32*)
+ self.cl_mem["d_cos"].data,
+ # d_sin (__global float32*)
+ self.cl_mem["d_sin"].data,
+ # d_axis (__global float32*)
+ self.cl_mem["d_axes"].data,
+ # shared mem (__local float32*)
+ self._get_local_mem()
+ )
+
+ def _allocate_textures(self):
+ """
+ Allocate the texture for the sinogram.
+ """
+ self.d_sino_tex = self.allocate_texture(self.shape)
+
+ def _init_filter(self, filter_name):
+ """Filter initialization
+
+ :param str filter_name: filter name
+ """
+ self.filter_name = filter_name or "ram-lak"
+ self.sino_filter = SinoFilter(
+ self.shape,
+ ctx=self.ctx,
+ filter_name=self.filter_name,
+ extra_options=self.extra_options,
+ )
+
+ def _get_local_mem(self):
+ return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
+
+ def _cpy2d_to_slice(self, dst):
+ ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0]))
+ slice_shape_ocl = np.int32(ndrange)
+ wg = None
+ kernel_args = (
+ dst.data,
+ self.cl_mem["_d_slice"].data,
+ np.int32(self.slice_shape[1]),
+ np.int32(self.dimrec_shape[1]),
+ np.int32((0, 0)),
+ np.int32((0, 0)),
+ slice_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def _transfer_to_texture(self, sino):
+ if isinstance(sino, parray.Array):
+ return self._transfer_device_to_texture(sino)
+ sino2 = sino
+ if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
+ sino2 = np.ascontiguousarray(sino, dtype=np.float32)
+ if not(self._use_textures):
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino.data,
+ sino2
+ )
+ what = "transfer filtered sino H->D buffer"
+ ev.wait()
+ else:
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino_tex,
+ sino2,
+ origin=(0, 0),
+ region=self.shape[::-1]
+ )
+ what = "transfer filtered sino H->D texture"
+ return EventDescription(what, ev)
+
+ def _transfer_device_to_texture(self, d_sino):
+ if not(self._use_textures):
+ if id(self.d_sino) == id(d_sino):
+ return
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino.data,
+ d_sino
+ )
+ what = "transfer filtered sino D->D buffer"
+ ev.wait()
+ else:
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino_tex,
+ d_sino.data,
+ offset=0,
+ origin=(0, 0),
+ region=self.shape[::-1]
+ )
+ what = "transfer filtered sino D->D texture"
+ return EventDescription(what, ev)
+
+ def backprojection(self, sino, output=None):
+ """Perform the backprojection on an input sinogram
+
+ :param sino: sinogram.
+ :param output: optional, output slice.
+ If provided, the result will be written in this array.
+ :return: backprojection of sinogram
+ """
+ events = []
+ with self.sem:
+ events.append(self._transfer_to_texture(sino))
+ # Call the backprojection kernel
+ if not(self._use_textures):
+ kernel_to_call = self.kernels.backproj_cpu_kernel
+ else:
+ kernel_to_call = self.kernels.backproj_kernel
+ kernel_to_call(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *self._backproj_kernel_args
+ )
+ # Return
+ if output is None:
+ res = self.cl_mem["_d_slice"].get()
+ res = res[:self.slice_shape[0], :self.slice_shape[1]]
+ else:
+ res = output
+ self._cpy2d_to_slice(output)
+
+ # /with self.sem
+ if self.profile:
+ self.events += events
+
+ return res
+
+ def filtered_backprojection(self, sino, output=None):
+ """
+ Compute the filtered backprojection (FBP) on a sinogram.
+
+ :param sino: sinogram (`np.ndarray` or `pyopencl.array.Array`)
+ with the shape (n_projections, n_bins)
+ :param output: output (`np.ndarray` or `pyopencl.array.Array`).
+ If nothing is provided, a new numpy array is returned.
+ """
+ # Filter
+ self.sino_filter(sino, output=self.d_sino)
+ # Backproject
+ res = self.backprojection(self.d_sino, output=output)
+ return res
+
+ __call__ = filtered_backprojection
+
+
+ # -------------------
+ # - Compatibility -
+ # -------------------
+
+ @deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
+ def filter_projections(self, sino, rescale=True):
+ self.sino_filter(sino, output=self.d_sino)
+
+
+
+def fourier_filter(sino, filter_=None, fft_size=None):
+ return fourier_filter_(sino, filter_=filter_, fft_size=fft_size)
+
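For reference, a minimal filtered-backprojection sketch using the class above (an illustration, not part of the commit), assuming a working OpenCL device; the sinogram is random placeholder data of shape (n_angles, n_bins):

    import numpy as np
    from silx.opencl.backprojection import Backprojection

    n_angles, n_bins = 180, 256
    sino = np.random.rand(n_angles, n_bins).astype(np.float32)   # placeholder sinogram

    fbp = Backprojection(sino.shape, filter_name="ram-lak")      # ram-lak is also the default filter
    rec = fbp.filtered_backprojection(sino)                      # equivalent to fbp(sino)
    # rec is a numpy array with the default slice shape (n_bins, n_bins)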
diff --git a/src/silx/opencl/codec/__init__.py b/src/silx/opencl/codec/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/silx/opencl/codec/__init__.py
diff --git a/src/silx/opencl/codec/byte_offset.py b/src/silx/opencl/codec/byte_offset.py
new file mode 100644
index 0000000..9a52427
--- /dev/null
+++ b/src/silx/opencl/codec/byte_offset.py
@@ -0,0 +1,439 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: Byte-offset compression/decompression in OpenCL
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2013-2020 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+This module provides a class for CBF byte offset compression/decompression.
+"""
+
+from __future__ import division, print_function, with_statement
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "11/10/2018"
+__status__ = "production"
+
+
+import functools
+import os
+import numpy
+from ..common import ocl, pyopencl
+from ..processing import BufferDescription, EventDescription, OpenclProcessing
+
+import logging
+logger = logging.getLogger(__name__)
+
+if pyopencl:
+ import pyopencl.version
+ if pyopencl.version.VERSION < (2016, 0):
+ from pyopencl.scan import GenericScanKernel, GenericDebugScanKernel
+ else:
+ from pyopencl.algorithm import GenericScanKernel
+ from pyopencl.scan import GenericDebugScanKernel
+else:
+ logger.warning("No PyOpenCL, no byte-offset, please see fabio")
+
+
+class ByteOffset(OpenclProcessing):
+ """Perform the byte offset compression/decompression on the GPU
+
+ See :class:`OpenclProcessing` for optional arguments description.
+
+ :param int raw_size:
+ Size of the raw stream for decompression.
+ It can be (slightly) larger than the array.
+ :param int dec_size:
+ Size of the decompression output array
+ (mandatory for decompression)
+ """
+
+ def __init__(self, raw_size=None, dec_size=None,
+ ctx=None, devicetype="all",
+ platformid=None, deviceid=None,
+ block_size=None, profile=False):
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ block_size=block_size, profile=profile)
+ if self.block_size is None:
+ self.block_size = self.device.max_work_group_size
+ wg = self.block_size
+
+ buffers = [BufferDescription("counter", 1, numpy.int32, None)]
+
+ if raw_size is None:
+ self.raw_size = -1
+ self.padded_raw_size = -1
+ else:
+ self.raw_size = int(raw_size)
+ self.padded_raw_size = int((self.raw_size + wg - 1) & ~(wg - 1))
+ buffers += [
+ BufferDescription("raw", self.padded_raw_size, numpy.int8, None),
+ BufferDescription("mask", self.padded_raw_size, numpy.int32, None),
+ BufferDescription("values", self.padded_raw_size, numpy.int32, None),
+ BufferDescription("exceptions", self.padded_raw_size, numpy.int32, None)
+ ]
+
+ if dec_size is None:
+ self.dec_size = None
+ else:
+ self.dec_size = numpy.int32(dec_size)
+ buffers += [
+ BufferDescription("data_float", self.dec_size, numpy.float32, None),
+ BufferDescription("data_int", self.dec_size, numpy.int32, None)
+ ]
+
+ self.allocate_buffers(buffers, use_array=True)
+
+ self.compile_kernels([os.path.join("codec", "byte_offset")])
+ self.kernels.__setattr__("scan", self._init_double_scan())
+ self.kernels.__setattr__("compression_scan",
+ self._init_compression_scan())
+
+ def _init_double_scan(self):
+ """"generates a double scan on indexes and values in one operation"""
+ arguments = "__global int *value", "__global int *index"
+ int2 = pyopencl.tools.get_or_register_dtype("int2")
+ input_expr = "index[i]>0 ? (int2)(0, 0) : (int2)(value[i], 1)"
+ scan_expr = "a+b"
+ neutral = "(int2)(0,0)"
+ output_statement = "value[i] = item.s0; index[i+1] = item.s1;"
+
+ if self.block_size > 256:
+ knl = GenericScanKernel(self.ctx,
+ dtype=int2,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement)
+ else: # MacOS on CPU
+ knl = GenericDebugScanKernel(self.ctx,
+ dtype=int2,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement)
+ return knl
+
+ def decode(self, raw, as_float=False, out=None):
+ """This function actually performs the decompression by calling the kernels
+
+ :param numpy.ndarray raw: The compressed data as a 1D numpy array of char.
+ :param bool as_float: True to decompress as float32,
+ False (default) to decompress as int32
+ :param pyopencl.array out: pyopencl array in which to place the result.
+ :return: The decompressed image as a pyopencl array.
+ :rtype: pyopencl.array
+ """
+ assert self.dec_size is not None, \
+ "dec_size is a mandatory ByteOffset init argument for decompression"
+
+ events = []
+ with self.sem:
+ len_raw = numpy.int32(len(raw))
+ if len_raw > self.padded_raw_size:
+ wg = self.block_size
+ self.raw_size = int(len(raw))
+ self.padded_raw_size = (self.raw_size + wg - 1) & ~(wg - 1)
+ logger.info("increase raw buffer size to %s", self.padded_raw_size)
+ buffers = {
+ "raw": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int8),
+ "mask": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
+ "exceptions": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
+ "values": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
+ }
+ self.cl_mem.update(buffers)
+ else:
+ wg = self.block_size
+
+ evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data,
+ raw,
+ is_blocking=False)
+ events.append(EventDescription("copy raw H -> D", evt))
+ evt = self.kernels.fill_int_mem(self.queue, (self.padded_raw_size,), (wg,),
+ self.cl_mem["mask"].data,
+ numpy.int32(self.padded_raw_size),
+ numpy.int32(0),
+ numpy.int32(0))
+ events.append(EventDescription("memset mask", evt))
+ evt = self.kernels.fill_int_mem(self.queue, (1,), (1,),
+ self.cl_mem["counter"].data,
+ numpy.int32(1),
+ numpy.int32(0),
+ numpy.int32(0))
+ events.append(EventDescription("memset counter", evt))
+ evt = self.kernels.mark_exceptions(self.queue, (self.padded_raw_size,), (wg,),
+ self.cl_mem["raw"].data,
+ len_raw,
+ numpy.int32(self.raw_size),
+ self.cl_mem["mask"].data,
+ self.cl_mem["values"].data,
+ self.cl_mem["counter"].data,
+ self.cl_mem["exceptions"].data)
+ events.append(EventDescription("mark exceptions", evt))
+ nb_exceptions = numpy.empty(1, dtype=numpy.int32)
+ evt = pyopencl.enqueue_copy(self.queue, nb_exceptions, self.cl_mem["counter"].data,
+ is_blocking=False)
+ events.append(EventDescription("copy counter D -> H", evt))
+ evt.wait()
+ nbexc = int(nb_exceptions[0])
+ if nbexc == 0:
+ logger.info("nbexc %i", nbexc)
+ else:
+ evt = self.kernels.treat_exceptions(self.queue, (nbexc,), (1,),
+ self.cl_mem["raw"].data,
+ len_raw,
+ self.cl_mem["mask"].data,
+ self.cl_mem["exceptions"].data,
+ self.cl_mem["values"].data
+ )
+ events.append(EventDescription("treat_exceptions", evt))
+
+ #self.cl_mem["copy_values"] = self.cl_mem["values"].copy()
+ #self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy()
+ evt = self.kernels.scan(self.cl_mem["values"],
+ self.cl_mem["mask"],
+ queue=self.queue,
+ size=int(len_raw),
+ wait_for=(evt,))
+ events.append(EventDescription("double scan", evt))
+ #evt.wait()
+ if out is not None:
+ if out.dtype == numpy.float32:
+ copy_results = self.kernels.copy_result_float
+ else:
+ copy_results = self.kernels.copy_result_int
+ else:
+ if as_float:
+ out = self.cl_mem["data_float"]
+ copy_results = self.kernels.copy_result_float
+ else:
+ out = self.cl_mem["data_int"]
+ copy_results = self.kernels.copy_result_int
+ evt = copy_results(self.queue, (self.padded_raw_size,), (wg,),
+ self.cl_mem["values"].data,
+ self.cl_mem["mask"].data,
+ len_raw,
+ self.dec_size,
+ out.data
+ )
+ events.append(EventDescription("copy_results", evt))
+ #evt.wait()
+ if self.profile:
+ self.events += events
+ return out
+
+ __call__ = decode
+
+ def _init_compression_scan(self):
+ """Initialize CBF compression scan kernels"""
+ preamble = """
+ int compressed_size(int diff) {
+ int abs_diff = abs(diff);
+
+ if (abs_diff < 128) {
+ return 1;
+ }
+ else if (abs_diff < 32768) {
+ return 3;
+ }
+ else {
+ return 7;
+ }
+ }
+
+ void write(const int index,
+ const int diff,
+ global char *output) {
+ int abs_diff = abs(diff);
+
+ if (abs_diff < 128) {
+ output[index] = (char) diff;
+ }
+ else if (abs_diff < 32768) {
+ output[index] = -128;
+ output[index + 1] = (char) (diff >> 0);
+ output[index + 2] = (char) (diff >> 8);
+ }
+ else {
+ output[index] = -128;
+ output[index + 1] = 0;
+ output[index + 2] = -128;
+ output[index + 3] = (char) (diff >> 0);
+ output[index + 4] = (char) (diff >> 8);
+ output[index + 5] = (char) (diff >> 16);
+ output[index + 6] = (char) (diff >> 24);
+ }
+ }
+ """
+ arguments = "__global const int *data, __global char *compressed, __global int *size"
+ input_expr = "compressed_size((i == 0) ? data[0] : (data[i] - data[i - 1]))"
+ scan_expr = "a+b"
+ neutral = "0"
+ output_statement = """
+ if (prev_item == 0) { // 1st thread store compressed data size
+ size[0] = last_item;
+ }
+ write(prev_item, (i == 0) ? data[0] : (data[i] - data[i - 1]), compressed);
+ """
+
+ if self.block_size >= 64:
+ knl = GenericScanKernel(self.ctx,
+ dtype=numpy.int32,
+ preamble=preamble,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement)
+ else: # MacOS on CPU
+ knl = GenericDebugScanKernel(self.ctx,
+ dtype=numpy.int32,
+ preamble=preamble,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement)
+ return knl
+
+ def encode(self, data, out=None):
+ """Compress data to CBF.
+
+ :param data: The data to compress as a numpy array
+ (or a pyopencl Array) of int32.
+ :type data: Union[numpy.ndarray, pyopencl.array.Array]
+ :param pyopencl.array out:
+ pyopencl array of int8 in which to store the result.
+ The array should be large enough to store the compressed data.
+ :return: The compressed data as a pyopencl array.
+ If out is provided, this array shares the backing buffer,
+ but has the exact size of the compressed data and the queue
+ of the ByteOffset instance.
+ :rtype: pyopencl.array
+ :raises ValueError: if out array is not large enough
+ """
+
+ events = []
+ with self.sem:
+ if isinstance(data, pyopencl.array.Array):
+ d_data = data # Uses provided array
+
+ else: # Copy data to device
+ data = numpy.ascontiguousarray(data, dtype=numpy.int32).ravel()
+
+ # Make sure data array exists and is large enough
+ if ("data_input" not in self.cl_mem or
+ self.cl_mem["data_input"].size < data.size):
+ logger.info("increase data input buffer size to %s", data.size)
+ self.cl_mem.update({
+ "data_input": pyopencl.array.empty(self.queue,
+ data.size,
+ dtype=numpy.int32)})
+ d_data = self.cl_mem["data_input"]
+
+ evt = pyopencl.enqueue_copy(
+ self.queue, d_data.data, data, is_blocking=False)
+ events.append(EventDescription("copy data H -> D", evt))
+
+ # Make sure compressed array exists and is large enough
+ compressed_size = d_data.size * 7
+ if ("compressed" not in self.cl_mem or
+ self.cl_mem["compressed"].size < compressed_size):
+ logger.info("increase compressed buffer size to %s", compressed_size)
+ self.cl_mem.update({
+ "compressed": pyopencl.array.empty(self.queue,
+ compressed_size,
+ dtype=numpy.int8)})
+ d_compressed = self.cl_mem["compressed"]
+ d_size = self.cl_mem["counter"] # Shared with decompression
+
+ evt = self.kernels.compression_scan(d_data, d_compressed, d_size)
+ events.append(EventDescription("compression scan", evt))
+ byte_count = int(d_size.get()[0])
+
+ if out is None:
+ # Create out array from a sub-region of the compressed buffer
+ out = pyopencl.array.Array(
+ self.queue,
+ shape=(byte_count,),
+ dtype=numpy.int8,
+ allocator=functools.partial(
+ d_compressed.base_data.get_sub_region,
+ d_compressed.offset))
+
+ elif out.size < byte_count:
+ raise ValueError(
+ "Provided output buffer is not large enough: "
+ "requires %d bytes, got %d" % (byte_count, out.size))
+
+ else: # out.size >= byte_count
+ # Create an array with a sub-region of out and this class queue
+ out = pyopencl.array.Array(
+ self.queue,
+ shape=(byte_count,),
+ dtype=numpy.int8,
+ allocator=functools.partial(out.base_data.get_sub_region,
+ out.offset))
+
+ evt = pyopencl.enqueue_copy(self.queue, out.data, d_compressed.data,
+ byte_count=byte_count)
+ events.append(
+ EventDescription("copy D -> D: internal -> out", evt))
+
+ if self.profile:
+ self.events += events
+
+ return out
+
+ def encode_to_bytes(self, data):
+ """Compresses data to CBF and returns compressed data as bytes.
+
+ Usage:
+
+ Provided an image (`image`) stored as a numpy array of int32,
+ first, create a byte offset compression/decompression object:
+
+ >>> from silx.opencl.codec.byte_offset import ByteOffset
+ >>> byte_offset_codec = ByteOffset()
+
+ Then, compress an image into bytes:
+
+ >>> compressed = byte_offset_codec.encode_to_bytes(image)
+
+ :param data: The data to compress as a numpy array
+ (or a pyopencl Array) of int32.
+ :type data: Union[numpy.ndarray, pyopencl.array.Array]
+ :return: The compressed data as bytes.
+ :rtype: bytes
+ """
+ compressed_array = self.encode(data)
+ return compressed_array.get().tobytes()
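A round-trip sketch for this codec (an illustration, not part of the commit), assuming pyopencl and a working OpenCL device; the int32 image is synthetic, and dec_size is given at construction time because it is mandatory for decompression:

    import numpy
    from silx.opencl.codec.byte_offset import ByteOffset

    image = numpy.arange(512 * 512, dtype=numpy.int32).reshape(512, 512)

    codec = ByteOffset(dec_size=image.size)        # dec_size is required by decode()
    stream = codec.encode_to_bytes(image)          # CBF byte-offset stream, as bytes
    decoded = codec.decode(stream).get().reshape(image.shape)
    assert numpy.array_equal(decoded, image)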
diff --git a/src/silx/opencl/codec/setup.py b/src/silx/opencl/codec/setup.py
new file mode 100644
index 0000000..4a5c1e5
--- /dev/null
+++ b/src/silx/opencl/codec/setup.py
@@ -0,0 +1,43 @@
+# coding: utf-8
+#
+# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+
+from __future__ import division
+
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__authors__ = ["J. Kieffer"]
+__date__ = "13/10/2017"
+
+from numpy.distutils.misc_util import Configuration
+
+
+def configuration(parent_package='', top_path=None):
+ config = Configuration('codec', parent_package, top_path)
+ config.add_subpackage('test')
+ return config
+
+
+if __name__ == "__main__":
+ from numpy.distutils.core import setup
+ setup(configuration=configuration)
diff --git a/src/silx/opencl/codec/test/__init__.py b/src/silx/opencl/codec/test/__init__.py
new file mode 100644
index 0000000..325c2c7
--- /dev/null
+++ b/src/silx/opencl/codec/test/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+# Project: silx
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2013-2017 European Synchrotron Radiation Facility, Grenoble, France
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
diff --git a/src/silx/opencl/codec/test/test_byte_offset.py b/src/silx/opencl/codec/test/test_byte_offset.py
new file mode 100644
index 0000000..4b2d5a3
--- /dev/null
+++ b/src/silx/opencl/codec/test/test_byte_offset.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: Byte-offset decompression in OpenCL
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2013-2020 European Synchrotron Radiation Facility,
+# Grenoble, France
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Test suite for byte-offset decompression
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "02/03/2021"
+
+import sys
+import time
+import logging
+import numpy
+from silx.opencl.common import ocl, pyopencl
+from silx.opencl.codec import byte_offset
+import fabio
+import unittest
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl and pyopencl,
+ "PyOpenCl is missing")
+class TestByteOffset(unittest.TestCase):
+
+ @staticmethod
+ def _create_test_data(shape, nexcept, lam=200):
+ """Create test (image, compressed stream) pair.
+
+ :param shape: Shape of test image
+ :param int nexcept: Number of exceptions in the image
+ :param lam: Expectation of interval argument for numpy.random.poisson
+ :return: (reference image array, compressed stream)
+ """
+ size = numpy.prod(shape)
+ ref = numpy.random.poisson(lam, numpy.prod(shape))
+ exception_loc = numpy.random.randint(0, size, size=nexcept)
+ exception_value = numpy.random.randint(0, 1000000, size=nexcept)
+ ref[exception_loc] = exception_value
+ ref.shape = shape
+
+ raw = fabio.compression.compByteOffset(ref)
+ return ref, raw
+
+ def test_decompress(self):
+ """
+ tests the byte offset decompression on GPU
+ """
+ ref, raw = self._create_test_data(shape=(91, 97), nexcept=229)
+ # ref, raw = self._create_test_data(shape=(7, 9), nexcept=0)
+
+ size = numpy.prod(ref.shape)
+
+ try:
+ bo = byte_offset.ByteOffset(raw_size=len(raw), dec_size=size, profile=True)
+ except (RuntimeError, pyopencl.RuntimeError) as err:
+ logger.warning(err)
+ if sys.platform == "darwin":
+ raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
+ else:
+ raise err
+ print(bo.block_size)
+
+ t0 = time.time()
+ res_cy = fabio.compression.decByteOffset(raw)
+ t1 = time.time()
+ res_cl = bo.decode(raw)
+ t2 = time.time()
+ delta_cy = abs(ref.ravel() - res_cy).max()
+ delta_cl = abs(ref.ravel() - res_cl.get()).max()
+
+ logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1))
+ bo.log_profile()
+ # print(ref)
+ # print(res_cl.get())
+ self.assertEqual(delta_cy, 0, "Checks fabio works")
+ self.assertEqual(delta_cl, 0, "Checks opencl works")
+
+ def test_many_decompress(self, ntest=10):
+ """
+ Test the byte offset decompression on GPU with many images to ensure
+ there is no memory leak
+ """
+ shape = (991, 997)
+ size = numpy.prod(shape)
+ ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
+
+ try:
+ bo = byte_offset.ByteOffset(len(raw), size, profile=True)
+ except (RuntimeError, pyopencl.RuntimeError) as err:
+ logger.warning(err)
+ if sys.platform == "darwin":
+ raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
+ else:
+ raise err
+ t0 = time.time()
+ res_cy = fabio.compression.decByteOffset(raw)
+ t1 = time.time()
+ res_cl = bo(raw)
+ t2 = time.time()
+ delta_cy = abs(ref.ravel() - res_cy).max()
+ delta_cl = abs(ref.ravel() - res_cl.get()).max()
+ self.assertEqual(delta_cy, 0, "Checks fabio works")
+ self.assertEqual(delta_cl, 0, "Checks opencl works")
+ logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1))
+
+ for i in range(ntest):
+ ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
+
+ t0 = time.time()
+ res_cy = fabio.compression.decByteOffset(raw)
+ t1 = time.time()
+ res_cl = bo(raw)
+ t2 = time.time()
+ delta_cy = abs(ref.ravel() - res_cy).max()
+ delta_cl = abs(ref.ravel() - res_cl.get()).max()
+ self.assertEqual(delta_cy, 0, "Checks fabio works #%i" % i)
+ self.assertEqual(delta_cl, 0, "Checks opencl works #%i" % i)
+
+ logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1))
+ bo.log_profile(stats=True)
+
+ def test_encode(self):
+ """Test byte offset compression"""
+ ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
+
+ try:
+ bo = byte_offset.ByteOffset(len(raw), ref.size, profile=True)
+ except (RuntimeError, pyopencl.RuntimeError) as err:
+ logger.warning(err)
+ raise err
+
+ t0 = time.time()
+ compressed_array = bo.encode(ref)
+ t1 = time.time()
+
+ compressed_stream = compressed_array.get().tobytes()
+ self.assertEqual(raw, compressed_stream)
+
+ logger.debug("Global execution time: OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0))
+ bo.log_profile()
+
+ def test_encode_to_array(self):
+ """Test byte offset compression while providing an out array"""
+
+ ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
+
+ try:
+ bo = byte_offset.ByteOffset(profile=True)
+ except (RuntimeError, pyopencl.RuntimeError) as err:
+ logger.warning(err)
+ raise err
+ # Test with out buffer too small
+ out = pyopencl.array.empty(bo.queue, (10,), numpy.int8)
+ with self.assertRaises(ValueError):
+ bo.encode(ref, out)
+
+ # Test with out buffer too big
+ out = pyopencl.array.empty(bo.queue, (len(raw) + 10,), numpy.int8)
+
+ compressed_array = bo.encode(ref, out)
+
+ # Get size from returned array
+ compressed_size = compressed_array.size
+ self.assertEqual(compressed_size, len(raw))
+
+ # Get data from out array, read it from bo object queue
+ out_bo_queue = out.with_queue(bo.queue)
+ compressed_stream = out_bo_queue.get().tobytes()[:compressed_size]
+ self.assertEqual(raw, compressed_stream)
+
+ def test_encode_to_bytes(self):
+ """Test byte offset compression to bytes"""
+ ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
+
+ try:
+ bo = byte_offset.ByteOffset(profile=True)
+ except (RuntimeError, pyopencl.RuntimeError) as err:
+ logger.warning(err)
+ raise err
+
+ t0 = time.time()
+ res_fabio = fabio.compression.compByteOffset(ref)
+ t1 = time.time()
+ compressed_stream = bo.encode_to_bytes(ref)
+ t2 = time.time()
+
+ self.assertEqual(raw, compressed_stream)
+
+ logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1))
+ bo.log_profile()
+
+ def test_encode_to_bytes_from_array(self):
+ """Test byte offset compression to bytes from a pyopencl array.
+ """
+ ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
+
+ try:
+ bo = byte_offset.ByteOffset(profile=True)
+ except (RuntimeError, pyopencl.RuntimeError) as err:
+ logger.warning(err)
+ raise err
+
+ d_ref = pyopencl.array.to_device(
+ bo.queue, ref.astype(numpy.int32).ravel())
+
+ t0 = time.time()
+ res_fabio = fabio.compression.compByteOffset(ref)
+ t1 = time.time()
+ compressed_stream = bo.encode_to_bytes(d_ref)
+ t2 = time.time()
+
+ self.assertEqual(raw, compressed_stream)
+
+ logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1))
+ bo.log_profile()
+
+ def test_many_encode(self, ntest=10):
+ """Test byte offset compression with many image"""
+ shape = (991, 997)
+ ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
+
+ try:
+ bo = byte_offset.ByteOffset(profile=False)
+ except (RuntimeError, pyopencl.RuntimeError) as err:
+ logger.warning(err)
+ raise err
+
+ bo_durations = []
+
+ t0 = time.time()
+ res_fabio = fabio.compression.compByteOffset(ref)
+ t1 = time.time()
+ compressed_stream = bo.encode_to_bytes(ref)
+ t2 = time.time()
+ bo_durations.append(1000.0 * (t2 - t1))
+
+ self.assertEqual(raw, compressed_stream)
+ logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1))
+
+ for i in range(ntest):
+ ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
+
+ t0 = time.time()
+ res_fabio = fabio.compression.compByteOffset(ref)
+ t1 = time.time()
+ compressed_stream = bo.encode_to_bytes(ref)
+ t2 = time.time()
+ bo_durations.append(1000.0 * (t2 - t1))
+
+ self.assertEqual(raw, compressed_stream)
+ logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1))
+
+ logger.debug("OpenCL execution time: Mean: %fms, Min: %fms, Max: %fms",
+ numpy.mean(bo_durations),
+ numpy.min(bo_durations),
+ numpy.max(bo_durations))
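All tests above follow the same pattern: fabio's CPU implementation of the byte-offset codec serves as the reference, and the OpenCL results are compared against it. A condensed sketch of that reference round trip (synthetic Poisson data, as in _create_test_data):

    import numpy
    import fabio

    ref = numpy.random.poisson(200, (991, 997))
    raw = fabio.compression.compByteOffset(ref)                     # reference compression
    back = numpy.asarray(fabio.compression.decByteOffset(raw))      # reference decompression
    assert numpy.array_equal(back.reshape(ref.shape), ref)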
diff --git a/src/silx/opencl/common.py b/src/silx/opencl/common.py
new file mode 100644
index 0000000..888b1da
--- /dev/null
+++ b/src/silx/opencl/common.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: S I L X project
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
+__author__ = "Jerome Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "29/09/2021"
+__status__ = "stable"
+__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
+ "measure_workgroup_size", "kernel_workgroup_size"]
+
+import os
+import logging
+
+import numpy
+
+from .utils import get_opencl_code
+
+logger = logging.getLogger(__name__)
+
+if os.environ.get("SILX_OPENCL") in ["0", "False"]:
+ logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
+ pyopencl = None
+else:
+ try:
+ import pyopencl
+ except ImportError:
+ logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl")
+ pyopencl = None
+ else:
+ try:
+ pyopencl.get_platforms()
+ except pyopencl.LogicError:
+ logger.warning("The module pyOpenCL has been imported but can't be used here")
+ pyopencl = None
+ else:
+ import pyopencl.array as array
+ mf = pyopencl.mem_flags
+
+if pyopencl is None:
+
+ # Define default mem flags
+ class mf(object):
+ WRITE_ONLY = 1
+ READ_ONLY = 1
+ READ_WRITE = 1
+else:
+ mf = pyopencl.mem_flags
+
+FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
+ "CPU": 4, # CPU, at least intel's have 4 operation per cycle
+ "ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
+
+# Sources : https://en.wikipedia.org/wiki/CUDA
+NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
+ (1, 1): 24, # Measured on G98 [Quadro NVS 295]
+ (1, 2): 24, # Guessed !
+ (1, 3): 24, # measured on a GT285 (GT200)
+ (2, 0): 64, # Measured on a 580 (GF110)
+ (2, 1): 96, # Measured on Quadro2000 GF106GL
+ (3, 0): 384, # Guessed!
+ (3, 5): 384, # Measured on K20
+ (3, 7): 384, # K80: Guessed!
+ (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU
+ (5, 2): 256, # Titan-X
+ (5, 3): 256, # TX1
+ (6, 0): 128, # GP100
+ (6, 1): 128, # GP104
+ (6, 2): 128, # ?
+ (7, 0): 128, # Volta # measured on Tesla V100
+ (7, 2): 128, # Volta ?
+ (7, 5): 128, # Turing # measured on RTX 6000
+ (8, 0): 128, # Ampere # measured on Tesla A100
+ (8, 6): 256, # Ampere # measured on RTX A5000
+ }
+
+AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
+
+
+class Device(object):
+ """
+ Simple class that contains the structure of an OpenCL device
+ """
+
+ def __init__(self, name="None", dtype=None, version=None, driver_version=None,
+ extensions="", memory=None, available=None,
+ cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
+ """
+ Simple container with some important data for the OpenCL device description.
+
+ :param name: name of the device
+ :param dtype: device type: CPU/GPU/ACC...
+ :param version: OpenCL version reported by the device
+ :param driver_version: driver version
+ :param extensions: List of opencl extensions
+ :param memory: maximum memory available on the device
+ :param available: whether the device is available (not deactivated)
+ :param cores: number of SM/cores
+ :param frequency: frequency of the device
+ :param flop_core: Floating point operations per core per cycle
+ :param idx: index of the device within the platform
+ :param workgroup: max workgroup size
+ """
+ self.name = name.strip()
+ self.type = dtype
+ self.version = version
+ self.driver_version = driver_version
+ self.extensions = extensions.split()
+ self.memory = memory
+ self.available = available
+ self.cores = cores
+ self.frequency = frequency
+ self.id = idx
+ self.max_work_group_size = workgroup
+ if not flop_core:
+ flop_core = FLOP_PER_CORE.get(dtype, 1)
+ if cores and frequency:
+ self.flops = cores * frequency * flop_core
+ else:
+ self.flops = flop_core
+
+ def __repr__(self):
+ return "%s" % self.name
+
+ def pretty_print(self):
+ """
+ Complete device description
+
+ :return: string
+ """
+ lst = ["Name\t\t:\t%s" % self.name,
+ "Type\t\t:\t%s" % self.type,
+ "Memory\t\t:\t%.3f MB" % (self.memory / 2.0 ** 20),
+ "Cores\t\t:\t%s CU" % self.cores,
+ "Frequency\t:\t%s MHz" % self.frequency,
+ "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.),
+ "Version\t\t:\t%s" % self.version,
+ "Available\t:\t%s" % self.available]
+ return os.linesep.join(lst)
+
+ def set_unavailable(self):
+ """Use this method to flag a faulty device
+ """
+ self.available = False
+
+
+class Platform(object):
+ """
+ Simple class that contains the structure of an OpenCL platform
+ """
+
+ def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
+ """
+ Class containing all descriptions of a platform and all devices description within that platform.
+
+ :param name: platform name
+ :param vendor: name of the brand/vendor
+ :param version:
+ :param extensions: list of the extension provided by the platform to all of its devices
+ :param idx: index of the platform
+ """
+ self.name = name.strip()
+ self.vendor = vendor.strip()
+ self.version = version
+ self.extensions = extensions.split()
+ self.devices = []
+ self.id = idx
+
+ def __repr__(self):
+ return "%s" % self.name
+
+ def add_device(self, device):
+ """
+ Add new device to the platform
+
+ :param device: Device instance
+ """
+ self.devices.append(device)
+
+ def get_device(self, key):
+ """
+ Return a device according to key
+
+ :param key: identifier for a device, either its ID (int) or its name
+ :type key: int or str
+ """
+ out = None
+ try:
+ devid = int(key)
+ except ValueError:
+ for a_dev in self.devices:
+ if a_dev.name == key:
+ out = a_dev
+ else:
+ if len(self.devices) > devid >= 0:
+ out = self.devices[devid]
+ return out
+
+
+def _measure_workgroup_size(device_or_context, fast=False):
+ """Mesure the maximal work group size of the given device
+
+ DEPRECATED since not perfectly correct !
+
+ :param device_or_context: instance of pyopencl.Device or pyopencl.Context
+ or 2-tuple (platformid,deviceid)
+ :param fast: ask the kernel the valid value, don't probe it
+ :return: maximum size for the workgroup
+ """
+ if isinstance(device_or_context, pyopencl.Device):
+ try:
+ ctx = pyopencl.Context(devices=[device_or_context])
+ except pyopencl._cl.LogicError as error:
+ platform = device_or_context.platform
+ platformid = pyopencl.get_platforms().index(platform)
+ deviceid = platform.get_devices().index(device_or_context)
+ ocl.platforms[platformid].devices[deviceid].set_unavailable()
+ raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error))
+ else:
+ device = device_or_context
+ elif isinstance(device_or_context, pyopencl.Context):
+ ctx = device_or_context
+ device = device_or_context.devices[0]
+ elif isinstance(device_or_context, (tuple, list)) and len(device_or_context) == 2:
+ ctx = ocl.create_context(platformid=device_or_context[0],
+ deviceid=device_or_context[1])
+ device = ctx.devices[0]
+ else:
+ raise RuntimeError("""given parameter device_or_context is not an
+ instance of a device or a context""")
+ shape = device.max_work_group_size
+ # get the context
+
+ assert ctx is not None
+ queue = pyopencl.CommandQueue(ctx)
+
+ max_valid_wg = 1
+ data = numpy.random.random(shape).astype(numpy.float32)
+ d_data = pyopencl.array.to_device(queue, data)
+ d_data_1 = pyopencl.array.empty_like(d_data)
+ d_data_1.fill(numpy.float32(1.0))
+
+ program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
+ if fast:
+ max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device)
+ else:
+ maxi = int(round(numpy.log2(shape)))
+ for i in range(maxi + 1):
+ d_res = pyopencl.array.empty_like(d_data)
+ wg = 1 << i
+ try:
+ evt = program.addition(
+ queue, (shape,), (wg,),
+ d_data.data, d_data_1.data, d_res.data, numpy.int32(shape))
+ evt.wait()
+ except Exception as error:
+ logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape)
+ program = queue = d_res = d_data_1 = d_data = None
+ break
+ else:
+ res = d_res.get()
+ good = numpy.allclose(res, data + 1)
+ if good:
+ if wg > max_valid_wg:
+ max_valid_wg = wg
+ else:
+ logger.warning("ArithmeticError on %s for WG=%s/%s", wg, device.name, shape)
+
+ return max_valid_wg
+
+
+def _is_nvidia_gpu(vendor, devtype):
+ return (vendor == "NVIDIA Corporation") and (devtype == "GPU")
+
+
+class OpenCL(object):
+ """
+ Simple class that wraps the structure ocl_tools_extended.h
+
+ This is a static class.
+ ocl should be the only instance and shared among all python modules.
+ """
+
+ platforms = []
+ nb_devices = 0
+ context_cache = {} # key: 2-tuple of int, value: context
+ if pyopencl:
+ platform = device = pypl = devtype = extensions = pydev = None
+ for idx, platform in enumerate(pyopencl.get_platforms()):
+ pypl = Platform(platform.name, platform.vendor, platform.version, platform.extensions, idx)
+ for idd, device in enumerate(platform.get_devices()):
+ ####################################################
+ # Nvidia does not report the int64 atomics extensions (which we use) ...
+ # workaround: any Nvidia GPU with double-precision support also supports int64 atomics
+ ####################################################
+ extensions = device.extensions
+ if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
+ extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
+ try:
+ devtype = pyopencl.device_type.to_string(device.type).upper()
+ except ValueError:
+ # pocl does not describe itself as a CPU !
+ devtype = "CPU"
+ if len(devtype) > 3:
+ if "GPU" in devtype:
+ devtype = "GPU"
+ elif "ACC" in devtype:
+ devtype = "ACC"
+ elif "CPU" in devtype:
+ devtype = "CPU"
+ else:
+ devtype = devtype[:3]
+ if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
+ try:
+ comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
+ except pyopencl.LogicError:
+ flop_core = FLOP_PER_CORE["GPU"]
+ else:
+ flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
+ elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
+ flop_core = AMD_FLOP_PER_CORE
+ elif devtype == "CPU":
+ flop_core = FLOP_PER_CORE.get(devtype, 1)
+ else:
+ flop_core = 1
+ workgroup = device.max_work_group_size
+ if (devtype == "CPU") and (pypl.vendor == "Apple"):
+ logger.info("For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size.")
+ workgroup = _measure_workgroup_size(device, fast=True)
+ if (devtype == "GPU") and os.environ.get("GPU") == "False":
+ # Environment variable to disable GPU devices
+ continue
+ pydev = Device(device.name, devtype, device.version, device.driver_version, extensions,
+ device.global_mem_size, bool(device.available), device.max_compute_units,
+ device.max_clock_frequency, flop_core, idd, workgroup)
+ pypl.add_device(pydev)
+ nb_devices += 1
+ platforms.append(pypl)
+ del platform, device, pypl, devtype, extensions, pydev
+
+ def __repr__(self):
+ out = ["OpenCL devices:"]
+ for platformid, platform in enumerate(self.platforms):
+ deviceids = ["(%s,%s) %s" % (platformid, deviceid, dev.name)
+ for deviceid, dev in enumerate(platform.devices)]
+ out.append("[%s] %s: " % (platformid, platform.name) + ", ".join(deviceids))
+ return os.linesep.join(out)
+
+ def get_platform(self, key):
+ """
+        Return a platform matching the given identifier.
+
+        :param key: identifier for a platform, either an id (int) or its name
+ :type key: int or str
+ """
+ out = None
+ try:
+ platid = int(key)
+ except ValueError:
+ for a_plat in self.platforms:
+ if a_plat.name == key:
+ out = a_plat
+ else:
+            if len(self.platforms) > platid >= 0:
+ out = self.platforms[platid]
+ return out
+
+ def select_device(self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs):
+ """
+        Select a device based on a few parameters (when `best` is True, keep the one with the most flops).
+
+        :param dtype: device type: "gpu", "cpu", "acc", "all", ...
+        :param memory: minimum amount of memory (int)
+        :param extensions: list of extensions which have to be present
+        :param best: shall we look for the device with the most flops (True), or stop at the first suitable one (False)?
+        :returns: a 2-tuple (platform ID, device ID), or None if nothing was
+        found
+ """
+ if extensions is None:
+ extensions = []
+ if "type" in kwargs:
+ dtype = kwargs["type"].upper()
+ else:
+ dtype = dtype.upper()
+ if len(dtype) > 3:
+ dtype = dtype[:3]
+ best_found = None
+ for platformid, platform in enumerate(self.platforms):
+ for deviceid, device in enumerate(platform.devices):
+ if not device.available:
+ continue
+ if (dtype in ["ALL", "DEF"]) or (device.type == dtype):
+ if (memory is None) or (memory <= device.memory):
+ found = True
+ for ext in extensions:
+ if ext not in device.extensions:
+ found = False
+ if found:
+ if not best:
+ return platformid, deviceid
+ else:
+ if not best_found:
+ best_found = platformid, deviceid, device.flops
+ elif best_found[2] < device.flops:
+ best_found = platformid, deviceid, device.flops
+ if best_found:
+ return best_found[0], best_found[1]
+
+ # Nothing found
+ return None
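+
+    # Minimal usage sketch for select_device (illustrative values; assumes the
+    # module-level "ocl" singleton defined at the end of this file is not None):
+    #
+    #     ids = ocl.select_device(dtype="GPU", extensions=["cl_khr_fp64"])
+    #     if ids is not None:
+    #         platformid, deviceid = ids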
+
+ def create_context(self, devicetype="ALL", useFp64=False, platformid=None,
+ deviceid=None, cached=True, memory=None, extensions=None):
+ """
+ Choose a device and initiate a context.
+
+ Devicetypes can be GPU,gpu,CPU,cpu,DEF,ACC,ALL.
+ Suggested are GPU,CPU.
+        For each setting to work, such an OpenCL device must be present and properly installed.
+        E.g. if the Nvidia driver is installed, "GPU" will succeed but "CPU" will fail;
+        the AMD SDK (or another CPU OpenCL runtime) is required for using the CPU via OpenCL.
+
+ :param devicetype: string in ["cpu","gpu", "all", "acc"]
+ :param useFp64: boolean specifying if double precision will be used: deprecated use extensions=["cl_khr_fp64"]
+ :param platformid: integer
+ :param deviceid: integer
+ :param cached: True if we want to cache the context
+ :param memory: minimum amount of memory of the device
+ :param extensions: list of extensions to be present
+ :return: OpenCL context on the selected device
+ """
+ if extensions is None:
+ extensions = []
+ if useFp64:
+            logger.warning("Deprecation: please select your device using the extension name, e.g. extensions=['cl_khr_fp64']")
+ extensions.append('cl_khr_fp64')
+
+ if (platformid is not None) and (deviceid is not None):
+ platformid = int(platformid)
+ deviceid = int(deviceid)
+ elif "PYOPENCL_CTX" in os.environ:
+ pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")]
+ pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0
+ platformid, deviceid = pyopencl_ctx
+ else:
+ ids = ocl.select_device(type=devicetype, extensions=extensions)
+ if ids:
+ platformid, deviceid = ids
+ ctx = None
+ if (platformid is not None) and (deviceid is not None):
+ if (platformid, deviceid) in self.context_cache:
+ ctx = self.context_cache[(platformid, deviceid)]
+ else:
+ try:
+ ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
+ except pyopencl._cl.LogicError as error:
+ self.platforms[platformid].devices[deviceid].set_unavailable()
+ logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error)
+ ctx = None
+ else:
+ if cached:
+ self.context_cache[(platformid, deviceid)] = ctx
+ if ctx is None:
+ logger.warning("Last chance to get an OpenCL device ... probably not the one requested")
+ ctx = pyopencl.create_some_context(interactive=False)
+ return ctx
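+
+    # Minimal usage sketch for create_context (illustrative; falls back to
+    # pyopencl.create_some_context() if the requested device is unavailable):
+    #
+    #     ctx = ocl.create_context(devicetype="GPU", extensions=["cl_khr_fp64"])
+    #     queue = pyopencl.CommandQueue(ctx)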
+
+ def device_from_context(self, context):
+ """
+ Retrieves the Device from the context
+
+ :param context: OpenCL context
+ :return: instance of Device
+ """
+ odevice = context.devices[0]
+ oplat = odevice.platform
+ device_id = oplat.get_devices().index(odevice)
+ platform_id = pyopencl.get_platforms().index(oplat)
+ return self.platforms[platform_id].devices[device_id]
+
+
+if pyopencl:
+ ocl = OpenCL()
+ if ocl.nb_devices == 0:
+ ocl = None
+else:
+ ocl = None
+
+
+def release_cl_buffers(cl_buffers):
+ """
+    :param cl_buffers: the buffers you want to release
+    :type cl_buffers: dict(str, pyopencl.Buffer)
+
+    This method releases the memory of the buffers stored in the dict
+ """
+ for key, buffer_ in cl_buffers.items():
+ if buffer_ is not None:
+ if isinstance(buffer_, pyopencl.array.Array):
+ try:
+ buffer_.data.release()
+ except pyopencl.LogicError:
+ logger.error("Error while freeing buffer %s", key)
+ else:
+ try:
+ buffer_.release()
+ except pyopencl.LogicError:
+ logger.error("Error while freeing buffer %s", key)
+ cl_buffers[key] = None
+ return cl_buffers
+
+
+def allocate_cl_buffers(buffers, device=None, context=None):
+ """
+    :param buffers: the buffer descriptions used to create the pyopencl.Buffer
+    :type buffers: list(str, flag, numpy.dtype, int)
+    :param device: one of the context devices
+    :param context: OpenCL context
+    :return: a dict containing the instantiated pyopencl.Buffer
+    :rtype: dict(str, pyopencl.Buffer)
+
+    This method instantiates the pyopencl.Buffer objects from the buffer
+    descriptions.
+ """
+ mem = {}
+ if device is None:
+ device = ocl.device_from_context(context)
+
+ # check if enough memory is available on the device
+ ualloc = 0
+ for _, _, dtype, size in buffers:
+ ualloc += numpy.dtype(dtype).itemsize * size
+ memory = device.memory
+    logger.info("%.3fMB are needed on the device, which has %.3fMB",
+ ualloc / 1.0e6, memory / 1.0e6)
+ if ualloc >= memory:
+        memError = "Fatal error in allocate_buffers. "
+        memError += "Not enough device memory for buffers "
+        memError += "(%lu requested, %lu available)" % (ualloc, memory)
+ raise MemoryError(memError) # noqa
+
+ # do the allocation
+ try:
+ for name, flag, dtype, size in buffers:
+ mem[name] = pyopencl.Buffer(context, flag,
+ numpy.dtype(dtype).itemsize * size)
+ except pyopencl.MemoryError as error:
+ release_cl_buffers(mem)
+ raise MemoryError(error)
+
+ return mem
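+
+# Minimal usage sketch for allocate_cl_buffers / release_cl_buffers
+# (illustrative names and sizes; assumes ctx is a valid OpenCL context):
+#
+#     buffers = [("input", pyopencl.mem_flags.READ_ONLY, numpy.float32, 1024),
+#                ("output", pyopencl.mem_flags.WRITE_ONLY, numpy.float32, 1024)]
+#     cl_mem = allocate_cl_buffers(buffers, context=ctx)
+#     ...  # enqueue kernels using cl_mem["input"], cl_mem["output"]
+#     cl_mem = release_cl_buffers(cl_mem)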
+
+
+def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
+ """
+ Allocate an OpenCL image ("texture").
+
+ :param ctx: OpenCL context
+ :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
+ do not support 1D images, so 1D images are handled as 2D with one row
+    :param hostbuf: optional host buffer; note that in this implementation the
+        image is always initialized from a zero-filled float32 array
+    :param support_1D: force the image to be 1D if the shape has only one dim
+ """
+ if len(shape) == 1 and not(support_1D):
+ shape = (1,) + shape
+ return pyopencl.Image(
+ ctx,
+ pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY,
+ pyopencl.channel_type.FLOAT
+ ),
+ hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
+ )
+
+
+def check_textures_availability(ctx):
+ """
+ Check whether textures are supported on the current OpenCL context.
+
+ :param ctx: OpenCL context
+ """
+ try:
+ dummy_texture = allocate_texture(ctx, (16, 16))
+ # Need to further access some attributes (pocl)
+ dummy_height = dummy_texture.height
+ textures_available = True
+ del dummy_texture, dummy_height
+ except (pyopencl.RuntimeError, pyopencl.LogicError):
+ textures_available = False
+ # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
+ # There is no way to detect this until a kernel is compiled
+ try:
+ cc = ctx.devices[0].compute_capability_major_nv
+ textures_available &= (cc >= 3)
+ except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU
+ pass
+ #
+ return textures_available
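+
+# Minimal usage sketch (illustrative; assumes ctx is a valid OpenCL context):
+#
+#     if check_textures_availability(ctx):
+#         img_tex = allocate_texture(ctx, (512, 512))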
+
+
+def measure_workgroup_size(device):
+ """Measure the actual size of the workgroup
+
+ :param device: device or context or 2-tuple with indexes
+ :return: the actual measured workgroup size
+
+    If device is "all", return a dict with one entry per device, keyed by the (platformid, deviceid) tuple.
+ """
+ if (ocl is None) or (device is None):
+ return None
+
+ if isinstance(device, tuple) and (len(device) == 2):
+ # this is probably a tuple (platformid, deviceid)
+ device = ocl.create_context(platformid=device[0], deviceid=device[1])
+
+ if device == "all":
+ res = {}
+ for pid, platform in enumerate(ocl.platforms):
+ for did, _devices in enumerate(platform.devices):
+ tup = (pid, did)
+ res[tup] = measure_workgroup_size(tup)
+ else:
+ res = _measure_workgroup_size(device)
+ return res
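+
+# Minimal usage sketch (illustrative indexes; the measurement runs a small
+# addition kernel and may take a moment per device):
+#
+#     wg = measure_workgroup_size((0, 0))     # platform 0, device 0
+#     all_wg = measure_workgroup_size("all")  # {(platformid, deviceid): wg, ...}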
+
+
+def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
+ """Extract the compile time information from a kernel
+
+ :param program: OpenCL program
+ :param kernel: kernel or name of the kernel
+ :param what: what is the query about ?
+ :return: int or 3-int for the workgroup size.
+
+ Possible information available are:
+    * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
+ * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1!
+ * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel
+ * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
+    * 'PRIVATE_MEM_SIZE': Returns the minimum amount of private memory, in bytes, used by each work-item in the kernel
+ * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
+
+ Further information on:
+ https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
+
+ """
+ assert isinstance(program, pyopencl.Program)
+ if not isinstance(kernel, pyopencl.Kernel):
+ kernel_name = kernel
+        assert kernel_name in (k.function_name for k in program.all_kernels()), "kernel '%s' not found in the program" % kernel_name
+ kernel = program.__getattr__(kernel_name)
+
+ device = program.devices[0]
+ query_wg = getattr(pyopencl.kernel_work_group_info, what)
+ return kernel.get_work_group_info(query_wg, device)
+
+
+def kernel_workgroup_size(program, kernel):
+ """Extract the compile time maximum workgroup size
+
+ :param program: OpenCL program
+ :param kernel: kernel or name of the kernel
+ :return: the maximum acceptable workgroup size for the given kernel
+ """
+ return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE")
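+
+
+# Minimal usage sketch (illustrative; assumes program is a built pyopencl.Program
+# containing a kernel named "addition"):
+#
+#     max_wg = kernel_workgroup_size(program, "addition")
+#     multiple = query_kernel_info(program, "addition",
+#                                  what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")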
diff --git a/src/silx/opencl/conftest.py b/src/silx/opencl/conftest.py
new file mode 100644
index 0000000..1fdc516
--- /dev/null
+++ b/src/silx/opencl/conftest.py
@@ -0,0 +1,5 @@
+import pytest
+
+@pytest.mark.usefixtures("use_opencl")
+def setup_module(module):
+ pass
diff --git a/src/silx/opencl/convolution.py b/src/silx/opencl/convolution.py
new file mode 100644
index 0000000..15ef931
--- /dev/null
+++ b/src/silx/opencl/convolution.py
@@ -0,0 +1,442 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for convolution on CPU/GPU."""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "01/08/2019"
+
+import numpy as np
+from copy import copy # python2
+from .common import pyopencl as cl
+import pyopencl.array as parray
+from .processing import OpenclProcessing, EventDescription
+from .utils import ConvolutionInfos
+
+class Convolution(OpenclProcessing):
+ """
+ A class for performing convolution on CPU/GPU with OpenCL.
+ """
+
+ def __init__(self, shape, kernel, axes=None, mode=None, ctx=None,
+ devicetype="all", platformid=None, deviceid=None,
+ profile=False, extra_options=None):
+ """Constructor of OpenCL Convolution.
+
+ :param shape: shape of the array.
+ :param kernel: convolution kernel (1D, 2D or 3D).
+ :param axes: axes along which the convolution is performed,
+ for batched convolutions.
+ :param mode: Boundary handling mode. Available modes are:
+ "reflect": cba|abcd|dcb
+ "nearest": aaa|abcd|ddd
+ "wrap": bcd|abcd|abc
+ "constant": 000|abcd|000
+ Default is "reflect".
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ :param extra_options: Advanced options (dict). Current options are:
+ "allocate_input_array": True,
+ "allocate_output_array": True,
+ "allocate_tmp_array": True,
+ "dont_use_textures": False,
+ """
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ self._configure_extra_options(extra_options)
+ self._determine_use_case(shape, kernel, axes)
+ self._allocate_memory(mode)
+ self._init_kernels()
+
+ def _configure_extra_options(self, extra_options):
+ self.extra_options = {
+ "allocate_input_array": True,
+ "allocate_output_array": True,
+ "allocate_tmp_array": True,
+ "dont_use_textures": False,
+ }
+ extra_opts = extra_options or {}
+ self.extra_options.update(extra_opts)
+ self.use_textures = not(self.extra_options["dont_use_textures"])
+ self.use_textures &= self.check_textures_availability()
+
+ def _get_dimensions(self, shape, kernel):
+ self.shape = shape
+ self.data_ndim = self._check_dimensions(shape=shape, name="Data")
+ self.kernel_ndim = self._check_dimensions(arr=kernel, name="Kernel")
+ Nx = shape[-1]
+ if self.data_ndim >= 2:
+ Ny = shape[-2]
+ else:
+ Ny = 1
+ if self.data_ndim >= 3:
+ Nz = shape[-3]
+ else:
+ Nz = 1
+ self.Nx = np.int32(Nx)
+ self.Ny = np.int32(Ny)
+ self.Nz = np.int32(Nz)
+
+ def _determine_use_case(self, shape, kernel, axes):
+ """
+ Determine the convolution use case from the input/kernel shape, and axes.
+ """
+ self._get_dimensions(shape, kernel)
+ if self.kernel_ndim > self.data_ndim:
+ raise ValueError("Kernel dimensions cannot exceed data dimensions")
+ data_ndim = self.data_ndim
+ kernel_ndim = self.kernel_ndim
+ self.kernel = kernel.astype("f")
+
+ convol_infos = ConvolutionInfos()
+ k = (data_ndim, kernel_ndim)
+ if k not in convol_infos.use_cases:
+ raise ValueError(
+ "Cannot find a use case for data ndim = %d and kernel ndim = %d"
+ % (data_ndim, kernel_ndim)
+ )
+ possible_use_cases = convol_infos.use_cases[k]
+
+ self.use_case_name = None
+ for uc_name, uc_params in possible_use_cases.items():
+ if axes in convol_infos.allowed_axes[uc_name]:
+ self.use_case_name = uc_name
+ self.use_case_desc = uc_params["name"]
+ #~ self.use_case_kernels = uc_params["kernels"].copy()
+ self.use_case_kernels = copy(uc_params["kernels"]) # TODO use the above line once we get rid of python2
+ if self.use_case_name is None:
+ raise ValueError(
+ "Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s"
+ % (data_ndim, kernel_ndim, str(axes))
+ )
+ # TODO implement this use case
+ if self.use_case_name == "batched_separable_2D_1D_3D":
+ raise NotImplementedError(
+ "The use case %s is not implemented"
+ % self.use_case_name
+ )
+ #
+ self.axes = axes
+ # Replace "axes=None" with an actual value (except for ND-ND)
+ allowed_axes = convol_infos.allowed_axes[self.use_case_name]
+ if len(allowed_axes) > 1:
+ # The default choice might impact perfs
+ self.axes = allowed_axes[0] or allowed_axes[1]
+ self.separable = self.use_case_name.startswith("separable")
+ self.batched = self.use_case_name.startswith("batched")
+ # Update kernel names when using textures
+ if self.use_textures:
+ for i, kern_name in enumerate(self.use_case_kernels):
+ self.use_case_kernels[i] = kern_name + "_tex"
+
+ def _allocate_memory(self, mode):
+ self.mode = mode or "reflect"
+ option_array_names = {
+ "allocate_input_array": "data_in",
+ "allocate_output_array": "data_out",
+ "allocate_tmp_array": "data_tmp",
+ }
+ # Nonseparable transforms do not need tmp array
+ if not(self.separable):
+ self.extra_options["allocate_tmp_array"] = False
+ # Allocate arrays
+ for option_name, array_name in option_array_names.items():
+ if self.extra_options[option_name]:
+ value = parray.empty(self.queue, self.shape, np.float32)
+ value.fill(np.float32(0.0))
+ else:
+ value = None
+ setattr(self, array_name, value)
+
+ if isinstance(self.kernel, np.ndarray):
+ self.d_kernel = parray.to_device(self.queue, self.kernel)
+ else:
+ if not(isinstance(self.kernel, parray.Array)):
+ raise ValueError("kernel must be either numpy array or pyopencl array")
+ self.d_kernel = self.kernel
+ self._old_input_ref = None
+ self._old_output_ref = None
+ if self.use_textures:
+ self._allocate_textures()
+ self._c_modes_mapping = {
+ "periodic": 2,
+ "wrap": 2,
+ "nearest": 1,
+ "replicate": 1,
+ "reflect": 0,
+ "constant": 3,
+ }
+ mp = self._c_modes_mapping
+ if self.mode.lower() not in mp:
+ raise ValueError(
+ """
+ Mode %s is not available for textures. Available modes are:
+ %s
+ """
+ % (self.mode, str(mp.keys()))
+ )
+ # TODO
+ if not(self.use_textures) and self.mode.lower() == "constant":
+ raise NotImplementedError(
+ "mode='constant' is not implemented without textures yet"
+ )
+ #
+ self._c_conv_mode = mp[self.mode]
+
+ def _allocate_textures(self):
+ self.data_in_tex = self.allocate_texture(self.shape)
+ self.d_kernel_tex = self.allocate_texture(self.kernel.shape)
+ self.transfer_to_texture(self.d_kernel, self.d_kernel_tex)
+
+ def _init_kernels(self):
+ if self.kernel_ndim > 1:
+ if np.abs(np.diff(self.kernel.shape)).max() > 0:
+ raise NotImplementedError(
+ "Non-separable convolution with non-square kernels is not implemented yet"
+ )
+ compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)]
+ if self.use_textures:
+ kernel_files = ["convolution_textures.cl"]
+ compile_options.extend([
+ str("-DIMAGE_DIMS=%d" % self.data_ndim),
+ str("-DFILTER_DIMS=%d" % self.kernel_ndim),
+ ])
+ d_kernel_ref = self.d_kernel_tex
+ else:
+ kernel_files = ["convolution.cl"]
+ d_kernel_ref = self.d_kernel.data
+ self.compile_kernels(
+ kernel_files=kernel_files,
+ compile_options=compile_options
+ )
+ self.ndrange = self.shape[::-1]
+ self.wg = None
+ kernel_args = [
+ self.queue,
+ self.ndrange, self.wg,
+ None,
+ None,
+ d_kernel_ref,
+ np.int32(self.kernel.shape[0]),
+ self.Nx, self.Ny, self.Nz
+ ]
+ if self.kernel_ndim == 2:
+ kernel_args.insert(6, np.int32(self.kernel.shape[1]))
+ if self.kernel_ndim == 3:
+ kernel_args.insert(6, np.int32(self.kernel.shape[2]))
+ kernel_args.insert(7, np.int32(self.kernel.shape[1]))
+ self.kernel_args = tuple(kernel_args)
+ # If self.data_tmp is allocated, separable transforms can be performed
+ # by a series of batched transforms, without any copy, by swapping refs.
+ self.swap_pattern = None
+ if self.separable:
+ if self.data_tmp is not None:
+ self.swap_pattern = {
+ 2: [
+ ("data_in", "data_tmp"),
+ ("data_tmp", "data_out")
+ ],
+ 3: [
+ ("data_in", "data_out"),
+ ("data_out", "data_tmp"),
+ ("data_tmp", "data_out"),
+ ],
+ }
+ else:
+ # TODO
+ raise NotImplementedError("For now, data_tmp has to be allocated")
+
+ def _get_swapped_arrays(self, i):
+ """
+ Get the input and output arrays to use when using a "swap pattern".
+        Swapping refs makes it possible to avoid copies between the temporary array and the output.
+ For example, a separable 2D->1D convolution on 2D data reads:
+ data_tmp = convol(data_input, kernel, axis=1) # step i=0
+ data_out = convol(data_tmp, kernel, axis=0) # step i=1
+
+ :param i: current step number of the separable convolution
+ """
+ if self.use_textures:
+ # copy is needed when using texture, as data_out is a Buffer
+ if i > 0:
+ self.transfer_to_texture(self.data_out, self.data_in_tex)
+ return self.data_in_tex, self.data_out
+ n_batchs = len(self.axes)
+ in_ref, out_ref = self.swap_pattern[n_batchs][i]
+ d_in = getattr(self, in_ref)
+ d_out = getattr(self, out_ref)
+ return d_in, d_out
+
+ def _configure_kernel_args(self, opencl_kernel_args, input_ref, output_ref):
+ # TODO more elegant
+ if isinstance(input_ref, parray.Array):
+ input_ref = input_ref.data
+ if isinstance(output_ref, parray.Array):
+ output_ref = output_ref.data
+ if input_ref is not None or output_ref is not None:
+ opencl_kernel_args = list(opencl_kernel_args)
+ if input_ref is not None:
+ opencl_kernel_args[3] = input_ref
+ if output_ref is not None:
+ opencl_kernel_args[4] = output_ref
+ opencl_kernel_args = tuple(opencl_kernel_args)
+ return opencl_kernel_args
+
+ @staticmethod
+ def _check_dimensions(arr=None, shape=None, name="", dim_min=1, dim_max=3):
+ if shape is not None:
+ ndim = len(shape)
+ elif arr is not None:
+ ndim = arr.ndim
+ else:
+ raise ValueError("Please provide either arr= or shape=")
+ if ndim < dim_min or ndim > dim_max:
+ raise ValueError("%s dimensions should be between %d and %d"
+ % (name, dim_min, dim_max)
+ )
+ return ndim
+
+ def _check_array(self, arr):
+ # TODO allow cl.Buffer
+ if not(isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)):
+ raise TypeError("Expected either pyopencl.array.Array or numpy.ndarray")
+ # TODO composition with ImageProcessing/cast
+ if arr.dtype != np.float32:
+ raise TypeError("Data must be float32")
+ if arr.shape != self.shape:
+ raise ValueError("Expected data shape = %s" % str(self.shape))
+
+ def _set_arrays(self, array, output=None):
+ # When using textures: copy
+ if self.use_textures:
+ self.transfer_to_texture(array, self.data_in_tex)
+ data_in_ref = self.data_in_tex
+ else:
+ # Otherwise: copy H->D or update references.
+ if isinstance(array, np.ndarray):
+ self.data_in[:] = array[:]
+ else:
+ self._old_input_ref = self.data_in
+ self.data_in = array
+ data_in_ref = self.data_in
+ if output is not None:
+ if not(isinstance(output, np.ndarray)):
+ self._old_output_ref = self.data_out
+ self.data_out = output
+ # Update OpenCL kernel arguments with new array references
+ self.kernel_args = self._configure_kernel_args(
+ self.kernel_args,
+ data_in_ref,
+ self.data_out
+ )
+
+ def _separable_convolution(self):
+ assert len(self.axes) == len(self.use_case_kernels)
+ # Separable: one kernel call per data dimension
+ for i, axis in enumerate(self.axes):
+ in_ref, out_ref = self._get_swapped_arrays(i)
+ self._batched_convolution(axis, input_ref=in_ref, output_ref=out_ref)
+
+ def _batched_convolution(self, axis, input_ref=None, output_ref=None):
+ # Batched: one kernel call in total
+ opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[axis])
+ opencl_kernel_args = self._configure_kernel_args(
+ self.kernel_args,
+ input_ref,
+ output_ref
+ )
+ ev = opencl_kernel(*opencl_kernel_args)
+ if self.profile:
+ self.events.append(EventDescription("batched convolution", ev))
+
+ def _nd_convolution(self):
+ assert len(self.use_case_kernels) == 1
+ opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[0])
+ ev = opencl_kernel(*self.kernel_args)
+ if self.profile:
+ self.events.append(EventDescription("ND convolution", ev))
+
+ def _recover_arrays_references(self):
+ if self._old_input_ref is not None:
+ self.data_in = self._old_input_ref
+ self._old_input_ref = None
+ if self._old_output_ref is not None:
+ self.data_out = self._old_output_ref
+ self._old_output_ref = None
+ self.kernel_args = self._configure_kernel_args(
+ self.kernel_args,
+ self.data_in,
+ self.data_out
+ )
+
+ def _get_output(self, output):
+ if output is None:
+ res = self.data_out.get()
+ else:
+ res = output
+ if isinstance(output, np.ndarray):
+ output[:] = self.data_out[:]
+ self._recover_arrays_references()
+ return res
+
+ def convolve(self, array, output=None):
+ """
+ Convolve an array with the class kernel.
+
+ :param array: Input array. Can be numpy.ndarray or pyopencl.array.Array.
+ :param output: Output array. Can be numpy.ndarray or pyopencl.array.Array.
+ """
+ self._check_array(array)
+ self._set_arrays(array, output=output)
+ if self.axes is not None:
+ if self.separable:
+ self._separable_convolution()
+ elif self.batched:
+ assert len(self.axes) == 1
+ self._batched_convolution(self.axes[0])
+ # else: ND-ND convol
+ else:
+ # ND-ND convol
+ self._nd_convolution()
+
+ res = self._get_output(output)
+ return res
+
+
+ __call__ = convolve
+
+
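+# Minimal usage sketch for Convolution (illustrative shapes and kernel; assumes
+# an OpenCL device is available):
+#
+#     import numpy as np
+#     from silx.opencl.convolution import Convolution
+#
+#     data = np.random.rand(512, 512).astype(np.float32)
+#     box = np.ones((5, 5), dtype=np.float32) / 25.   # simple box blur
+#     conv = Convolution(data.shape, box, mode="reflect")
+#     result = conv(data)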
diff --git a/src/silx/opencl/image.py b/src/silx/opencl/image.py
new file mode 100644
index 0000000..65e2d5e
--- /dev/null
+++ b/src/silx/opencl/image.py
@@ -0,0 +1,387 @@
+# -*- coding: utf-8 -*-
+#
+# Project: silx
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# .
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+# .
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""A general purpose library for manipulating 2D images in 1 or 3 colors
+
+"""
+from __future__ import absolute_import, print_function, with_statement, division
+
+
+__author__ = "Jerome Kieffer"
+__license__ = "MIT"
+__date__ = "12/02/2018"
+__copyright__ = "2012-2017, ESRF, Grenoble"
+__contact__ = "jerome.kieffer@esrf.fr"
+
+import os
+import logging
+import numpy
+from collections import OrderedDict
+from math import floor, ceil, sqrt, log
+
+from .common import pyopencl, kernel_workgroup_size
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+logger = logging.getLogger(__name__)
+
+
+class ImageProcessing(OpenclProcessing):
+
+ kernel_files = ["cast", "map", "max_min", "histogram"]
+
+ converter = {numpy.dtype(numpy.uint8): "u8_to_float",
+ numpy.dtype(numpy.int8): "s8_to_float",
+ numpy.dtype(numpy.uint16): "u16_to_float",
+ numpy.dtype(numpy.int16): "s16_to_float",
+ numpy.dtype(numpy.uint32): "u32_to_float",
+ numpy.dtype(numpy.int32): "s32_to_float",
+ }
+
+ def __init__(self, shape=None, ncolors=1, template=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ block_size=None, memory=None, profile=False):
+ """Constructor of the ImageProcessing class
+
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+        :param block_size: preferred workgroup size, may vary depending on the
+                           outcome of the compilation
+ :param memory: minimum memory available on device
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly slower)
+ """
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ block_size=block_size, memory=memory, profile=profile)
+ if template is not None:
+ shape = template.shape
+ if len(shape) > 2:
+ self.ncolors = shape[2]
+ self.shape = shape[:2]
+ else:
+ self.ncolors = 1
+ self.shape = shape
+ else:
+ self.ncolors = ncolors
+ self.shape = shape
+ assert shape is not None
+ self.buffer_shape = self.shape if self.ncolors == 1 else self.shape + (self.ncolors,)
+ kernel_files = [os.path.join("image", i) for i in self.kernel_files]
+ self.compile_kernels(kernel_files,
+ compile_options="-DNB_COLOR=%i" % self.ncolors)
+ if self.ncolors == 1:
+ img_shape = self.shape
+ else:
+ img_shape = self.shape + (self.ncolors,)
+
+ buffers = [BufferDescription("image0_d", img_shape, numpy.float32, None),
+ BufferDescription("image1_d", img_shape, numpy.float32, None),
+ BufferDescription("image2_d", img_shape, numpy.float32, None),
+ BufferDescription("max_min_d", 2, numpy.float32, None),
+ BufferDescription("cnt_d", 1, numpy.int32, None), ]
+ # Temporary buffer for max-min reduction
+ self.wg_red = kernel_workgroup_size(self.program, self.kernels.max_min_reduction_stage1)
+ if self.wg_red > 1:
+ self.wg_red = min(self.wg_red,
+ numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2)))))
+ tmp = BufferDescription("tmp_max_min_d", 2 * self.wg_red, numpy.float32, None)
+ buffers.append(tmp)
+ self.allocate_buffers(buffers, use_array=True)
+ self.cl_mem["cnt_d"].fill(0)
+
+ def __repr__(self):
+        return "ImageProcessing for shape=%s, %i colors initialized on %s" % \
+ (self.shape, self.ncolors, self.ctx.devices[0].name)
+
+ def _get_in_out_buffers(self, img=None, copy=True, out=None,
+ out_dtype=None, out_size=None):
+ """Internal method used to select the proper buffers before processing.
+
+ :param img: expects a numpy array or a pyopencl.array of dim 2 or 3
+ :param copy: set to False to directly re-use a pyopencl array
+ :param out: provide an output buffer to store the result
+ :param out_dtype: enforce the type of the output buffer (optional)
+ :param out_size: enforce the size of the output buffer (optional)
+ :return: input_buffer, output_buffer
+
+ Nota: this is not locked.
+ """
+ events = []
+ if out is not None and isinstance(out, pyopencl.array.Array):
+ if (out_size or out_dtype) is not None:
+ if out_size is not None:
+ assert out.size > out_size
+ if out_dtype is not None:
+ assert out_dtype == out.dtype
+            else:  # assume it is the same size and type as the working buffer
+ assert out.shape == self.buffer_shape
+ assert out.dtype == numpy.float32
+ out.finish()
+ output_array = out
+ else:
+ if out_dtype != numpy.float32 and out_size:
+ name = "%s_%s_d" % (numpy.dtype(out_dtype), out_size)
+ if name not in self.cl_mem:
+ output_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (out_size,), out_dtype)
+ else:
+ output_array = self.cl_mem[name]
+ else:
+ output_array = self.cl_mem["image2_d"]
+
+ if img is None:
+ input_array = self.cl_mem["image1_d"]
+        elif isinstance(img, pyopencl.array.Array):
+ if copy:
+ evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, img.data)
+ input_array = self.cl_mem["image1_d"]
+ events.append(EventDescription("copy D->D", evt))
+ else:
+ img.finish()
+ input_array = img
+ evt = None
+ else:
+ # assume this is numpy
+ if img.dtype.itemsize > 4:
+ logger.warning("Casting to float32 on CPU")
+ evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img, numpy.float32))
+ input_array = self.cl_mem["image1_d"]
+ events.append(EventDescription("cast+copy H->D", evt))
+ else:
+ evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img))
+ input_array = self.cl_mem["image1_d"]
+ events.append(EventDescription("copy H->D", evt))
+ if self.profile:
+ self.events += events
+ return input_array, output_array
+
+ def to_float(self, img, copy=True, out=None):
+        """Take any array and convert it to a float array for ease of processing.
+
+ :param img: expects a numpy array or a pyopencl.array of dim 2 or 3
+ :param copy: set to False to directly re-use a pyopencl array
+ :param out: provide an output buffer to store the result
+ """
+ assert img.shape == self.buffer_shape
+
+ events = []
+ with self.sem:
+ input_array, output_array = self._get_in_out_buffers(img, copy, out)
+ if (img.dtype.itemsize > 4) or (img.dtype == numpy.float32):
+ # copy device -> device, already there as float32
+ ev = pyopencl.enqueue_copy(self.queue, output_array.data, input_array.data)
+ events.append(EventDescription("copy D->D", ev))
+ else:
+ # Cast to float:
+ name = self.converter[img.dtype]
+ kernel = self.kernels.get_kernel(name)
+ ev = kernel(self.queue, (self.shape[1], self.shape[0]), None,
+ input_array.data, output_array.data,
+ numpy.int32(self.shape[1]), numpy.int32(self.shape[0])
+ )
+ events.append(EventDescription("cast %s" % name, ev))
+
+ if self.profile:
+ self.events += events
+ if out is None:
+ res = output_array.get()
+ return res
+ else:
+ output_array.finish()
+ return output_array
+
+ def normalize(self, img, mini=0.0, maxi=1.0, copy=True, out=None):
+        """Scale the intensity of the image so that the minimum is 0 and the
+        maximum is 1.0 (or any other requested value).
+
+        :param img: numpy array or pyopencl array of dim 2 or 3 and of type float
+        :param mini: expected minimum value
+        :param maxi: expected maximum value
+ :param copy: set to False to use directly the input buffer
+ :param out: provides an output buffer. prevents a copy D->H
+
+ This uses a min/max reduction in two stages plus a map operation
+ """
+ assert img.shape == self.buffer_shape
+ events = []
+ with self.sem:
+ input_array, output_array = self._get_in_out_buffers(img, copy, out)
+ size = numpy.int32(numpy.prod(self.shape))
+ if self.wg_red == 1:
+ # Probably on MacOS CPU WG==1 --> serial code.
+ kernel = self.kernels.get_kernel("max_min_serial")
+ evt = kernel(self.queue, (1,), (1,),
+ input_array.data,
+ size,
+ self.cl_mem["max_min_d"].data)
+ ed = EventDescription("max_min_serial", evt)
+ events.append(ed)
+ else:
+ stage1 = self.kernels.max_min_reduction_stage1
+ stage2 = self.kernels.max_min_reduction_stage2
+ local_mem = pyopencl.LocalMemory(int(self.wg_red * 8))
+ k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
+ input_array.data,
+ self.cl_mem["tmp_max_min_d"].data,
+ size,
+ local_mem)
+ k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
+ self.cl_mem["tmp_max_min_d"].data,
+ self.cl_mem["max_min_d"].data,
+ local_mem)
+
+ events += [EventDescription("max_min_stage1", k1),
+ EventDescription("max_min_stage2", k2)]
+
+ evt = self.kernels.normalize_image(self.queue, (self.shape[1], self.shape[0]), None,
+ input_array.data, output_array.data,
+ numpy.int32(self.shape[1]), numpy.int32(self.shape[0]),
+ self.cl_mem["max_min_d"].data,
+ numpy.float32(mini), numpy.float32(maxi))
+ events.append(EventDescription("normalize", evt))
+ if self.profile:
+ self.events += events
+
+ if out is None:
+ res = output_array.get()
+ return res
+ else:
+ output_array.finish()
+ return output_array
+
+ def histogram(self, img=None, nbins=255, range=None,
+ log_scale=False, copy=True, out=None):
+ """Compute the histogram of a set of data.
+
+ :param img: input image. If None, use the one already on the device
+ :param nbins: number of bins
+ :param range: the lower and upper range of the bins. If not provided,
+ range is simply ``(a.min(), a.max())``. Values outside the
+ range are ignored. The first element of the range must be
+ less than or equal to the second.
+        :param log_scale: perform the binning in logarithmic scale
+                          (open to extension)
+        :param copy: unset to directly use the input buffer without copy
+        :param out: use a provided array to store the result
+ :return: histogram (size=nbins), edges (size=nbins+1)
+ API similar to numpy
+ """
+ assert img.shape == self.buffer_shape
+
+ input_array = self.to_float(img, copy=copy, out=self.cl_mem["image0_d"])
+ events = []
+ with self.sem:
+ input_array, output_array = self._get_in_out_buffers(input_array, copy=False,
+ out=out,
+ out_dtype=numpy.int32,
+ out_size=nbins)
+
+ if range is None:
+                # actually measure the bounds
+ size = numpy.int32(numpy.prod(self.shape))
+ if self.wg_red == 1:
+ # Probably on MacOS CPU WG==1 --> serial code.
+ kernel = self.kernels.get_kernel("max_min_serial")
+
+ evt = kernel(self.queue, (1,), (1,),
+ input_array.data,
+ size,
+ self.cl_mem["max_min_d"].data)
+ events.append(EventDescription("max_min_serial", evt))
+ else:
+ stage1 = self.kernels.max_min_reduction_stage1
+ stage2 = self.kernels.max_min_reduction_stage2
+ local_mem = pyopencl.LocalMemory(int(self.wg_red * 2 * numpy.dtype("float32").itemsize))
+ k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
+ input_array.data,
+ self.cl_mem["tmp_max_min_d"].data,
+ size,
+ local_mem)
+ k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
+ self.cl_mem["tmp_max_min_d"].data,
+ self.cl_mem["max_min_d"].data,
+ local_mem)
+
+ events += [EventDescription("max_min_stage1", k1),
+ EventDescription("max_min_stage2", k2)]
+ maxi, mini = self.cl_mem["max_min_d"].get()
+ else:
+ mini = numpy.float32(min(range))
+ maxi = numpy.float32(max(range))
+ device = self.ctx.devices[0]
+ nb_engines = device.max_compute_units
+ tmp_size = nb_engines * nbins
+ name = "tmp_int32_%s_d" % (tmp_size)
+ if name not in self.cl_mem:
+ tmp_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (tmp_size,), numpy.int32)
+ else:
+ tmp_array = self.cl_mem[name]
+
+ edge_name = "tmp_float32_%s_d" % (nbins + 1)
+ if edge_name not in self.cl_mem:
+ edges_array = self.cl_mem[edge_name] = pyopencl.array.empty(self.queue, (nbins + 1,), numpy.float32)
+ else:
+ edges_array = self.cl_mem[edge_name]
+
+ shared = pyopencl.LocalMemory(numpy.dtype(numpy.int32).itemsize * nbins)
+
+ # Handle log-scale
+ if log_scale:
+ map_operation = numpy.int32(1)
+ else:
+ map_operation = numpy.int32(0)
+ kernel = self.kernels.get_kernel("histogram")
+ wg = min(device.max_work_group_size,
+ 1 << (int(ceil(log(nbins, 2)))),
+ self.kernels.max_workgroup_size(kernel))
+ evt = kernel(self.queue, (wg * nb_engines,), (wg,),
+ input_array.data,
+ numpy.int32(input_array.size),
+ mini,
+ maxi,
+ map_operation,
+ output_array.data,
+ edges_array.data,
+ numpy.int32(nbins),
+ tmp_array.data,
+ self.cl_mem["cnt_d"].data,
+ shared)
+ events.append(EventDescription("histogram", evt))
+
+ if self.profile:
+ self.events += events
+
+ if out is None:
+ res = output_array.get()
+ return res, edges_array.get()
+ else:
+ output_array.finish()
+ return output_array, edges_array
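+
+
+# Minimal usage sketch for ImageProcessing (illustrative; assumes an OpenCL
+# device is available):
+#
+#     import numpy
+#     from silx.opencl.image import ImageProcessing
+#
+#     img = numpy.random.randint(0, 65535, (256, 256)).astype(numpy.uint16)
+#     ip = ImageProcessing(template=img)
+#     as_float = ip.to_float(img)
+#     normalized = ip.normalize(as_float)
+#     histo, edges = ip.histogram(as_float, nbins=128)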
diff --git a/src/silx/opencl/linalg.py b/src/silx/opencl/linalg.py
new file mode 100644
index 0000000..a64122a
--- /dev/null
+++ b/src/silx/opencl/linalg.py
@@ -0,0 +1,220 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for basic linear algebra in OpenCL"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "01/08/2019"
+
+import numpy as np
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing
+
+import pyopencl.array as parray
+cl = pyopencl
+
+
+class LinAlg(OpenclProcessing):
+
+ kernel_files = ["linalg.cl"]
+
+ def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False):
+ """
+ Create a "Linear Algebra" plan for a given image shape.
+
+ :param shape: shape of the image (num_rows, num_columns)
+ :param do_checks (optional): if True, memory and data type checks are performed when possible.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+
+ """
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ self.d_gradient = parray.empty(self.queue, shape, np.complex64)
+ self.d_gradient.fill(np.complex64(0.0))
+ self.d_image = parray.empty(self.queue, shape, np.float32)
+ self.d_image.fill(np.float32(0.0))
+ self.add_to_cl_mem({
+ "d_gradient": self.d_gradient,
+ "d_image": self.d_image
+ })
+
+ self.wg2D = None
+ self.shape = shape
+ self.ndrange2D = (
+ int(self.shape[1]),
+ int(self.shape[0])
+ )
+ self.do_checks = bool(do_checks)
+ OpenclProcessing.compile_kernels(self, self.kernel_files)
+
+ @staticmethod
+ def check_array(array, dtype, shape, arg_name):
+ if array.shape != shape or array.dtype != dtype:
+ raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype)))
+
+ def get_data_references(self, src, dst, default_src_ref, default_dst_ref):
+ """
+ From various types of src and dst arrays,
+ returns the references to the underlying data (Buffer) that will be used by the OpenCL kernels.
+ # TODO documentation
+
+ This function will make a copy host->device if the input is on host (eg. numpy array)
+ """
+ if dst is not None:
+ if isinstance(dst, cl.array.Array):
+ dst_ref = dst.data
+ elif isinstance(dst, cl.Buffer):
+ dst_ref = dst
+ else:
+ raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer")
+ else:
+ dst_ref = default_dst_ref
+
+ if isinstance(src, cl.array.Array):
+ src_ref = src.data
+ elif isinstance(src, cl.Buffer):
+ src_ref = src
+ else: # assuming numpy.ndarray
+ evt = cl.enqueue_copy(self.queue, default_src_ref, src)
+ self.events.append(EventDescription("copy H->D", evt))
+ src_ref = default_src_ref
+ return src_ref, dst_ref
+
+ def gradient(self, image, dst=None, return_to_host=False):
+ """
+ Compute the spatial gradient of an image.
+ The gradient is computed with first-order difference (not central difference).
+
+ :param image: image to compute the gradient from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
+ :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type.
+ :param return_to_host: optional, set to True if you want the result to be transferred back to host.
+
+        If dst is provided, it should be of type numpy.complex64.
+ """
+ n_y, n_x = np.int32(self.shape)
+ if self.do_checks:
+ self.check_array(image, np.float32, self.shape, "image")
+ if dst is not None:
+ self.check_array(dst, np.complex64, self.shape, "dst")
+ img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data)
+
+ # Prepare the kernel call
+ kernel_args = [
+ img_ref,
+ grad_ref,
+ n_x,
+ n_y
+ ]
+ # Call the gradient kernel
+ evt = self.kernels.kern_gradient2D(
+ self.queue,
+ self.ndrange2D,
+ self.wg2D,
+ *kernel_args
+ )
+ self.events.append(EventDescription("gradient2D", evt))
+ # TODO: should the wait be done in any case ?
+ # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
+ if dst is None:
+ evt.wait()
+
+ if return_to_host:
+ if dst is not None:
+ res_tmp = self.d_gradient.get()
+ else:
+ res_tmp = np.zeros(self.shape, dtype=np.complex64)
+ cl.enqueue_copy(self.queue, res_tmp, grad_ref)
+ res = np.zeros((2,) + self.shape, dtype=np.float32)
+ res[0] = np.copy(res_tmp.real)
+ res[1] = np.copy(res_tmp.imag)
+ return res
+ else:
+ return dst
+
+ def divergence(self, gradient, dst=None, return_to_host=False):
+ """
+        Compute the spatial divergence of a gradient-like array.
+ The divergence is designed to be the (negative) adjoint of the gradient.
+
+ :param gradient: gradient-like array to compute the divergence from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
+        :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of float32 data type.
+ :param return_to_host: optional, set to True if you want the result to be transferred back to host.
+
+        If dst is provided, it should be of type numpy.float32.
+ """
+ n_y, n_x = np.int32(self.shape)
+ # numpy.ndarray gradients are expected to be (2, n_y, n_x)
+ if isinstance(gradient, np.ndarray):
+ gradient2 = np.zeros(self.shape, dtype=np.complex64)
+ gradient2.real = np.copy(gradient[0])
+ gradient2.imag = np.copy(gradient[1])
+ gradient = gradient2
+ elif self.do_checks:
+ self.check_array(gradient, np.complex64, self.shape, "gradient")
+ if dst is not None:
+ self.check_array(dst, np.float32, self.shape, "dst")
+ grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data)
+
+ # Prepare the kernel call
+ kernel_args = [
+ grad_ref,
+ img_ref,
+ n_x,
+ n_y
+ ]
+ # Call the gradient kernel
+ evt = self.kernels.kern_divergence2D(
+ self.queue,
+ self.ndrange2D,
+ self.wg2D,
+ *kernel_args
+ )
+ self.events.append(EventDescription("divergence2D", evt))
+ # TODO: should the wait be done in any case ?
+ # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
+ if dst is None:
+ evt.wait()
+
+ if return_to_host:
+ if dst is not None:
+ res = self.d_image.get()
+ else:
+ res = np.zeros(self.shape, dtype=np.float32)
+ cl.enqueue_copy(self.queue, res, img_ref)
+ return res
+ else:
+ return dst
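+
+
+# Minimal usage sketch for LinAlg (illustrative; assumes an OpenCL device is available):
+#
+#     import numpy as np
+#     from silx.opencl.linalg import LinAlg
+#
+#     image = np.random.rand(256, 256).astype(np.float32)
+#     la = LinAlg(image.shape)
+#     grad = la.gradient(image, return_to_host=True)   # shape (2, 256, 256)
+#     div = la.divergence(grad, return_to_host=True)   # shape (256, 256)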
diff --git a/src/silx/opencl/medfilt.py b/src/silx/opencl/medfilt.py
new file mode 100644
index 0000000..d4e425b
--- /dev/null
+++ b/src/silx/opencl/medfilt.py
@@ -0,0 +1,269 @@
+# -*- coding: utf-8 -*-
+#
+# Project: Azimuthal integration
+# https://github.com/silx-kit/pyFAI
+#
+# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# .
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+# .
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""A module for performing the 1d, 2d and 3d median filter ...
+
+The target is to mimic the signature of scipy.signal.medfilt and scipy.signal.medfilt2d
+
+The first implementation targets the 2D case, where this operation is costly (~10 s for a 2k x 2k image)
+"""
+from __future__ import absolute_import, print_function, with_statement, division
+
+
+__author__ = "Jerome Kieffer"
+__license__ = "MIT"
+__date__ = "12/09/2017"
+__copyright__ = "2012-2017, ESRF, Grenoble"
+__contact__ = "jerome.kieffer@esrf.fr"
+
+import logging
+import numpy
+from collections import OrderedDict
+
+from .common import pyopencl, kernel_workgroup_size
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+else:
+ raise ImportError("pyopencl is not installed")
+logger = logging.getLogger(__name__)
+
+
+class MedianFilter2D(OpenclProcessing):
+ """A class for doing median filtering using OpenCL"""
+ buffers = [
+ BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY),
+ BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
+ BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
+ ]
+ kernel_files = ["preprocess.cl", "bitonic.cl", "medfilt.cl"]
+ mapping = {numpy.int8: "s8_to_float",
+ numpy.uint8: "u8_to_float",
+ numpy.int16: "s16_to_float",
+ numpy.uint16: "u16_to_float",
+ numpy.uint32: "u32_to_float",
+ numpy.int32: "s32_to_float"}
+
+ def __init__(self, shape, kernel_size=(3, 3),
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ block_size=None, profile=False
+ ):
+ """Constructor of the OpenCL 2D median filtering class
+
+ :param shape: shape of the images to treat
+        :param kernel_size: 2-tuple of odd values
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+        :param block_size: preferred workgroup size, may vary depending on the outcome of the compilation
+ :param profile: switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+ """
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ block_size=block_size, profile=profile)
+ self.shape = shape
+ self.size = self.shape[0] * self.shape[1]
+ self.kernel_size = self.calc_kernel_size(kernel_size)
+ self.workgroup_size = (self.calc_wg(self.kernel_size), 1) # 3D kernel
+ self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
+ for i in self.__class__.buffers]
+
+ self.allocate_buffers()
+ self.local_mem = self._get_local_mem(self.workgroup_size[0])
+ OpenclProcessing.compile_kernels(self, self.kernel_files, "-D NIMAGE=%i" % self.size)
+ self.set_kernel_arguments()
+
+ def set_kernel_arguments(self):
+ """Parametrize all kernel arguments
+ """
+ for val in self.mapping.values():
+ self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("image_raw", "image")))
+ self.cl_kernel_args["medfilt2d"] = OrderedDict((("image", self.cl_mem["image"]),
+ ("result", self.cl_mem["result"]),
+ ("local", self.local_mem),
+ ("khs1", numpy.int32(self.kernel_size[0] // 2)), # Kernel half-size along dim1 (lines)
+ ("khs2", numpy.int32(self.kernel_size[1] // 2)), # Kernel half-size along dim2 (columns)
+ ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines)
+ ("width", numpy.int32(self.shape[1]))))
+# ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns))
+
+ def _get_local_mem(self, wg):
+        return pyopencl.LocalMemory(wg * 32)  # 4 bytes per float, 8 elements per thread
+
+ def send_buffer(self, data, dest):
+ """Send a numpy array to the device, including the cast on the device if possible
+
+ :param data: numpy array with data
+ :param dest: name of the buffer as registered in the class
+ """
+
+ dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
+ events = []
+ if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
+ copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type))
+ events.append(EventDescription("copy H->D %s" % dest, copy_image))
+ else:
+ copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data))
+ kernel = getattr(self.program, self.mapping[data.dtype.type])
+ cast_to_float = kernel(self.queue, (self.size,), None, self.cl_mem["image_raw"], self.cl_mem[dest])
+ events += [EventDescription("copy H->D %s" % dest, copy_image), EventDescription("cast to float", cast_to_float)]
+ if self.profile:
+ self.events += events
+
+ def calc_wg(self, kernel_size):
+ """calculate and return the optimal workgroup size for the first dimension, taking into account
+ the 8-height band
+
+ :param kernel_size: 2-tuple of int, shape of the median window
+ :return: optimal workgroup size
+ """
+ needed_threads = ((kernel_size[0] + 7) // 8) * kernel_size[1]
+ if needed_threads < 8:
+ wg = 8
+ elif needed_threads < 32:
+ wg = 32
+ else:
+ wg = 1 << (int(needed_threads).bit_length())
+ return wg
+
+ def medfilt2d(self, image, kernel_size=None):
+ """Actually apply the median filtering on the image
+
+ :param image: numpy array with the image
+        :param kernel_size: 2-tuple of odd integers; if None, use the kernel size given at init
+ :return: median-filtered 2D image
+
+
+ Nota: for window size 1x1 -> 7x7 up to 49 / 64 elements in 8 threads, 8elt/th
+ 9x9 -> 15x15 up to 225 / 256 elements in 32 threads, 8elt/th
+ 17x17 -> 21x21 up to 441 / 512 elements in 64 threads, 8elt/th
+
+ TODO: change window size on the fly,
+
+
+ """
+ events = []
+ if kernel_size is None:
+ kernel_size = self.kernel_size
+ else:
+ kernel_size = self.calc_kernel_size(kernel_size)
+ kernel_half_size = kernel_size // numpy.int32(2)
+ # this is the workgroup size
+ wg = self.calc_wg(kernel_size)
+
+ # check for valid work group size:
+ amws = kernel_workgroup_size(self.program, "medfilt2d")
+ logger.warning("max actual workgroup size: %s, expected: %s", amws, wg)
+ if wg > amws:
+ raise RuntimeError("Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws))
+
+ localmem = self._get_local_mem(wg)
+
+        assert image.ndim == 2, "only 2D images are supported"
+        assert image.shape[0] <= self.shape[0], "image height exceeds the allocated buffer height"
+        assert image.shape[1] <= self.shape[1], "image width exceeds the allocated buffer width"
+
+ with self.sem:
+ self.send_buffer(image, "image")
+
+ kwargs = self.cl_kernel_args["medfilt2d"]
+ kwargs["local"] = localmem
+ kwargs["khs1"] = kernel_half_size[0]
+ kwargs["khs2"] = kernel_half_size[1]
+ kwargs["height"] = numpy.int32(image.shape[0])
+ kwargs["width"] = numpy.int32(image.shape[1])
+# for k, v in kwargs.items():
+# print("%s: %s (%s)" % (k, v, type(v)))
+ mf2d = self.kernels.medfilt2d(self.queue,
+ (wg, image.shape[1]),
+ (wg, 1), *list(kwargs.values()))
+ events.append(EventDescription("median filter 2d", mf2d))
+
+ result = numpy.empty(image.shape, numpy.float32)
+ ev = pyopencl.enqueue_copy(self.queue, result, self.cl_mem["result"])
+ events.append(EventDescription("copy D->H result", ev))
+ ev.wait()
+ if self.profile:
+ self.events += events
+ return result
+ __call__ = medfilt2d
+
+ @staticmethod
+ def calc_kernel_size(kernel_size):
+ """format the kernel size to be a 2-length numpy array of int32
+ """
+ kernel_size = numpy.asarray(kernel_size, dtype=numpy.int32)
+ if kernel_size.shape == ():
+ kernel_size = numpy.repeat(kernel_size.item(), 2).astype(numpy.int32)
+ for size in kernel_size:
+ if (size % 2) != 1:
+ raise ValueError("Each element of kernel_size should be odd.")
+ return kernel_size
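+ # Illustrative examples (sketch):
+ #   calc_kernel_size(5)      -> array([5, 5], dtype=int32)
+ #   calc_kernel_size((3, 7)) -> array([3, 7], dtype=int32)
+ #   calc_kernel_size(4)      -> raises ValueError (even size)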
+
+
+class _MedFilt2d(object):
+ median_filter = None
+
+ @classmethod
+ def medfilt2d(cls, ary, kernel_size=3):
+ """Median filter a 2-dimensional array.
+
+ Apply a median filter to the `ary` array using a local window size
+ given by `kernel_size` (each element must be odd).
+
+ :param ary: A 2-dimensional input array.
+ :param kernel_size: A scalar or a list of length 2, giving the size of the
+ median filter window in each dimension. Elements of
+ `kernel_size` should be odd. If `kernel_size` is a scalar,
+ then this scalar is used as the size in each dimension.
+ Default is a kernel of size (3, 3).
+ :return: An array of the same size as the input containing the median-filtered
+ result; the computation is always performed on float32 values
+
+ About the padding:
+
+ * The filling mode in scipy.signal.medfilt2d is zero-padding
+ * This implementation is equivalent to:
+ scipy.ndimage.filters.median_filter(ary, kernel_size, mode="nearest")
+
+ """
+ image = numpy.atleast_2d(ary)
+ shape = numpy.array(image.shape)
+ if cls.median_filter is None:
+ cls.median_filter = MedianFilter2D(image.shape, kernel_size)
+ elif (numpy.array(cls.median_filter.shape) < shape).any():
+ # enlarge the buffer size
+ new_shape = numpy.maximum(numpy.array(cls.median_filter.shape), shape)
+ ctx = cls.median_filter.ctx
+ cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx)
+ return cls.median_filter.medfilt2d(image, kernel_size=kernel_size)
+
+medfilt2d = _MedFilt2d.medfilt2d
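+
+# Minimal usage sketch (illustrative only; assumes a working OpenCL runtime,
+# the random image below is just a placeholder):
+if __name__ == "__main__":
+    img = numpy.random.random((256, 256)).astype(numpy.float32)
+    filtered = medfilt2d(img, kernel_size=5)
+    print(filtered.shape, filtered.dtype)  # (256, 256) float32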
diff --git a/src/silx/opencl/processing.py b/src/silx/opencl/processing.py
new file mode 100644
index 0000000..8b81f7f
--- /dev/null
+++ b/src/silx/opencl/processing.py
@@ -0,0 +1,447 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: S I L X project
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+
+"""
+Common OpenCL abstract base class for different processing
+"""
+
+__author__ = "Jerome Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "02/03/2021"
+__status__ = "stable"
+
+import sys
+import os
+import logging
+import gc
+from collections import namedtuple, OrderedDict
+import numpy
+import threading
+from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
+from .utils import concatenate_cl_kernel
+import platform
+
+BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
+EventDescription = namedtuple("EventDescription", ["name", "event"])
+
+logger = logging.getLogger(__name__)
+
+
+class KernelContainer(object):
+ """Those object holds a copy of all kernels accessible as attributes"""
+
+ def __init__(self, program):
+ """Constructor of the class
+
+ :param program: the OpenCL program as generated by PyOpenCL
+ """
+ self._program = program
+ for kernel in program.all_kernels():
+ self.__setattr__(kernel.function_name, kernel)
+
+ def get_kernels(self):
+ "return the dictionary with all kernels"
+ return dict(item for item in self.__dict__.items()
+ if not item[0].startswith("_"))
+
+ def get_kernel(self, name):
+ "get a kernel from its name"
+ logger.debug("KernelContainer.get_kernel(%s)", name)
+ return self.__dict__.get(name)
+
+ def max_workgroup_size(self, kernel_name):
+ "Retrieve the compile time WORK_GROUP_SIZE for a given kernel"
+ if isinstance(kernel_name, pyopencl.Kernel):
+ kernel = kernel_name
+ else:
+ kernel = self.get_kernel(kernel_name)
+
+ return query_kernel_info(self._program, kernel, "WORK_GROUP_SIZE")
+
+ def min_workgroup_size(self, kernel_name):
+ "Retrieve the compile time PREFERRED_WORK_GROUP_SIZE_MULTIPLE for a given kernel"
+ if isinstance(kernel_name, pyopencl.Kernel):
+ kernel = kernel_name
+ else:
+ kernel = self.get_kernel(kernel_name)
+
+ return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
+
+
+class OpenclProcessing(object):
+ """Abstract class for different types of OpenCL processing.
+
+ This class provides:
+ * Generation of the context, queues, profiling mode
+ * Additional functions to allocate/free all buffers declared as static attributes of the class
+ * Functions to compile kernels, cache them and clean them
+ * Helper functions to clone the object
+ """
+ # Example of how to create an output buffer of 10 floats
+ buffers = [BufferDescription("output", 10, numpy.float32, None),
+ ]
+ # list of kernel source files to be concatenated before compilation of the program
+ kernel_files = []
+
+ def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None,
+ block_size=None, memory=None, profile=False):
+ """Constructor of the abstract OpenCL processing class
+
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param block_size: preferred workgroup size, may vary depending on the
+ outcome of the compilation
+ :param memory: minimum memory available on device
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly slower)
+ """
+ self.sem = threading.Semaphore()
+ self._X87_VOLATILE = None
+ self.profile = None
+ self.events = [] # list of EventDescription, kept for profiling
+ self.cl_mem = {} # dict with all buffers allocated
+ self.cl_program = None # The actual OpenCL program
+ self.cl_kernel_args = {} # dict with all kernel arguments
+ self.queue = None
+ if ctx:
+ self.ctx = ctx
+ else:
+ self.ctx = ocl.create_context(devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ memory=memory)
+ device_name = self.ctx.devices[0].name.strip()
+ platform_name = self.ctx.devices[0].platform.name.strip()
+ platform = ocl.get_platform(platform_name)
+ self.device = platform.get_device(device_name)
+ self.cl_kernel_args = {} # dict with all kernel arguments
+
+ self.set_profiling(profile)
+ self.block_size = block_size
+ self.program = None
+ self.kernels = None
+
+ def check_textures_availability(self):
+ return check_textures_availability(self.ctx)
+
+ def __del__(self):
+ """Destructor: release all buffers and programs
+ """
+ try:
+ self.reset_log()
+ self.free_kernels()
+ self.free_buffers()
+ if self.queue is not None:
+ self.queue.finish()
+ except Exception as err:
+ logger.warning("%s: %s", type(err), err)
+ self.queue = None
+ self.device = None
+ self.ctx = None
+ gc.collect()
+
+ def allocate_buffers(self, buffers=None, use_array=False):
+ """
+ Allocate OpenCL buffers required for a specific configuration
+
+ :param buffers: a list of BufferDescriptions, leave to None for
+ parametrized buffers.
+ :param use_array: allocate memory as pyopencl.array.Array
+ instead of pyopencl.Buffer
+
+ Note that an OpenCL context also requires some memory, as well
+ as Events and other OpenCL functionalities, which cannot be
+ taken into account here. The memory required by a context
+ varies depending on the device: typically about 65 MB for a
+ GTX 580 but ~15 MB for a 9300M. In addition, a GPU will always
+ have at least 3-5 MB of memory in use. Unfortunately, OpenCL
+ does NOT have a built-in way to check the actual free memory
+ on a device, only the total memory.
+ """
+ if buffers is None:
+ buffers = self.buffers
+
+ with self.sem:
+ mem = {}
+
+ # check if enough memory is available on the device
+ ualloc = 0
+ for buf in buffers:
+ ualloc += numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
+ logger.info("%.3fMB are needed on device: %s, which has %.3fMB",
+ ualloc / 1.0e6, self.device, self.device.memory / 1.0e6)
+
+ if ualloc >= self.device.memory:
+ raise MemoryError("Fatal error in allocate_buffers. Not enough "
+ " device memory for buffers (%lu requested, %lu available)"
+ % (ualloc, self.device.memory))
+
+ # do the allocation
+ try:
+ if use_array:
+ for buf in buffers:
+ mem[buf.name] = pyopencl.array.empty(self.queue, buf.size, buf.dtype)
+ else:
+ for buf in buffers:
+ size = numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
+ mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size))
+ except pyopencl.MemoryError as error:
+ release_cl_buffers(mem)
+ raise MemoryError(error)
+
+ self.cl_mem.update(mem)
+
+ def add_to_cl_mem(self, parrays):
+ """
+ Add pyopencl.array, which are allocated by pyopencl, to self.cl_mem.
+ This should be used before calling allocate_buffers().
+
+ :param parrays: a dictionary of `pyopencl.array.Array` or `pyopencl.Buffer`
+ """
+ mem = self.cl_mem
+ for name, parr in parrays.items():
+ mem[name] = parr
+ self.cl_mem.update(mem)
+
+ def check_workgroup_size(self, kernel_name):
+ "Calculate the maximum workgroup size from given kernel after compilation"
+ return self.kernels.max_workgroup_size(kernel_name)
+
+ def free_buffers(self):
+ """free all device.memory allocated on the device
+ """
+ with self.sem:
+ for key, buf in list(self.cl_mem.items()):
+ if buf is not None:
+ if isinstance(buf, pyopencl.array.Array):
+ try:
+ buf.data.release()
+ except pyopencl.LogicError:
+ logger.error("Error while freeing buffer %s", key)
+ else:
+ try:
+ buf.release()
+ except pyopencl.LogicError:
+ logger.error("Error while freeing buffer %s", key)
+ self.cl_mem[key] = None
+
+ def compile_kernels(self, kernel_files=None, compile_options=None):
+ """Call the OpenCL compiler
+
+ :param kernel_files: list of paths to the kernel files
+ (by default, use the ones declared in the class)
+ :param compile_options: string of compile options
+ """
+ # concatenate all needed source files into a single openCL module
+ kernel_files = kernel_files or self.kernel_files
+ kernel_src = concatenate_cl_kernel(kernel_files)
+
+ compile_options = compile_options or self.get_compiler_options()
+ logger.info("Compiling file %s with options %s", kernel_files, compile_options)
+ try:
+ self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options)
+ except (pyopencl.MemoryError, pyopencl.LogicError) as error:
+ raise MemoryError(error)
+ else:
+ self.kernels = KernelContainer(self.program)
+
+ def free_kernels(self):
+ """Free all kernels
+ """
+ for kernel in self.cl_kernel_args:
+ self.cl_kernel_args[kernel] = []
+ self.kernels = None
+ self.program = None
+
+ def set_profiling(self, value=True):
+ """Switch On/Off the profiling flag of the command queue to allow debugging
+
+ :param value: set to True to enable profiling, or to False to disable it.
+ Without profiling, the processing is marginally faster
+
+ Profiling information can then be retrieved with the 'log_profile' method
+ """
+ if bool(value) != self.profile:
+ with self.sem:
+ self.profile = bool(value)
+ if self.queue is not None:
+ self.queue.finish()
+ if self.profile:
+ self.queue = pyopencl.CommandQueue(self.ctx,
+ properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+ else:
+ self.queue = pyopencl.CommandQueue(self.ctx)
+
+ def profile_add(self, event, desc):
+ """
+ Add an OpenCL event to the events lists, if profiling is enabled.
+
+ :param event: pyopencl event, as returned by an enqueue command
+ :param desc: string describing the event, stored as an EventDescription
+ """
+ if self.profile:
+ self.events.append(EventDescription(desc, event))
+
+ def allocate_texture(self, shape, hostbuf=None, support_1D=False):
+ return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
+
+ def transfer_to_texture(self, arr, tex_ref):
+ """
+ Transfer an array to a texture.
+
+ :param arr: Input array. Can be a numpy array or a pyopencl array.
+ :param tex_ref: texture reference (pyopencl._cl.Image).
+ """
+ copy_args = [self.queue, tex_ref, arr]
+ shp = arr.shape
+ ndim = arr.ndim
+ if ndim == 1:
+ # pyopencl and OpenCL < 1.2 do not support image1d_t
+ # force 2D with one row in this case
+ # ~ ndim = 2
+ shp = (1,) + shp
+ copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
+ if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
+ # D->D copy
+ copy_args[2] = arr.data
+ copy_kwargs["offset"] = 0
+ ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
+ self.profile_add(ev, "Transfer to texture")
+
+ def log_profile(self, stats=False):
+ """If we are in profiling mode, prints out all timing for every single OpenCL call
+
+ :param stats: if True, prints the statistics on each kernel instead of all execution timings
+ :return: list of lines to print
+ """
+ total_time = 0.0
+ out = [""]
+ if stats:
+ stats = OrderedDict()
+ out.append(f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}")
+ out.append(f"{'Kernel name':>50} (count): min median max mean std")
+ else:
+ stats = None
+ out.append(f"Profiling info for OpenCL: {self.__class__.__name__}")
+
+ if self.profile:
+ for e in self.events:
+ if "__len__" in dir(e) and len(e) >= 2:
+ name = e[0]
+ pr = e[1].profile
+ t0 = pr.start
+ t1 = pr.end
+ et = 1e-6 * (t1 - t0)
+ total_time += et
+ if stats is None:
+ out.append(f"{name:>50} : {et:.3f}ms")
+ else:
+ if name in stats:
+ stats[name].append(et)
+ else:
+ stats[name] = [et]
+ if stats is not None:
+ for k, v in stats.items():
+ n = numpy.array(v)
+ out.append(f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}")
+ out.append("_" * 80)
+ out.append(f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms")
+
+ logger.info(os.linesep.join(out))
+ return out
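+    # Profiling usage sketch (hedged; `proc` stands for any OpenclProcessing
+    # subclass instance created with profile=True):
+    #
+    #     proc.reset_log()
+    #     ...                               # run some processing
+    #     proc.log_profile(stats=True)      # per-kernel statistics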
+
+ def reset_log(self):
+ """
+ Resets the profiling timers
+ """
+ with self.sem:
+ self.events = []
+
+ @property
+ def x87_volatile_option(self):
+ # this is for running 32-bit OpenCL with POCL
+ if self._X87_VOLATILE is None:
+ if (platform.machine() in ("i386", "i686", "x86_64", "AMD64") and
+ (tuple.__itemsize__ == 4) and
+ self.ctx.devices[0].platform.name == 'Portable Computing Language'):
+ self._X87_VOLATILE = "-DX87_VOLATILE=volatile"
+ else:
+ self._X87_VOLATILE = ""
+ return self._X87_VOLATILE
+
+ def get_compiler_options(self, x87_volatile=False):
+ """Provide the default OpenCL compiler options
+
+ :param x87_volatile: needed for Kahan summation
+ :return: string with compiler options
+ """
+ option_list = []
+ if x87_volatile:
+ option_list.append(self.x87_volatile_option)
+ return " ".join(i for i in option_list if i)
+
+# This should be implemented by concrete class
+# def __copy__(self):
+# """Shallow copy of the object
+#
+# :return: copy of the object
+# """
+# return self.__class__((self._data, self._indices, self._indptr),
+# self.size, block_size=self.BLOCK_SIZE,
+# platformid=self.platform.id,
+# deviceid=self.device.id,
+# checksum=self.on_device.get("data"),
+# profile=self.profile, empty=self.empty)
+#
+# def __deepcopy__(self, memo=None):
+# """deep copy of the object
+#
+# :return: deepcopy of the object
+# """
+# if memo is None:
+# memo = {}
+# new_csr = self._data.copy(), self._indices.copy(), self._indptr.copy()
+# memo[id(self._data)] = new_csr[0]
+# memo[id(self._indices)] = new_csr[1]
+# memo[id(self._indptr)] = new_csr[2]
+# new_obj = self.__class__(new_csr, self.size,
+# block_size=self.BLOCK_SIZE,
+# platformid=self.platform.id,
+# deviceid=self.device.id,
+# checksum=self.on_device.get("data"),
+# profile=self.profile, empty=self.empty)
+# memo[id(self)] = new_obj
+# return new_obj
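+
+# Sketch of a hypothetical minimal concrete subclass (illustration only, not
+# part of the silx API): declare buffers and kernel files as class attributes,
+# then allocate and compile in the constructor.
+#
+# class MyProcessing(OpenclProcessing):
+#     buffers = [BufferDescription("data", 1024, numpy.float32,
+#                                  pyopencl.mem_flags.READ_WRITE)]
+#     kernel_files = ["my_kernels.cl"]      # hypothetical kernel source file
+#
+#     def __init__(self, **kwargs):
+#         OpenclProcessing.__init__(self, **kwargs)
+#         self.allocate_buffers()
+#         self.compile_kernels()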
diff --git a/src/silx/opencl/projection.py b/src/silx/opencl/projection.py
new file mode 100644
index 0000000..c02faf6
--- /dev/null
+++ b/src/silx/opencl/projection.py
@@ -0,0 +1,428 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016-2020 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for tomographic projector on the GPU"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["A. Mirone, P. Paleo"]
+__license__ = "MIT"
+__date__ = "01/08/2019"
+
+import logging
+import numpy as np
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+from .backprojection import _sizeof, _idivup
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+ import pyopencl.array as parray
+else:
+ raise ImportError("pyopencl is not installed")
+logger = logging.getLogger(__name__)
+
+
+class Projection(OpenclProcessing):
+ """
+ A class for performing a tomographic projection (Radon Transform) using
+ OpenCL
+ """
+ kernel_files = ["proj.cl", "array_utils.cl"]
+ logger.warning("Forward Projecter is untested and unsuported for now")
+
+ def __init__(self, slice_shape, angles, axis_position=None,
+ detector_width=None, normalize=False, ctx=None,
+ devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+ """Constructor of the OpenCL projector.
+
+ :param slice_shape: shape of the slice: (num_rows, num_columns).
+ :param angles: Either an integer number of angles, or a list of custom
+ angle values in radians.
+ :param axis_position: Optional, axis position. Default is
+ `(shape[1]-1)/2.0`.
+ :param detector_width: Optional, detector width in pixels.
+ If detector_width > slice_shape[1], the
+ projection data will be surrounded with zeros.
+ Using detector_width < slice_shape[1] might
+ result in a local tomography setup.
+ :param normalize: Optional, normalization. If set, the sinograms are
+ multiplied by the factor pi/(2*nprojs).
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ """
+ # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers
+ # if sys.platform.startswith('darwin'): # assuming no discrete GPU
+ # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
+
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+ self.shape = slice_shape
+ self.axis_pos = axis_position
+ self.angles = angles
+ self.dwidth = detector_width
+ self.normalize = normalize
+
+ # Default values
+ if self.axis_pos is None:
+ self.axis_pos = (self.shape[1] - 1) / 2.
+ if self.dwidth is None:
+ self.dwidth = self.shape[1]
+ if not(np.iterable(self.angles)):
+ if self.angles is None:
+ self.nprojs = self.shape[0]
+ else:
+ self.nprojs = self.angles
+ self.angles = np.linspace(start=0,
+ stop=np.pi,
+ num=self.nprojs,
+ endpoint=False).astype(dtype=np.float32)
+ else:
+ self.nprojs = len(self.angles)
+ self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos) # TODO: custom
+ self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos) # TODO: custom
+ # Reset axis_pos once offset are computed
+ self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.)
+
+ # Workgroup, ndrange and shared size
+ self.dimgrid_x = _idivup(self.dwidth, 16)
+ self.dimgrid_y = _idivup(self.nprojs, 16)
+ self._dimrecx = np.int32(self.dimgrid_x * 16)
+ self._dimrecy = np.int32(self.dimgrid_y * 16)
+ self.local_mem = 16 * 7 * _sizeof(np.float32)
+ self.wg = (16, 16)
+ self.ndrange = (
+ int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1
+ int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1
+ )
+
+ self._use_textures = self.check_textures_availability()
+
+ # Allocate memory
+ self.buffers = [
+ BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE),
+ BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY),
+ BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ ]
+ d_axis_corrections = parray.empty(self.queue, self.nprojs, np.float32)
+ d_axis_corrections.fill(np.float32(0.0))
+ self.add_to_cl_mem(
+ {
+ "d_axis_corrections": d_axis_corrections
+ }
+ )
+ self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2),
+ dtype=np.float32)
+ if not(self._use_textures):
+ self.allocate_slice()
+ else:
+ self.allocate_textures()
+ self.allocate_buffers()
+ self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
+ dtype=np.float32)
+ if not(self._use_textures):
+ self.cl_mem["d_slice"].fill(0.)
+ # enqueue_fill_buffer has issues if opencl 1.2 is not present
+ # ~ pyopencl.enqueue_fill_buffer(
+ # ~ self.queue,
+ # ~ self.cl_mem["d_slice"],
+ # ~ np.float32(0),
+ # ~ 0,
+ # ~ self._tmp_extended_img.size * _sizeof(np.float32)
+ # ~ )
+ # Precomputations
+ self.compute_angles()
+ self.proj_precomputations()
+ self.cl_mem["d_axis_corrections"].fill(0.)
+ # enqueue_fill_buffer has issues if opencl 1.2 is not present
+ # ~ pyopencl.enqueue_fill_buffer(
+ # ~ self.queue,
+ # ~ self.cl_mem["d_axis_corrections"],
+ # ~ np.float32(0),
+ # ~ 0,
+ # ~ self.nprojs*_sizeof(np.float32)
+ # ~ )
+ # Shorthands
+ self._d_sino = self.cl_mem["_d_sino"]
+
+ compile_options = None
+ if not(self._use_textures):
+ compile_options = "-DDONT_USE_TEXTURES"
+ OpenclProcessing.compile_kernels(
+ self,
+ self.kernel_files,
+ compile_options=compile_options
+ )
+ # check that workgroup can actually be (16, 16)
+ self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu")
+
+ def compute_angles(self):
+ angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs
+ angles2[:self.nprojs] = np.copy(self.angles)
+ angles2[self.nprojs:] = angles2[self.nprojs - 1]
+ self.angles2 = angles2
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
+
+ def allocate_slice(self):
+ ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
+ ary.fill(0)
+ self.add_to_cl_mem({"d_slice": ary})
+
+ def allocate_textures(self):
+ self.d_image_tex = pyopencl.Image(
+ self.ctx,
+ mf.READ_ONLY | mf.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY,
+ pyopencl.channel_type.FLOAT
+ ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
+ )
+
+ def transfer_to_texture(self, image):
+ image2 = image
+ if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
+ image2 = np.ascontiguousarray(image)
+ if not(self._use_textures):
+ # TODO: create NoneEvent
+ return self.transfer_to_slice(image2)
+ # ~ return pyopencl.enqueue_copy(
+ # ~ self.queue,
+ # ~ self.cl_mem["d_slice"].data,
+ # ~ image2,
+ # ~ origin=(1, 1),
+ # ~ region=image.shape[::-1]
+ # ~ )
+ else:
+ return pyopencl.enqueue_copy(
+ self.queue,
+ self.d_image_tex,
+ image2,
+ origin=(1, 1),
+ region=image.shape[::-1]
+ )
+
+ def transfer_device_to_texture(self, d_image):
+ if not(self._use_textures):
+ # TODO this copy should not be necessary
+ return self.cpy2d_to_slice(d_image)
+ else:
+ return pyopencl.enqueue_copy(
+ self.queue,
+ self.d_image_tex,
+ d_image,
+ offset=0,
+ origin=(1, 1),
+ region=(int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl <= 2015.2
+ )
+
+ def transfer_to_slice(self, image):
+ image2 = np.zeros((image.shape[0] + 2, image.shape[1] + 2), dtype=np.float32)
+ image2[1:-1, 1:-1] = image.astype(np.float32)
+ self.cl_mem["d_slice"].set(image2)
+
+ def proj_precomputations(self):
+ beginPos = np.zeros((2, self._dimrecy), dtype=np.int32)
+ strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32)
+ strideLine = np.zeros((2, self._dimrecy), dtype=np.int32)
+ cos_angles = np.cos(self.angles2)
+ sin_angles = np.sin(self.angles2)
+ dimslice = self.shape[1]
+
+ M1 = np.abs(cos_angles) > 0.70710678
+ M1b = np.logical_not(M1)
+ M2 = cos_angles > 0
+ M2b = np.logical_not(M2)
+ M3 = sin_angles > 0
+ M3b = np.logical_not(M3)
+ case1 = M1 * M2
+ case2 = M1 * M2b
+ case3 = M1b * M3
+ case4 = M1b * M3b
+
+ beginPos[0][case1] = 0
+ beginPos[1][case1] = 0
+ strideJoseph[0][case1] = 1
+ strideJoseph[1][case1] = 0
+ strideLine[0][case1] = 0
+ strideLine[1][case1] = 1
+
+ beginPos[0][case2] = dimslice - 1
+ beginPos[1][case2] = dimslice - 1
+ strideJoseph[0][case2] = -1
+ strideJoseph[1][case2] = 0
+ strideLine[0][case2] = 0
+ strideLine[1][case2] = -1
+
+ beginPos[0][case3] = dimslice - 1
+ beginPos[1][case3] = 0
+ strideJoseph[0][case3] = 0
+ strideJoseph[1][case3] = 1
+ strideLine[0][case3] = -1
+ strideLine[1][case3] = 0
+
+ beginPos[0][case4] = 0
+ beginPos[1][case4] = dimslice - 1
+ strideJoseph[0][case4] = 0
+ strideJoseph[1][case4] = -1
+ strideLine[0][case4] = 1
+ strideLine[1][case4] = 0
+
+ # For debug purpose
+ # ~ self.beginPos = beginPos
+ # ~ self.strideJoseph = strideJoseph
+ # ~ self.strideLine = strideLine
+ #
+
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine)
+
+ def _get_local_mem(self):
+ return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
+
+ def cpy2d_to_sino(self, dst):
+ ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2
+ sino_shape_ocl = np.int32(ndrange)
+ wg = None
+ kernel_args = (
+ dst.data,
+ self._d_sino,
+ np.int32(self.dwidth),
+ np.int32(self._dimrecx),
+ np.int32((0, 0)),
+ np.int32((0, 0)),
+ sino_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def cpy2d_to_slice(self, src):
+ """
+ copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2)
+ """
+ ndrange = (int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl < 2015.2
+ wg = None
+ slice_shape_ocl = np.int32(ndrange)
+ kernel_args = (
+ self.cl_mem["d_slice"].data,
+ src,
+ np.int32(self.shape[1] + 2),
+ np.int32(self.shape[1]),
+ np.int32((1, 1)),
+ np.int32((0, 0)),
+ slice_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def projection(self, image=None, dst=None):
+ """Perform the projection on an input image
+
+ :param image: Image to project; if None, reuse the slice already on the device
+ :param dst: Optional pre-allocated device array to receive the sinogram
+ :return: A sinogram
+ """
+ events = []
+ with self.sem:
+ if image is not None:
+ assert image.ndim == 2, "Treat only 2D images"
+ assert image.shape[0] == self.shape[0], "image height does not match the expected slice shape"
+ assert image.shape[1] == self.shape[1], "image width does not match the expected slice shape"
+ if self._use_textures:
+ self.transfer_to_texture(image)
+ slice_ref = self.d_image_tex
+ else:
+ self.transfer_to_slice(image)
+ slice_ref = self.cl_mem["d_slice"].data
+ else:
+ if not(self._use_textures):
+ slice_ref = self.cl_mem["d_slice"].data
+ else:
+ slice_ref = self.d_image_tex
+
+ kernel_args = (
+ self._d_sino,
+ slice_ref,
+ np.int32(self.shape[1]),
+ np.int32(self.dwidth),
+ self.cl_mem["d_angles"],
+ np.float32(self.axis_pos0),
+ self.cl_mem["d_axis_corrections"].data, # TODO custom
+ self.cl_mem["d_beginPos"],
+ self.cl_mem["d_strideJoseph"],
+ self.cl_mem["d_strideLine"],
+ np.int32(self.nprojs),
+ self._dimrecx,
+ self._dimrecy,
+ self.offset_x,
+ self.offset_y,
+ np.int32(1), # josephnoclip, 1 by default
+ np.int32(self.normalize)
+ )
+
+ # Call the kernel
+ if not(self._use_textures):
+ event_pj = self.kernels.forward_kernel_cpu(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *kernel_args
+ )
+ else:
+ event_pj = self.kernels.forward_kernel(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *kernel_args
+ )
+ events.append(EventDescription("projection", event_pj))
+ if dst is None:
+ self._ex_sino[:] = 0
+ ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino)
+ events.append(EventDescription("copy D->H result", ev))
+ ev.wait()
+ res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth])
+ else:
+ ev = self.cpy2d_to_sino(dst)
+ events.append(EventDescription("copy D->D result", ev))
+ ev.wait()
+ res = dst
+ # /with self.sem
+ if self.profile:
+ self.events += events
+ # ~ res = self._ex_sino
+ return res
+
+ __call__ = projection
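+
+# Illustrative usage sketch (assumes a working OpenCL runtime; note that this
+# projector is flagged above as untested and unsupported):
+#
+#     proj = Projection(slice_shape=(256, 256), angles=180)
+#     sino = proj(np.ones((256, 256), dtype=np.float32))   # shape (180, 256)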
diff --git a/src/silx/opencl/reconstruction.py b/src/silx/opencl/reconstruction.py
new file mode 100644
index 0000000..2c84aee
--- /dev/null
+++ b/src/silx/opencl/reconstruction.py
@@ -0,0 +1,388 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for tomographic reconstruction algorithms"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "01/08/2019"
+
+import logging
+import numpy as np
+
+from .common import pyopencl
+from .processing import OpenclProcessing
+from .backprojection import Backprojection
+from .projection import Projection
+from .linalg import LinAlg
+
+import pyopencl.array as parray
+from pyopencl.elementwise import ElementwiseKernel
+logger = logging.getLogger(__name__)
+
+cl = pyopencl
+
+
+class ReconstructionAlgorithm(OpenclProcessing):
+ """
+ A parent class for all iterative tomographic reconstruction algorithms
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_b, n_a) where n_b is the number of detector bins and
+ n_a is the number of angles.
+ :param slice_shape: Optional, shape of the reconstructed slice.
+ By default, it is a square slice where the dimension
+ is the "x dimension" of the sinogram (number of bins).
+ :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+ """
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ # Create a backprojector
+ self.backprojector = Backprojection(
+ sino_shape,
+ slice_shape=slice_shape,
+ axis_position=axis_position,
+ angles=angles,
+ ctx=self.ctx,
+ profile=profile
+ )
+ # Create a projector
+ self.projector = Projection(
+ self.backprojector.slice_shape,
+ self.backprojector.angles,
+ axis_position=axis_position,
+ detector_width=self.backprojector.num_bins,
+ normalize=False,
+ ctx=self.ctx,
+ profile=profile
+ )
+ self.sino_shape = sino_shape
+ self.is_cpu = self.backprojector.is_cpu
+ # Arrays
+ self.d_data = parray.empty(self.queue, sino_shape, dtype=np.float32)
+ self.d_data.fill(0.0)
+ self.d_sino = parray.empty_like(self.d_data)
+ self.d_sino.fill(0.0)
+ self.d_x = parray.empty(self.queue,
+ self.backprojector.slice_shape,
+ dtype=np.float32)
+ self.d_x.fill(0.0)
+ self.d_x_old = parray.empty_like(self.d_x)
+ self.d_x_old.fill(0.0)
+
+ self.add_to_cl_mem({
+ "d_data": self.d_data,
+ "d_sino": self.d_sino,
+ "d_x": self.d_x,
+ "d_x_old": self.d_x_old,
+ })
+
+ def proj(self, d_slice, d_sino):
+ """
+ Project d_slice to d_sino
+ """
+ self.projector.transfer_device_to_texture(d_slice.data) #.wait()
+ self.projector.projection(dst=d_sino)
+
+ def backproj(self, d_sino, d_slice):
+ """
+ Backproject d_sino to d_slice
+ """
+ self.backprojector.transfer_device_to_texture(d_sino.data) #.wait()
+ self.backprojector.backprojection(dst=d_slice)
+
+
+class SIRT(ReconstructionAlgorithm):
+ """
+ A class for the SIRT algorithm
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_b, n_a) where n_b is the number of detector bins and
+ n_a is the number of angles.
+ :param slice_shape: Optional, shape of the reconstructed slice.
+ By default, it is a square slice where the dimension is
+ the "x dimension" of the sinogram (number of bins).
+ :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+
+ .. warning:: This is a beta version of the SIRT algorithm. Reconstruction
+ fails at least on CPU (Xeon E3-1245 v5) when using the AMD OpenCL
+ implementation.
+ """
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+
+ ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
+ axis_position=axis_position, angles=angles,
+ ctx=ctx, devicetype=devicetype, platformid=platformid,
+ deviceid=deviceid, profile=profile)
+ self.compute_preconditioners()
+
+ def compute_preconditioners(self):
+ """
+ Create a diagonal preconditioner for the projection and backprojection
+ operator.
+ Each term of the diagonal is the sum of the projector/backprojector
+ along rows [1], i.e the projection/backprojection of an array of ones.
+
+ [1] Jens Gregor and Thomas Benson,
+ Computational Analysis and Improvement of SIRT,
+ IEEE transactions on medical imaging, vol. 27, no. 7, 2008
+ """
+
+ # r_{i,i} = 1/(sum_j a_{i,j})
+ slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
+ R = 1./self.projector.projection(slice_ones) # could be all done on GPU, but I want extra checks
+ R[np.logical_not(np.isfinite(R))] = 1. # in case the rotation axis is off-centre
+ self.d_R = parray.to_device(self.queue, R)
+ # c_{j,j} = 1/(sum_i a_{i,j})
+ sino_ones = np.ones(self.sino_shape, dtype=np.float32)
+ C = 1./self.backprojector.backprojection(sino_ones)
+ C[np.logical_not(np.isfinite(C))] = 1. # in case the rotation axis is off-centre
+ self.d_C = parray.to_device(self.queue, C)
+
+ self.add_to_cl_mem({
+ "d_R": self.d_R,
+ "d_C": self.d_C
+ })
+
+ # TODO: compute and possibly return the residual
+ def run(self, data, n_it):
+ """
+ Run n_it iterations of the SIRT algorithm.
+ """
+ cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
+
+ d_x_old = self.d_x_old
+ d_x = self.d_x
+ d_R = self.d_R
+ d_C = self.d_C
+ d_sino = self.d_sino
+ d_x *= 0
+
+ for k in range(n_it):
+ d_x_old[:] = d_x[:]
+ # x{k+1} = x{k} - C A^T R (A x{k} - b)
+ self.proj(d_x, d_sino)
+ d_sino -= self.d_data
+ d_sino *= d_R
+ if self.is_cpu:
+ # This sync is necessary when using CPU, while it is not for GPU
+ d_sino.finish()
+ self.backproj(d_sino, d_x)
+ d_x *= -d_C
+ d_x += d_x_old
+ if self.is_cpu:
+ # This sync is necessary when using CPU, while it is not for GPU
+ d_x.finish()
+
+ return d_x
+
+ __call__ = run
+
+
+class TV(ReconstructionAlgorithm):
+ """
+ A class for reconstruction with Total Variation regularization using the
+ Chambolle-Pock TV reconstruction algorithm.
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_b, n_a) where n_b is the number of detector bins and
+ n_a is the number of angles.
+ :param slice_shape: Optional, shape of the reconstructed slice. By default,
+ it is a square slice where the dimension is the
+ "x dimension" of the sinogram (number of bins).
+ :param axis_position: Optional, axis position. Default is
+ `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly slower)
+
+ .. warning:: This is a beta version of the Chambolle-Pock TV algorithm.
+ Reconstruction fails at least on CPU (Xeon E3-1245 v5) when using
+ the AMD OpenCL implementation.
+ """
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+ ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
+ axis_position=axis_position, angles=angles,
+ ctx=ctx, devicetype=devicetype, platformid=platformid,
+ deviceid=deviceid, profile=profile)
+ self.compute_preconditioners()
+
+ # Create a LinAlg instance
+ self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx)
+ # Positivity constraint
+ self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a", "a[i] = max(a[i], 0.0f);")
+ # Projection onto the L-infinity ball of radius Lambda
+ self.elwise_proj_linf = ElementwiseKernel(
+ self.ctx,
+ "float2* a, float Lambda",
+ "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);",
+ "elwise_proj_linf"
+ )
+ # Additional arrays
+ self.linalg.gradient(self.d_x)
+ self.d_p = parray.empty_like(self.linalg.cl_mem["d_gradient"])
+ self.d_q = parray.empty_like(self.d_data)
+ self.d_g = self.linalg.d_image
+ self.d_tmp = parray.empty_like(self.d_x)
+ self.d_p.fill(0)
+ self.d_q.fill(0)
+ self.d_tmp.fill(0)
+ self.add_to_cl_mem({
+ "d_p": self.d_p,
+ "d_q": self.d_q,
+ "d_tmp": self.d_tmp,
+ })
+
+ self.theta = 1.0
+
+ def compute_preconditioners(self):
+ """
+ Create a diagonal preconditioner for the projection and backprojection
+ operator.
+ Each term of the diagonal is the sum of the projector/backprojector
+ along rows [2],
+ i.e the projection/backprojection of an array of ones.
+
+ [2] T. Pock, A. Chambolle,
+ Diagonal preconditioning for first order primal-dual algorithms in
+ convex optimization,
+ International Conference on Computer Vision, 2011
+ """
+
+ # Compute the diagonal preconditioner "Sigma"
+ slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
+ Sigma_k = 1./self.projector.projection(slice_ones)
+ Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1.
+ self.d_Sigma_k = parray.to_device(self.queue, Sigma_k)
+ self.d_Sigma_kp1 = self.d_Sigma_k + 1 # TODO: memory vs computation
+ self.Sigma_grad = 1/2.0 # For discrete gradient, sum|D_i,j| = 2 along lines or cols
+
+ # Compute the diagonal preconditioner "Tau"
+ sino_ones = np.ones(self.sino_shape, dtype=np.float32)
+ C = self.backprojector.backprojection(sino_ones)
+ Tau = 1./(C + 2.)
+ self.d_Tau = parray.to_device(self.queue, Tau)
+
+ self.add_to_cl_mem({
+ "d_Sigma_k": self.d_Sigma_k,
+ "d_Sigma_kp1": self.d_Sigma_kp1,
+ "d_Tau": self.d_Tau
+ })
+
+ def run(self, data, n_it, Lambda, pos_constraint=False):
+ """
+ Run n_it iterations of the TV-regularized reconstruction,
+ with the regularization parameter Lambda.
+ """
+ cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
+
+ d_x = self.d_x
+ d_x_old = self.d_x_old
+ d_tmp = self.d_tmp
+ d_sino = self.d_sino
+ d_p = self.d_p
+ d_q = self.d_q
+ d_g = self.d_g
+
+ d_x *= 0
+ d_p *= 0
+ d_q *= 0
+
+ for k in range(0, n_it):
+ # Update primal variables
+ d_x_old[:] = d_x[:]
+ #~ x = x + Tau*div(p) - Tau*Kadj(q)
+ self.backproj(d_q, d_tmp)
+ self.linalg.divergence(d_p)
+ # TODO: this in less than three ops (one kernel ?)
+ d_g -= d_tmp # d_g -> L.d_image
+ d_g *= self.d_Tau
+ d_x += d_g
+
+ if pos_constraint:
+ self.elwise_clamp(d_x)
+
+ # Update dual variables
+ #~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda)
+ d_tmp[:] = d_x[:]
+ # FIXME: mul_add is out of place, put an equivalent thing in linalg...
+ #~ d_tmp.mul_add(1 + theta, d_x_old, -theta)
+ d_tmp *= 1+self.theta
+ d_tmp -= self.theta*d_x_old
+ self.linalg.gradient(d_tmp)
+ # TODO: out of place mul_add
+ #~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad)
+ self.linalg.cl_mem["d_gradient"] *= self.Sigma_grad
+ d_p += self.linalg.cl_mem["d_gradient"]
+ self.elwise_proj_linf(d_p, Lambda)
+
+ #~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k)
+ self.proj(d_tmp, d_sino)
+ # TODO: this in less instructions
+ d_sino -= self.d_data
+ d_sino *= self.d_Sigma_k
+ d_q += d_sino
+ d_q /= self.d_Sigma_kp1
+ return d_x
+
+ __call__ = run
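+
+# Illustrative usage sketch (assumes a working OpenCL runtime and a sinogram
+# `sino` as a numpy array of shape (n_angles, n_bins)):
+#
+#     sirt = SIRT(sino.shape)
+#     rec = sirt(sino, 200).get()                # 200 SIRT iterations
+#
+#     tv = TV(sino.shape)
+#     rec_tv = tv(sino, 200, Lambda=1e-2).get()  # TV-regularized reconstruction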
diff --git a/src/silx/opencl/setup.py b/src/silx/opencl/setup.py
new file mode 100644
index 0000000..10fb1be
--- /dev/null
+++ b/src/silx/opencl/setup.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+#
+# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+
+from __future__ import division
+
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__authors__ = ["J. Kieffer"]
+__date__ = "16/10/2017"
+
+import os.path
+from numpy.distutils.misc_util import Configuration
+
+
+def configuration(parent_package='', top_path=None):
+ config = Configuration('opencl', parent_package, top_path)
+ path = os.path.dirname(os.path.abspath(__file__))
+ if os.path.exists(os.path.join(path, 'sift')):
+ config.add_subpackage('sift')
+ config.add_subpackage('codec')
+ config.add_subpackage('test')
+ return config
+
+
+if __name__ == "__main__":
+ from numpy.distutils.core import setup
+ setup(configuration=configuration)
diff --git a/src/silx/opencl/sinofilter.py b/src/silx/opencl/sinofilter.py
new file mode 100644
index 0000000..d608744
--- /dev/null
+++ b/src/silx/opencl/sinofilter.py
@@ -0,0 +1,435 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016-2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for sinogram filtering on CPU/GPU."""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "07/06/2019"
+
+import numpy as np
+from math import pi
+
+
+import pyopencl.array as parray
+from .common import pyopencl as cl
+from .processing import OpenclProcessing
+from ..math.fft.clfft import CLFFT, __have_clfft__
+from ..math.fft.npfft import NPFFT
+from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter
+from ..utils.deprecation import deprecated
+
+
+
+class SinoFilter(OpenclProcessing):
+ """A class for performing sinogram filtering on GPU using OpenCL.
+
+ This is a convolution in the Fourier space, along one dimension:
+
+ - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x)
+ - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x)
+ """
+ kernel_files = ["array_utils.cl"]
+ powers = generate_powers()
+
+ def __init__(self, sino_shape, filter_name=None, ctx=None,
+ devicetype="all", platformid=None, deviceid=None,
+ profile=False, extra_options=None):
+ """Constructor of OpenCL FFT-Convolve.
+
+ :param sino_shape: shape of the sinogram.
+ :param filter_name: Name of the filter. Default is "ram-lak".
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ :param dict extra_options: Advanced extra options.
+ Current options are: cutoff, use_numpy_fft
+ """
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ self._init_extra_options(extra_options)
+ self._calculate_shapes(sino_shape)
+ self._init_fft()
+ self._allocate_memory()
+ self._compute_filter(filter_name)
+ self._init_kernels()
+
+ def _calculate_shapes(self, sino_shape):
+ """
+
+ :param sino_shape: shape of the sinogram.
+ """
+ self.ndim = len(sino_shape)
+ if self.ndim == 2:
+ n_angles, dwidth = sino_shape
+ else:
+ raise ValueError("Invalid sinogram number of dimensions: "
+ "expected 2 dimensions")
+ self.sino_shape = sino_shape
+ self.n_angles = n_angles
+ self.dwidth = dwidth
+ self.dwidth_padded = get_next_power(2 * self.dwidth, powers=self.powers)
+ self.sino_padded_shape = (n_angles, self.dwidth_padded)
+ sino_f_shape = list(self.sino_padded_shape)
+ sino_f_shape[-1] = sino_f_shape[-1] // 2 + 1
+ self.sino_f_shape = tuple(sino_f_shape)
+
+ def _init_extra_options(self, extra_options):
+ """
+
+ :param dict extra_options: Advanced extra options.
+ Current options are: cutoff,
+ """
+ self.extra_options = {
+ "cutoff": 1.,
+ "use_numpy_fft": False,
+ }
+ if extra_options is not None:
+ self.extra_options.update(extra_options)
+
+ def _init_fft(self):
+ if __have_clfft__ and not(self.extra_options["use_numpy_fft"]):
+ self.fft_backend = "opencl"
+ self.fft = CLFFT(
+ self.sino_padded_shape,
+ dtype=np.float32,
+ axes=(-1,),
+ ctx=self.ctx,
+ )
+ else:
+ self.fft_backend = "numpy"
+ print("The gpyfft module was not found. The Fourier transforms "
+ "will be done on CPU. For more performances, it is advised "
+ "to install gpyfft.""")
+ self.fft = NPFFT(
+ template=np.zeros(self.sino_padded_shape, "f"),
+ axes=(-1,),
+ )
+
+ def _allocate_memory(self):
+ self.d_filter_f = parray.zeros(self.queue, (self.sino_f_shape[-1],), np.complex64)
+ self.is_cpu = (self.device.type == "CPU")
+ # These are already allocated by FFT() if using the opencl backend
+ if self.fft_backend == "opencl":
+ self.d_sino_padded = self.fft.data_in
+ self.d_sino_f = self.fft.data_out
+ else:
+ # When using the numpy backend, arrays are not pre-allocated
+ self.d_sino_padded = np.zeros(self.sino_padded_shape, "f")
+ self.d_sino_f = np.zeros(self.sino_f_shape, np.complex64)
+ # These are needed for rectangular memcpy in certain cases (see below).
+ self.tmp_sino_device = parray.zeros(self.queue, self.sino_shape, "f")
+ self.tmp_sino_host = np.zeros(self.sino_shape, "f")
+
+ def _compute_filter(self, filter_name):
+ """
+
+ :param str filter_name: filter name
+ """
+ self.filter_name = filter_name or "ram-lak"
+ filter_f = compute_fourier_filter(
+ self.dwidth_padded,
+ self.filter_name,
+ cutoff=self.extra_options["cutoff"],
+ )[:self.dwidth_padded // 2 + 1] # R2C
+ self.set_filter(filter_f, normalize=True)
+
+ def set_filter(self, h_filt, normalize=True):
+ """
+ Set a filter for sinogram filtering.
+
+ :param h_filt: Filter. Each line of the sinogram will be filtered with
+ this filter. It has to be the Real-to-Complex Fourier Transform
+ of some real filter, padded to 2*sinogram_width.
+ :param normalize: Whether to normalize the filter with pi/num_angles.
+ """
+ if h_filt.size != self.sino_f_shape[-1]:
+ raise ValueError(
+ """
+ Invalid filter size: expected %d, got %d.
+ Please check that the filter is the Fourier R2C transform of
+ some real 1D filter.
+ """
+ % (self.sino_f_shape[-1], h_filt.size)
+ )
+ if not(np.iscomplexobj(h_filt)):
+ print("Warning: expected a complex Fourier filter")
+ self.filter_f = h_filt
+ if normalize:
+ self.filter_f *= pi / self.n_angles
+ self.filter_f = self.filter_f.astype(np.complex64)
+ self.d_filter_f[:] = self.filter_f[:]
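+    # Sketch: building a custom filter from a real-space impulse response `h`
+    # of length dwidth_padded (hypothetical names, numpy imported as np):
+    #
+    #     filter_f = np.fft.rfft(h).astype(np.complex64)
+    #     sino_filter.set_filter(filter_f, normalize=True)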
+
+ def _init_kernels(self):
+ OpenclProcessing.compile_kernels(self, self.kernel_files)
+ h, w = self.d_sino_f.shape
+ self.mult_kern_args = (self.queue, (int(w), int(h)), None,
+ self.d_sino_f.data,
+ self.d_filter_f.data,
+ np.int32(w),
+ np.int32(h))
+
+ def check_array(self, arr):
+ if arr.dtype != np.float32:
+ raise ValueError("Expected data type = numpy.float32")
+ if arr.shape != self.sino_shape:
+ raise ValueError("Expected sinogram shape %s, got %s" %
+ (self.sino_shape, arr.shape))
+ if not(isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)):
+ raise ValueError("Expected either numpy.ndarray or "
+ "pyopencl.array.Array")
+
+ def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0),
+ src_offset=(0, 0)):
+ """
+
+ :param dst:
+ :param src:
+ :param transfer_shape:
+ :param dst_offset:
+ :param src_offset:
+ """
+ shape = tuple(int(i) for i in transfer_shape[::-1])
+ ev = self.kernels.cpy2d(self.queue, shape, None,
+ dst.data,
+ src.data,
+ np.int32(dst.shape[1]),
+ np.int32(src.shape[1]),
+ np.int32(dst_offset),
+ np.int32(src_offset),
+ np.int32(transfer_shape[::-1]))
+ ev.wait()
+
+ def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0),
+ src_offset=(0, 0)):
+ """
+
+ :param dst:
+ :param src:
+ :param transfer_shape:
+ :param dst_offset:
+ :param src_offset:
+ """
+ s = transfer_shape
+ do = dst_offset
+ so = src_offset
+ dst[do[0]:do[0] + s[0], do[1]:do[1] + s[1]] = src[so[0]:so[0] + s[0], so[1]:so[1] + s[1]]
+
+ def _prepare_input_sino(self, sino):
+ """
+ :param sino: sinogram
+ """
+ self.check_array(sino)
+ self.d_sino_padded.fill(0)
+ if self.fft_backend == "opencl":
+ # OpenCL backend: FFT/mult/IFFT are done on device.
+ if isinstance(sino, np.ndarray):
+ # OpenCL backend + numpy input: copy H->D.
+ # As pyopencl does not support rectangular copies, we have to
+ # do a copy H->D in a temporary device buffer, and then call a
+ # kernel doing the rectangular D-D copy.
+ self.tmp_sino_device[:] = sino[:]
+ if self.is_cpu:
+ self.tmp_sino_device.finish()
+ d_sino_ref = self.tmp_sino_device
+ else:
+ d_sino_ref = sino
+ # Rectangular copy D->D
+ self.copy2d(self.d_sino_padded, d_sino_ref, self.sino_shape)
+ if self.is_cpu:
+ self.d_sino_padded.finish() # should not be required here
+ else:
+ # Numpy backend: FFT/mult/IFFT are done on host.
+ if not(isinstance(sino, np.ndarray)):
+ # Numpy backend + pyopencl input: need to copy D->H
+ self.tmp_sino_host[:] = sino[:]
+ h_sino_ref = self.tmp_sino_host
+ else:
+ h_sino_ref = sino
+ # Rectangular copy H->H
+ self.copy2d_host(self.d_sino_padded, h_sino_ref, self.sino_shape)
+
+ def _get_output_sino(self, output):
+ """
+ :param output: optional output sinogram (numpy.ndarray or pyopencl array); allocated if None.
+ :return: filtered sinogram
+ """
+ if output is None:
+ res = np.zeros(self.sino_shape, dtype=np.float32)
+ else:
+ res = output
+ if self.fft_backend == "opencl":
+ if isinstance(res, np.ndarray):
+ # OpenCL backend + numpy output: copy D->H
+ # As pyopencl does not support rectangular copies, we first have
+ # to call a kernel doing rectangular copy D->D, then do a copy
+ # D->H.
+ self.copy2d(dst=self.tmp_sino_device,
+ src=self.d_sino_padded,
+ transfer_shape=self.sino_shape)
+ if self.is_cpu:
+ self.tmp_sino_device.finish() # should not be required here
+ res[:] = self.tmp_sino_device.get()[:]
+ else:
+ if self.is_cpu:
+ self.d_sino_padded.finish()
+ self.copy2d(res, self.d_sino_padded, self.sino_shape)
+ if self.is_cpu:
+ res.finish() # should not be required here
+ else:
+ if not(isinstance(res, np.ndarray)):
+ # Numpy backend + pyopencl output: rect copy H->H + copy H->D
+ self.copy2d_host(dst=self.tmp_sino_host,
+ src=self.d_sino_padded,
+ transfer_shape=self.sino_shape)
+ res[:] = self.tmp_sino_host[:]
+ else:
+ # Numpy backend + numpy output: rect copy H->H
+ self.copy2d_host(res, self.d_sino_padded, self.sino_shape)
+ return res
+
+ def _do_fft(self):
+ if self.fft_backend == "opencl":
+ self.fft.fft(self.d_sino_padded, output=self.d_sino_f)
+ if self.is_cpu:
+ self.d_sino_f.finish()
+ else:
+ # numpy backend does not support "output=" argument,
+            # and rfft always returns a complex128 result.
+ res = self.fft.fft(self.d_sino_padded).astype(np.complex64)
+ self.d_sino_f[:] = res[:]
+
+ def _multiply_fourier(self):
+ if self.fft_backend == "opencl":
+ # Everything is on device. Call the multiplication kernel.
+ ev = self.kernels.inplace_complex_mul_2Dby1D(
+ *self.mult_kern_args
+ )
+ ev.wait()
+ if self.is_cpu:
+ self.d_sino_f.finish() # should not be required here
+ else:
+ # Everything is on host.
+ self.d_sino_f *= self.filter_f
+
+ def _do_ifft(self):
+ if self.fft_backend == "opencl":
+ if self.is_cpu:
+ self.d_sino_padded.fill(0)
+ self.d_sino_padded.finish()
+ self.fft.ifft(self.d_sino_f, output=self.d_sino_padded)
+ if self.is_cpu:
+ self.d_sino_padded.finish()
+ else:
+ # numpy backend does not support "output=" argument,
+            # and irfft always returns a float64 result.
+ res = self.fft.ifft(self.d_sino_f).astype("f")
+ self.d_sino_padded[:] = res[:]
+
+ def filter_sino(self, sino, output=None):
+ """
+
+ :param sino: sinogram
+ :param output:
+ :return: filtered sinogram
+ """
+ # Handle input sinogram
+ self._prepare_input_sino(sino)
+ # FFT
+ self._do_fft()
+ # multiply with filter in the Fourier domain
+ self._multiply_fourier()
+ # iFFT
+ self._do_ifft()
+ # return
+ res = self._get_output_sino(output)
+ return res
+
+ __call__ = filter_sino
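+    # Illustrative usage (hedged: the constructor is defined earlier in this
+    # file and may take more arguments than shown here):
+    #     sino_filter = SinoFilter(sino.shape)   # hypothetical minimal call
+    #     filtered = sino_filter(sino)           # equivalent to filter_sino(sino)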
+
+
+
+
+# -------------------
+# - Compatibility -
+# -------------------
+
+
+def nextpow2(N):
+    """Return the smallest power of 2 greater than or equal to N."""
+    p = 1
+ while p < N:
+ p *= 2
+ return p
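+# Example: nextpow2(1000) == 1024. Sinogram lines are padded to a power of two
+# before the FFT, as FFT implementations are typically fastest at such sizes.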
+
+
+@deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
+def fourier_filter(sino, filter_=None, fft_size=None):
+ """Simple np based implementation of fourier space filter.
+ This function is deprecated, please use silx.opencl.sinofilter.SinoFilter.
+
+ :param sino: of shape shape = (num_projs, num_bins)
+ :param filter: filter function to apply in fourier space
+ :fft_size: size on which perform the fft. May be larger than the sino array
+ :return: filtered sinogram
+ """
+ assert sino.ndim == 2
+ num_projs, num_bins = sino.shape
+ if fft_size is None:
+ fft_size = nextpow2(num_bins * 2 - 1)
+ else:
+ assert fft_size >= num_bins
+ if fft_size == num_bins:
+ sino_zeropadded = sino.astype(np.float32)
+ else:
+ sino_zeropadded = np.zeros((num_projs, fft_size),
+ dtype=np.complex64)
+ sino_zeropadded[:, :num_bins] = sino.astype(np.float32)
+
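+    # Default filter: the spatial-domain ramp ("Ram-Lak") filter, built below
+    # and then transformed to the Fourier domain.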
+ if filter_ is None:
+ h = np.zeros(fft_size, dtype=np.float32)
+ L2 = fft_size // 2 + 1
+ h[0] = 1 / 4.
+ j = np.linspace(1, L2, L2 // 2, False)
+ h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2)
+ h[L2:] = np.copy(h[1:L2 - 1][::-1])
+ filter_ = np.fft.fft(h).astype(np.complex64)
+
+ # Linear convolution
+ sino_f = np.fft.fft(sino, fft_size)
+ sino_f = sino_f * filter_
+ sino_filtered = np.fft.ifft(sino_f)[:, :num_bins].real
+
+ return np.ascontiguousarray(sino_filtered.real, dtype=np.float32)
diff --git a/src/silx/opencl/sparse.py b/src/silx/opencl/sparse.py
new file mode 100644
index 0000000..514589a
--- /dev/null
+++ b/src/silx/opencl/sparse.py
@@ -0,0 +1,377 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for data sparsification on CPU/GPU."""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "07/06/2019"
+
+import numpy
+import pyopencl.array as parray
+from collections import namedtuple
+from pyopencl.scan import GenericScanKernel
+from pyopencl.tools import dtype_to_ctype
+from .common import pyopencl as cl
+from .processing import OpenclProcessing, EventDescription, BufferDescription
+mf = cl.mem_flags
+
+
+CSRData = namedtuple("CSRData", ["data", "indices", "indptr"])
+
+def tuple_to_csrdata(arrs):
+ """
+ Converts a 3-tuple to a CSRData namedtuple.
+ """
+ if arrs is None:
+ return None
+ return CSRData(data=arrs[0], indices=arrs[1], indptr=arrs[2])
+
+
+
+class CSR(OpenclProcessing):
+ kernel_files = ["sparse.cl"]
+
+ def __init__(self, shape, dtype="f", max_nnz=None, idx_dtype=numpy.int32,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ block_size=None, memory=None, profile=False):
+ """
+ Compute Compressed Sparse Row format of an image (2D matrix).
+ It is designed to be compatible with scipy.sparse.csr_matrix.
+
+ :param shape: tuple
+ Matrix shape.
+ :param dtype: str or numpy.dtype, optional
+ Numeric data type. By default, sparse matrix data will be float32.
+        :param max_nnz: int, optional
+            Maximum number of non-zero elements. By default, the arrays "data"
+            and "indices" are allocated with prod(shape) elements, but
+            in practice much less space is needed.
+            The number of non-zero items cannot be known in advance, but an
+            upper bound can be provided through this parameter to save memory.
+        :param idx_dtype: numpy.dtype, optional
+            Integer data type of the "indices" and "indptr" arrays
+            (numpy.int32 by default).
+
+ Opencl processing parameters
+ -----------------------------
+ Please refer to the documentation of silx.opencl.processing.OpenclProcessing
+ for information on the other parameters.
+ """
+
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ block_size=block_size, memory=memory,
+ profile=profile)
+ self._set_parameters(shape, dtype, max_nnz, idx_dtype)
+ self._allocate_memory()
+ self._setup_kernels()
+
+ # --------------------------------------------------------------------------
+ # -------------------------- Initialization --------------------------------
+ # --------------------------------------------------------------------------
+
+ def _set_parameters(self, shape, dtype, max_nnz, idx_dtype):
+ self.shape = shape
+ self.size = numpy.prod(shape)
+ self._set_idx_dtype(idx_dtype)
+        assert len(shape) == 2
+ if max_nnz is None:
+ self.max_nnz = numpy.prod(shape) # worst case
+ else:
+ self.max_nnz = int(max_nnz)
+ self._set_dtype(dtype)
+
+
+ def _set_idx_dtype(self, idx_dtype):
+ idx_dtype = numpy.dtype(idx_dtype)
+ if idx_dtype.kind not in ["i", "u"]:
+ raise ValueError("Not an integer type: %s" % idx_dtype)
+ # scan value type must have size divisible by 4 bytes
+ if idx_dtype.itemsize % 4 != 0:
+ raise ValueError("Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes")
+        self.indice_dtype = idx_dtype
+
+
+ def _set_dtype(self, dtype):
+ self.dtype = numpy.dtype(dtype)
+ if self.dtype.kind == "c":
+ raise ValueError("Complex data is not supported")
+ if self.dtype == numpy.dtype(numpy.float32):
+ self._c_zero_str = "0.0f"
+ elif self.dtype == numpy.dtype(numpy.float64):
+ self._c_zero_str = "0.0"
+ else: # assuming integer
+ self._c_zero_str = "0"
+ self.c_dtype = dtype_to_ctype(self.dtype)
+ self.idx_c_dtype = dtype_to_ctype(self.indice_dtype)
+
+
+ def _allocate_memory(self):
+ self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ?
+ self.buffers = [
+ BufferDescription("array", (self.size,), self.dtype, mf.READ_ONLY),
+ BufferDescription("data", (self.max_nnz,), self.dtype, mf.READ_WRITE),
+ BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE),
+ BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE),
+ ]
+ self.allocate_buffers(use_array=True)
+ for arr_name in ["array", "data", "indices", "indptr"]:
+ setattr(self, arr_name, self.cl_mem[arr_name])
+ self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty()
+ self._old_array = self.array
+ self._old_data = self.data
+ self._old_indices = self.indices
+ self._old_indptr = self.indptr
+
+
+ def _setup_kernels(self):
+ self._setup_compaction_kernel()
+ self._setup_decompaction_kernel()
+
+
+ def _setup_compaction_kernel(self):
+ kernel_signature = str(
+ "__global %s *data, \
+ __global %s *data_compacted, \
+ __global %s *indices, \
+ __global %s* indptr \
+ """ % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype)
+ )
+ if self.dtype.kind == "f":
+ map_nonzero_expr = "(fabs(data[i]) > %s) ? 1 : 0" % self._c_zero_str
+ elif self.dtype.kind in ["u", "i"]:
+ map_nonzero_expr = "(data[i] != %s) ? 1 : 0" % self._c_zero_str
+ else:
+ raise ValueError("Unknown data type")
+
+ self.scan_kernel = GenericScanKernel(
+ self.ctx, self.indice_dtype,
+ arguments=kernel_signature,
+ input_expr=map_nonzero_expr,
+ scan_expr="a+b", neutral="0",
+ output_statement="""
+ // item is the running sum of input_expr(i), i.e the cumsum of "nonzero"
+ if (prev_item != item) {
+ data_compacted[item-1] = data[i];
+ indices[item-1] = GET_INDEX(i);
+ }
+            // The last cumsum element of each line of "nonzero" goes to indptr[line+1]
+ if ((i+1) % IMAGE_WIDTH == 0) {
+ indptr[(i/IMAGE_WIDTH)+1] = item;
+ }
+ """,
+ options=["-DIMAGE_WIDTH=%d" % self.shape[1]],
+ preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)",
+ )
+
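+    # In short: the scan kernel computes a prefix sum of the "is non-zero"
+    # indicator over the flattened image; each non-zero pixel is written at
+    # position (prefix_sum - 1) in "data"/"indices", and the per-row running
+    # totals land in "indptr", giving the CSR layout in a single pass.
+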
+
+ def _setup_decompaction_kernel(self):
+ OpenclProcessing.compile_kernels(
+ self,
+ self.kernel_files,
+ compile_options=[
+ "-DIMAGE_WIDTH=%d" % self.shape[1],
+ "-DDTYPE=%s" % self.c_dtype,
+ "-DIDX_DTYPE=%s" % self.idx_c_dtype,
+ ]
+ )
+ device = self.ctx.devices[0]
+ wg_x = min(
+ device.max_work_group_size,
+ 32,
+ self.kernels.max_workgroup_size("densify_csr")
+ )
+ self._decomp_wg = (wg_x, 1)
+ self._decomp_grid = (self._decomp_wg[0], self.shape[0])
+
+
+ # --------------------------------------------------------------------------
+ # -------------------------- Array utils -----------------------------------
+ # --------------------------------------------------------------------------
+
+ # TODO handle pyopencl Buffer
+ def check_array(self, arr):
+ """
+ Check that provided array is compatible with current context.
+
+ :param arr: numpy.ndarray or pyopencl.array.Array
+ 2D array in dense format.
+ """
+ assert arr.size == self.size
+ assert arr.dtype == self.dtype
+
+
+ # TODO handle pyopencl Buffer
+ def check_sparse_arrays(self, csr_data):
+ """
+ Check that the provided sparse arrays are compatible with the current
+ context.
+
+        :param csr_data: CSRData namedtuple containing the arrays
+            "data", "indices" and "indptr".
+ """
+ assert isinstance(csr_data, CSRData)
+ for arr in [csr_data.data, csr_data.indices, csr_data.indptr]:
+ assert arr.ndim == 1
+ assert csr_data.data.size <= self.max_nnz
+ assert csr_data.indices.size <= self.max_nnz
+ assert csr_data.indptr.size == self.shape[0]+1
+ assert csr_data.data.dtype == self.dtype
+ assert csr_data.indices.dtype == self.indice_dtype
+ assert csr_data.indptr.dtype == self.indice_dtype
+
+
+ def set_array(self, arr):
+ """
+ Set the provided array as the current context 2D matrix.
+
+ :param arr: numpy.ndarray or pyopencl.array.Array
+ 2D array in dense format.
+ """
+ if arr is None:
+ return
+ self.check_array(arr)
+ # GenericScanKernel only supports 1D data
+ if isinstance(arr, parray.Array):
+ self._old_array = self.array
+ self.array = arr
+ elif isinstance(arr, numpy.ndarray):
+ self.array[:] = arr.ravel()[:]
+ else:
+ raise ValueError("Expected pyopencl array or numpy array")
+
+
+ def set_sparse_arrays(self, csr_data):
+ if csr_data is None:
+ return
+ self.check_sparse_arrays(csr_data)
+ for name, arr in {"data": csr_data.data, "indices": csr_data.indices, "indptr": csr_data.indptr}.items():
+            # The provided array is a device array: don't copy, use it directly
+ if isinstance(arr, parray.Array):
+ setattr(self, "_old_" + name, getattr(self, name))
+ setattr(self, name, arr)
+            # The provided array is a numpy.ndarray: copy it to the device (H->D)
+ elif isinstance(arr, numpy.ndarray):
+ getattr(self, name)[:arr.size] = arr[:]
+ else:
+ raise ValueError("Unsupported array type: %s" % type(arr))
+
+
+ def _recover_arrays_references(self):
+ """
+        Restore the previous array references, and return the references of the
+        "current" arrays.
+ """
+ array = self.array
+ data = self.data
+ indices = self.indices
+ indptr = self.indptr
+ for name in ["array", "data", "indices", "indptr"]:
+ # self.X = self._old_X
+ setattr(self, name, getattr(self, "_old_" + name))
+ return array, (data, indices, indptr)
+
+
+ def get_sparse_arrays(self, output):
+ """
+        Get the sparse CSR arrays (data, indices, indptr) of the current context.
+
+        :param output: tuple or None
+            Tuple in the form (data, indices, indptr). These arrays have to be
+            compatible with the current context (size and data type).
+            The content of these arrays is overwritten by the result of
+            the computation.
+ """
+ numels = self.max_nnz
+ if output is None:
+ data = self.data.get()[:numels]
+ ind = self.indices.get()[:numels]
+ indptr = self.indptr.get()
+ res = (data, ind, indptr)
+ else:
+ res = output
+ return res
+
+
+    def get_array(self, output):
+        """Get the 2D dense array of the current context (a new numpy array is
+        returned if output is None)."""
+        if output is None:
+ res = self.array.get().reshape(self.shape)
+ else:
+ res = output
+ return res
+
+ # --------------------------------------------------------------------------
+ # -------------------------- Compaction ------------------------------------
+ # --------------------------------------------------------------------------
+
+ def sparsify(self, arr, output=None):
+ """
+ Convert an image (2D matrix) into a CSR representation.
+
+ :param arr: numpy.ndarray or pyopencl.array.Array
+ Input array.
+ :param output: tuple of pyopencl.array.Array, optional
+ If provided, this must be a tuple of 3 arrays (data, indices, indptr).
+            The content of each array is overwritten by the computation result.
+        :return: the CSR representation as a (data, indices, indptr) tuple.
+        """
+ self.set_array(arr)
+ self.set_sparse_arrays(tuple_to_csrdata(output))
+ evt = self.scan_kernel(
+ self.array,
+ self.data,
+ self.indices,
+ self.indptr,
+ )
+ #~ evt.wait()
+ self.profile_add(evt, "sparsification kernel")
+ res = self.get_sparse_arrays(output)
+ self._recover_arrays_references()
+ return res
+
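+    # Illustrative usage sketch (hedged; "img" is a hypothetical float32 image):
+    #     csr = CSR(img.shape)
+    #     data, indices, indptr = csr.sparsify(img)
+    # "data" and "indices" are padded up to max_nnz; indptr[-1] holds the actual
+    # number of non-zero elements, as in scipy.sparse.csr_matrix(img).indptr.
+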
+ # --------------------------------------------------------------------------
+ # -------------------------- Decompaction ----------------------------------
+ # --------------------------------------------------------------------------
+
+    def densify(self, data, indices, indptr, output=None):
+        """
+        Convert a CSR representation (data, indices, indptr) back to a dense
+        2D array.
+
+        :param output: pyopencl.array.Array, optional
+            If provided, the dense result is written into it.
+        """
+ self.set_sparse_arrays(
+ CSRData(data=data, indices=indices, indptr=indptr)
+ )
+ self.set_array(output)
+ evt = self.kernels.densify_csr(
+ self.queue,
+ self._decomp_grid,
+ self._decomp_wg,
+ self.data.data,
+ self.indices.data,
+ self.indptr.data,
+ self.array.data,
+ numpy.int32(self.shape[0]),
+ )
+ #~ evt.wait()
+ self.profile_add(evt, "desparsification kernel")
+ res = self.get_array(output)
+ self._recover_arrays_references()
+ return res
+
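+    # Round-trip sketch (hedged): densify(*sparsify(img)) is expected to
+    # reproduce "img", since decompaction writes each stored non-zero value
+    # back at the position given by "indices" and "indptr".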
diff --git a/src/silx/opencl/statistics.py b/src/silx/opencl/statistics.py
new file mode 100644
index 0000000..a96ee33
--- /dev/null
+++ b/src/silx/opencl/statistics.py
@@ -0,0 +1,242 @@
+# -*- coding: utf-8 -*-
+#
+# Project: SILX
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2012-2019 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# .
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+# .
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""A module for performing basic statistical analysis (min, max, mean, std) on
+large data where numpy is not very efficient.
+"""
+
+__author__ = "Jerome Kieffer"
+__license__ = "MIT"
+__date__ = "19/05/2021"
+__copyright__ = "2012-2019, ESRF, Grenoble"
+__contact__ = "jerome.kieffer@esrf.fr"
+
+import logging
+import numpy
+from collections import OrderedDict, namedtuple
+from math import sqrt
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+from .utils import concatenate_cl_kernel
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+ from pyopencl.reduction import ReductionKernel
+ try:
+ from pyopencl import cltypes
+ except ImportError:
+ v = pyopencl.array.vec()
+ float8 = v.float8
+ else:
+ float8 = cltypes.float8
+
+else:
+ raise ImportError("pyopencl is not installed")
+logger = logging.getLogger(__name__)
+
+StatResults = namedtuple("StatResults", ["min", "max", "cnt", "sum", "mean",
+ "var", "std"])
+zero8 = "(float8)(FLT_MAX, -FLT_MAX, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)"
+# min max cnt cnt_e sum sum_e var var_e
+
+
+class Statistics(OpenclProcessing):
+ """A class for doing statistical analysis using OpenCL
+
+    :param int size: Number of elements of the input data to process
+ :param numpy.dtype dtype: Input data type
+ :param numpy.ndarray template: Data template to extract size & dtype
+ :param ctx: Actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param str devicetype: Type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param int platformid: Platform identifier as given by clinfo
+ :param int deviceid: Device identifier as given by clinfo
+ :param int block_size:
+ Preferred workgroup size, may vary depending on the outcome of the compilation
+ :param bool profile:
+ Switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+ """
+ buffers = [
+ BufferDescription("raw", 1, numpy.float32, mf.READ_ONLY),
+ BufferDescription("converted", 1, numpy.float32, mf.READ_WRITE),
+ ]
+ kernel_files = ["preprocess.cl"]
+ mapping = {numpy.int8: "s8_to_float",
+ numpy.uint8: "u8_to_float",
+ numpy.int16: "s16_to_float",
+ numpy.uint16: "u16_to_float",
+ numpy.uint32: "u32_to_float",
+ numpy.int32: "s32_to_float"}
+
+ def __init__(self, size=None, dtype=None, template=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ block_size=None, profile=False
+ ):
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ block_size=block_size, profile=profile)
+ self.size = size
+ self.dtype = dtype
+ if template is not None:
+ self.size = template.size
+ self.dtype = template.dtype
+
+ self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
+ for i in self.__class__.buffers]
+
+ self.allocate_buffers(use_array=True)
+ self.compile_kernels()
+ self.set_kernel_arguments()
+
+ def set_kernel_arguments(self):
+ """Parametrize all kernel arguments"""
+ for val in self.mapping.values():
+ self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("raw", "converted")))
+
+ def compile_kernels(self):
+ """Compile the kernel"""
+ OpenclProcessing.compile_kernels(self,
+ self.kernel_files,
+ "-D NIMAGE=%i" % self.size)
+ compiler_options = self.get_compiler_options(x87_volatile=True)
+ src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl"))
+ self.reduction_comp = ReductionKernel(self.ctx,
+ dtype_out=float8,
+ neutral=zero8,
+ map_expr="map_statistics(data, i)",
+ reduce_expr="reduce_statistics(a,b)",
+ arguments="__global float *data",
+ preamble=src,
+ options=compiler_options)
+ self.reduction_simple = ReductionKernel(self.ctx,
+ dtype_out=float8,
+ neutral=zero8,
+ map_expr="map_statistics(data, i)",
+ reduce_expr="reduce_statistics_simple(a,b)",
+ arguments="__global float *data",
+ preamble=src,
+ options=compiler_options)
+
+ if "cl_khr_fp64" in self.device.extensions:
+ self.reduction_double = ReductionKernel(self.ctx,
+ dtype_out=float8,
+ neutral=zero8,
+ map_expr="map_statistics(data, i)",
+ reduce_expr="reduce_statistics_double(a,b)",
+ arguments="__global float *data",
+ preamble=src,
+ options=compiler_options)
+ else:
+ logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device)
+ self.reduction_double = self.reduction_comp
+
+ def send_buffer(self, data, dest):
+ """
+ Send a numpy array to the device, including the cast on the device if
+ possible
+
+ :param numpy.ndarray data: numpy array with data
+ :param dest: name of the buffer as registered in the class
+ """
+ logger.info("send data to %s", dest)
+ dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
+ events = []
+ if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
+ copy_image = pyopencl.enqueue_copy(self.queue,
+ self.cl_mem[dest].data,
+ numpy.ascontiguousarray(data, dest_type))
+ events.append(EventDescription("copy H->D %s" % dest, copy_image))
+ else:
+ copy_image = pyopencl.enqueue_copy(self.queue,
+ self.cl_mem["raw"].data,
+ numpy.ascontiguousarray(data))
+ kernel = getattr(self.program, self.mapping[data.dtype.type])
+ cast_to_float = kernel(self.queue,
+ (self.size,),
+ None,
+ self.cl_mem["raw"].data,
+ self.cl_mem[dest].data)
+ events += [
+ EventDescription("copy H->D raw", copy_image),
+ EventDescription(f"cast to float {dest}", cast_to_float)
+ ]
+ if self.profile:
+ self.events += events
+ return events
+
+ def process(self, data, comp=True):
+ """Actually calculate the statics on the data
+
+ :param numpy.ndarray data: numpy array with the image
+ :param comp: use Kahan compensated arithmetics for the calculation
+ :return: Statistics named tuple
+ :rtype: StatResults
+ """
+ if data.ndim != 1:
+ data = data.ravel()
+ size = data.size
+        assert size <= self.size, "data size exceeds the allocated buffer size"
+ events = []
+ if comp is True:
+ comp = "comp"
+ elif comp is False:
+ comp = "single"
+ else:
+ comp = comp.lower()
+ with self.sem:
+ self.send_buffer(data, "converted")
+ if comp in ("single", "fp32", "float32"):
+ reduction = self.reduction_simple
+ elif comp in ("double", "fp64", "float64"):
+ reduction = self.reduction_double
+ else:
+ reduction = self.reduction_comp
+ res_d, evt = reduction(self.cl_mem["converted"][:self.size],
+ queue=self.queue,
+ return_event=True)
+ events.append(EventDescription(f"statistical reduction {comp}", evt))
+ if self.profile:
+ self.events += events
+ res_h = res_d.get()
+ min_ = 1.0 * res_h["s0"]
+ max_ = 1.0 * res_h["s1"]
+ count = 1.0 * res_h["s2"] + res_h["s3"]
+ sum_ = 1.0 * res_h["s4"] + res_h["s5"]
+ m2 = 1.0 * res_h["s6"] + res_h["s7"]
+ var = m2 / (count - 1.0)
+ res = StatResults(min_,
+ max_,
+ count,
+ sum_,
+ sum_ / count,
+ var,
+ sqrt(var))
+ return res
+
+ __call__ = process
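+    # Illustrative usage sketch (hedged; "img" is any array whose dtype is
+    # float32 or listed in the "mapping" table above):
+    #     stats = Statistics(template=img)
+    #     res = stats(img)   # StatResults(min, max, cnt, sum, mean, var, std)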
diff --git a/src/silx/opencl/test/__init__.py b/src/silx/opencl/test/__init__.py
new file mode 100644
index 0000000..92cda4a
--- /dev/null
+++ b/src/silx/opencl/test/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+#
+# Project: silx
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2012-2016 European Synchrotron Radiation Facility, Grenoble, France
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
diff --git a/src/silx/opencl/test/test_addition.py b/src/silx/opencl/test/test_addition.py
new file mode 100644
index 0000000..3b668bf
--- /dev/null
+++ b/src/silx/opencl/test/test_addition.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: Sift implementation in Python + OpenCL
+# https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of an addition
+"""
+
+__authors__ = ["Henri Payno, Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "30/11/2020"
+
+import logging
+import numpy
+import pytest
+
+import unittest
+from ..common import ocl, _measure_workgroup_size, query_kernel_info
+if ocl:
+ import pyopencl
+ import pyopencl.array
+from ..utils import get_opencl_code
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl, "PyOpenCl is missing")
+class TestAddition(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ super(TestAddition, cls).setUpClass()
+ if ocl:
+ cls.ctx = ocl.create_context()
+ if logger.getEffectiveLevel() <= logging.INFO:
+ cls.PROFILE = True
+ cls.queue = pyopencl.CommandQueue(
+ cls.ctx,
+ properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+ else:
+ cls.PROFILE = False
+ cls.queue = pyopencl.CommandQueue(cls.ctx)
+ cls.max_valid_wg = 0
+
+ @classmethod
+ def tearDownClass(cls):
+ super(TestAddition, cls).tearDownClass()
+ print("Maximum valid workgroup size %s on device %s" % (cls.max_valid_wg, cls.ctx.devices[0]))
+ cls.ctx = None
+ cls.queue = None
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.shape = 4096
+ self.data = numpy.random.random(self.shape).astype(numpy.float32)
+ self.d_array_img = pyopencl.array.to_device(self.queue, self.data)
+ self.d_array_5 = pyopencl.array.empty_like(self.d_array_img)
+ self.d_array_5.fill(-5)
+ self.program = pyopencl.Program(self.ctx, get_opencl_code("addition")).build()
+
+ def tearDown(self):
+ self.img = self.data = None
+ self.d_array_img = self.d_array_5 = self.program = None
+
+ def test_add(self):
+ """
+ tests the addition kernel
+ """
+ maxi = int(round(numpy.log2(self.shape)))
+ for i in range(maxi):
+ d_array_result = pyopencl.array.empty_like(self.d_array_img)
+ wg = 1 << i
+ try:
+ evt = self.program.addition(self.queue, (self.shape,), (wg,),
+ self.d_array_img.data, self.d_array_5.data, d_array_result.data, numpy.int32(self.shape))
+ evt.wait()
+ except Exception as error:
+ max_valid_wg = self.program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0])
+ msg = "Error %s on WG=%s: %s" % (error, wg, max_valid_wg)
+ self.assertLess(max_valid_wg, wg, msg)
+ break
+ else:
+ res = d_array_result.get()
+ good = numpy.allclose(res, self.data - 5)
+ if good and wg > self.max_valid_wg:
+ self.__class__.max_valid_wg = wg
+                self.assertTrue(good, "calculation is incorrect for WG=%s" % wg)
+
+ def test_measurement(self):
+ """
+ tests that all devices are working properly ... lengthy and error prone
+ """
+ for platform in ocl.platforms:
+ for did, device in enumerate(platform.devices):
+ meas = _measure_workgroup_size((platform.id, device.id))
+ self.assertEqual(meas, device.max_work_group_size,
+ "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size))
+
+ def test_query(self):
+ """
+        tests querying kernel work-group information on the current device
+ """
+ for what in ("COMPILE_WORK_GROUP_SIZE",
+ "LOCAL_MEM_SIZE",
+ "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
+ "PRIVATE_MEM_SIZE",
+ "WORK_GROUP_SIZE"):
+ logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what))
+
+ # Not all ICD work properly ....
+ #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
+
+ min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
+ max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE")
+ self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg")
diff --git a/src/silx/opencl/test/test_array_utils.py b/src/silx/opencl/test/test_array_utils.py
new file mode 100644
index 0000000..325a6c3
--- /dev/null
+++ b/src/silx/opencl/test/test_array_utils.py
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the OpenCL array_utils"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "14/06/2017"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ import pyopencl as cl
+ import pyopencl.array as parray
+ from .. import linalg
+from ..utils import get_opencl_code
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+try:
+ from scipy.ndimage.filters import laplace
+ _has_scipy = True
+except ImportError:
+ _has_scipy = False
+
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestCpy2d(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.ctx = ocl.create_context()
+ if logger.getEffectiveLevel() <= logging.INFO:
+ self.PROFILE = True
+ self.queue = cl.CommandQueue(
+ self.ctx,
+ properties=cl.command_queue_properties.PROFILING_ENABLE)
+ else:
+ self.PROFILE = False
+ self.queue = cl.CommandQueue(self.ctx)
+ self.allocate_arrays()
+ self.program = cl.Program(self.ctx, get_opencl_code("array_utils")).build()
+
+ def allocate_arrays(self):
+ """
+ Allocate various types of arrays for the tests
+ """
+ self.prng_state = np.random.get_state()
+ # Generate arrays of random shape
+ self.shape1 = np.random.randint(20, high=512, size=(2,))
+ self.shape2 = np.random.randint(20, high=512, size=(2,))
+ self.array1 = np.random.rand(*self.shape1).astype(np.float32)
+ self.array2 = np.random.rand(*self.shape2).astype(np.float32)
+ self.d_array1 = parray.to_device(self.queue, self.array1)
+ self.d_array2 = parray.to_device(self.queue, self.array2)
+ # Generate random offsets
+ offset1_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
+ offset1_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
+ offset2_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
+ offset2_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
+ self.offset1 = (offset1_y, offset1_x)
+ self.offset2 = (offset2_y, offset2_x)
+ # Compute the size of the rectangle to transfer
+ size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1)
+ size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1)
+ self.transfer_shape = (size_y, size_x)
+
+ def tearDown(self):
+ self.array1 = None
+ self.array2 = None
+ self.d_array1.data.release()
+ self.d_array2.data.release()
+ self.d_array1 = None
+ self.d_array2 = None
+ self.ctx = None
+ self.queue = None
+
+ def compare(self, result, reference):
+ errmax = np.max(np.abs(result - reference))
+ logger.info("Max error = %e" % (errmax))
+        self.assertTrue(errmax == 0, "Max error is too high")
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_cpy2d(self):
+ """
+ Test rectangular transfer of self.d_array1 to self.d_array2
+ """
+ # Reference
+ o1 = self.offset1
+ o2 = self.offset2
+ T = self.transfer_shape
+ logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s,
+ (N2_y, N2_x) = %s:
+ array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" %
+ (
+ str(self.shape1), str(self.shape2),
+ o2[0], o2[0] + T[0],
+ o2[1], o2[1] + T[1],
+ o1[0], o1[0] + T[0],
+ o1[1], o1[1] + T[1]
+ )
+ )
+ self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]]
+ kernel_args = (
+ self.d_array2.data,
+ self.d_array1.data,
+ np.int32(self.shape2[1]),
+ np.int32(self.shape1[1]),
+ np.int32(self.offset2[::-1]),
+ np.int32(self.offset1[::-1]),
+ np.int32(self.transfer_shape[::-1])
+ )
+ wg = None
+ ndrange = self.transfer_shape[::-1]
+ self.program.cpy2d(self.queue, ndrange, wg, *kernel_args)
+ res = self.d_array2.get()
+ self.compare(res, self.array2)
diff --git a/src/silx/opencl/test/test_backprojection.py b/src/silx/opencl/test/test_backprojection.py
new file mode 100644
index 0000000..96d56fa
--- /dev/null
+++ b/src/silx/opencl/test/test_backprojection.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the filtered backprojection module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "19/01/2018"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+from math import pi
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ from .. import backprojection
+ from ...image.tomography import compute_fourier_filter
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+
+
+def generate_coords(img_shp, center=None):
+ """
+ Return two 2D arrays containing the indexes of an image.
+ The zero is at the center of the image.
+ """
+ l_r, l_c = float(img_shp[0]), float(img_shp[1])
+ R, C = np.mgrid[:l_r, :l_c]
+ if center is None:
+ center0, center1 = l_r / 2., l_c / 2.
+ else:
+ center0, center1 = center
+ R = R + 0.5 - center0
+ C = C + 0.5 - center1
+ return R, C
+
+
+def clip_circle(img, center=None, radius=None):
+ """
+ Puts zeros outside the inscribed circle of the image support.
+ """
+ R, C = generate_coords(img.shape, center)
+ M = R * R + C * C
+ res = np.zeros_like(img)
+ if radius is None:
+ radius = img.shape[0] / 2. - 1
+ mask = M < radius * radius
+ res[mask] = img[mask]
+ return res
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestFBP(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.getfiles()
+ self.fbp = backprojection.Backprojection(self.sino.shape, profile=True)
+ if self.fbp.compiletime_workgroup_size < 16 * 16:
+ self.skipTest("Current implementation of OpenCL backprojection is "
+ "not supported on this platform yet")
+ # Astra does not use the same backprojector implementation.
+ # Therefore, we cannot expect results to be the "same" (up to float32
+ # numerical error)
+ self.tol = 5e-2
+ if not(self.fbp._use_textures) or self.fbp.device.type == "CPU":
+ # Precision is less when using CPU
+ # (either CPU textures or "manual" linear interpolation)
+ self.tol *= 2
+
+ def tearDown(self):
+ self.sino = None
+ # self.fbp.log_profile()
+ self.fbp = None
+
+ def getfiles(self):
+ # load sinogram of 512x512 MRI phantom
+ self.sino = np.load(utilstest.getfile("sino500.npz"))["data"]
+ # load reconstruction made with ASTRA FBP (with filter designed in spatial domain)
+ self.reference_rec = np.load(utilstest.getfile("rec_astra_500.npz"))["data"]
+
+ def measure(self):
+ "Common measurement of timings"
+ t1 = time.time()
+ try:
+ result = self.fbp.filtered_backprojection(self.sino)
+ except RuntimeError as msg:
+ logger.error(msg)
+            return None, None
+ t2 = time.time()
+ return t2 - t1, result
+
+ def compare(self, res):
+ """
+ Compare a result with the reference reconstruction.
+ Only the valid reconstruction zone (inscribed circle) is taken into
+ account
+ """
+ res_clipped = clip_circle(res)
+ ref_clipped = clip_circle(self.reference_rec)
+ delta = abs(res_clipped - ref_clipped)
+ bad = delta > 1
+ logger.debug("Absolute difference: %s with %s outlier pixels out of %s"
+ "", delta.max(), bad.sum(), np.prod(bad.shape))
+ return delta.max()
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_fbp(self):
+ """
+ tests FBP
+ """
+ # Test single reconstruction
+ # --------------------------
+ t, res = self.measure()
+ if t is None:
+ logger.info("test_fp: skipped")
+ else:
+ logger.info("test_backproj: time = %.3fs" % t)
+ err = self.compare(res)
+ msg = str("Max error = %e" % err)
+ logger.info(msg)
+ self.assertTrue(err < self.tol, "Max error is too high")
+
+ # Test multiple reconstructions
+ # -----------------------------
+ res0 = np.copy(res)
+ for i in range(10):
+ res = self.fbp.filtered_backprojection(self.sino)
+ errmax = np.max(np.abs(res - res0))
+ self.assertTrue(errmax < 1.e-6, "Max error is too high")
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_fbp_filters(self):
+ """
+ Test the different available filters of silx FBP.
+ """
+ avail_filters = [
+ "ramlak", "shepp-logan", "cosine", "hamming",
+ "hann"
+ ]
+ # Create a Dirac delta function at a single angle view.
+        # As the filters are radially invariant:
+ # - backprojection yields an image where each line is a Dirac.
+ # - FBP yields an image where each line is the spatial filter
+ # One can simply filter "dirac" without backprojecting it, but this
+ # test will also ensure that backprojection behaves well.
+ dirac = np.zeros_like(self.sino)
+ na, dw = dirac.shape
+ dirac[0, dw//2] = na / pi * 2
+
+ for filter_name in avail_filters:
+ B = backprojection.Backprojection(dirac.shape, filter_name=filter_name)
+ r = B(dirac)
+ # Check that radial invariance is kept
+ std0 = np.max(np.abs(np.std(r, axis=0)))
+ self.assertTrue(
+ std0 < 5.e-6,
+ "Something wrong with FBP(filter=%s)" % filter_name
+ )
+ # Check that the filter is retrieved
+ r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2. # filter factor
+ ref_filter_f = compute_fourier_filter(dw, filter_name)
+ errmax = np.max(np.abs(r_f - ref_filter_f))
+ logger.info("FBP filter %s: max error=%e" % (filter_name, errmax))
+ self.assertTrue(
+ errmax < 1.e-3,
+ "Something wrong with FBP(filter=%s)" % filter_name
+ )
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_fbp_oddsize(self):
+ # Generate a 513-sinogram.
+        # The padded width will be nextpow2(513*2).
+ # silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail.
+ sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge')
+ B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1)
+ res = B(sino)
+ # Compare with self.reference_rec. Tolerance is high as backprojector
+ # is not fully shift-invariant.
+ errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec)))
+ self.assertLess(
+ errmax, 1.e-1,
+ "Something wrong with FBP on odd-sized sinogram"
+ )
diff --git a/src/silx/opencl/test/test_convolution.py b/src/silx/opencl/test/test_convolution.py
new file mode 100644
index 0000000..6a2759d
--- /dev/null
+++ b/src/silx/opencl/test/test_convolution.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+
+"""
+Test of the Convolution class.
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre Paleo"]
+__contact__ = "pierre.paleo@esrf.fr"
+__license__ = "MIT"
+__copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "01/08/2019"
+
+import pytest
+import logging
+from itertools import product
+import numpy as np
+from silx.image.utils import gaussian_kernel
+
+try:
+ from scipy.ndimage import convolve, convolve1d
+ from scipy.misc import ascent
+
+ scipy_convolve = convolve
+ scipy_convolve1d = convolve1d
+except ImportError:
+ scipy_convolve = None
+import unittest
+from ..common import ocl, check_textures_availability
+
+if ocl:
+ import pyopencl as cl
+ import pyopencl.array as parray
+ from silx.opencl.convolution import Convolution
+logger = logging.getLogger(__name__)
+
+
+class ConvolutionData:
+
+ def __init__(self, param):
+ self.param = param
+ self.mode = param["boundary_handling"]
+ logger.debug(
+ """
+ Testing convolution with boundary_handling=%s,
+ use_textures=%s, input_device=%s, output_device=%s
+ """
+ % (
+ self.mode,
+ param["use_textures"],
+ param["input_on_device"],
+ param["output_on_device"],
+ )
+ )
+
+ @classmethod
+ def setUpClass(cls):
+ cls.image = np.ascontiguousarray(ascent()[:, :511], dtype="f")
+ cls.data1d = cls.image[0]
+ cls.data2d = cls.image
+ cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1))
+ cls.kernel1d = gaussian_kernel(1.0)
+ cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d)
+ cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d)
+ cls.ctx = ocl.create_context()
+ cls.tol = {
+ "1D": 1e-4,
+ "2D": 1e-3,
+ "3D": 1e-3,
+ }
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.data1d = cls.data2d = cls.data3d = cls.image = None
+ cls.kernel1d = cls.kernel2d = cls.kernel3d = None
+
+ @staticmethod
+ def compare(arr1, arr2):
+ return np.max(np.abs(arr1 - arr2))
+
+ @staticmethod
+ def print_err(conv):
+ errmsg = str(
+ """
+ Something wrong with %s
+ mode=%s, texture=%s
+ """
+ % (conv.use_case_desc, conv.mode, conv.use_textures)
+ )
+ return errmsg
+
+ def instantiate_convol(self, shape, kernel, axes=None):
+ if self.mode == "constant":
+ if not (self.param["use_textures"]) or (
+ self.param["use_textures"]
+ and not (check_textures_availability(self.ctx))
+ ):
+ pytest.skip("mode=constant not implemented without textures")
+ C = Convolution(
+ shape,
+ kernel,
+ mode=self.mode,
+ ctx=self.ctx,
+ axes=axes,
+ extra_options={"dont_use_textures": not (self.param["use_textures"])},
+ )
+ return C
+
+ def get_data_and_kernel(self, test_name):
+ dims = {
+ "test_1D": (1, 1),
+ "test_separable_2D": (2, 1),
+ "test_separable_3D": (3, 1),
+ "test_nonseparable_2D": (2, 2),
+ "test_nonseparable_3D": (3, 3),
+ }
+ dim_data = {1: self.data1d, 2: self.data2d, 3: self.data3d}
+ dim_kernel = {
+ 1: self.kernel1d,
+ 2: self.kernel2d,
+ 3: self.kernel3d,
+ }
+ dd, kd = dims[test_name]
+ return dim_data[dd], dim_kernel[kd]
+
+ def get_reference_function(self, test_name):
+ ref_func = {
+ "test_1D": lambda x, y: scipy_convolve1d(x, y, mode=self.mode),
+ "test_separable_2D": lambda x, y: scipy_convolve1d(
+ scipy_convolve1d(x, y, mode=self.mode, axis=1),
+ y,
+ mode=self.mode,
+ axis=0,
+ ),
+ "test_separable_3D": lambda x, y: scipy_convolve1d(
+ scipy_convolve1d(
+ scipy_convolve1d(x, y, mode=self.mode, axis=2),
+ y,
+ mode=self.mode,
+ axis=1,
+ ),
+ y,
+ mode=self.mode,
+ axis=0,
+ ),
+ "test_nonseparable_2D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
+ "test_nonseparable_3D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
+ }
+ return ref_func[test_name]
+
+ def template_test(self, test_name):
+ data, kernel = self.get_data_and_kernel(test_name)
+ conv = self.instantiate_convol(data.shape, kernel)
+ if self.param["input_on_device"]:
+ data_ref = parray.to_device(conv.queue, data)
+ else:
+ data_ref = data
+ if self.param["output_on_device"]:
+ d_res = parray.empty_like(conv.data_out)
+ d_res.fill(0)
+ res = d_res
+ else:
+ res = None
+ res = conv(data_ref, output=res)
+ if self.param["output_on_device"]:
+ res = res.get()
+ ref_func = self.get_reference_function(test_name)
+ ref = ref_func(data, kernel)
+ metric = self.compare(res, ref)
+ logger.info("%s: max error = %.2e" % (test_name, metric))
+ tol = self.tol[str("%dD" % kernel.ndim)]
+ assert metric < tol, self.print_err(conv)
+
+
+def convolution_data_params():
+ boundary_handlings = ["reflect", "nearest", "wrap", "constant"]
+ use_textures = [True, False]
+ input_on_devices = [True, False]
+ output_on_devices = [True, False]
+ param_vals = list(
+ product(boundary_handlings, use_textures, input_on_devices, output_on_devices)
+ )
+ params = []
+ for boundary_handling, use_texture, input_dev, output_dev in param_vals:
+ param={
+ "boundary_handling": boundary_handling,
+ "input_on_device": input_dev,
+ "output_on_device": output_dev,
+ "use_textures": use_texture,
+ }
+ params.append(param)
+
+ return params
+
+
+@pytest.fixture(scope="module", params=convolution_data_params())
+def convolution_data(request):
+ """Provide a set of convolution data
+
+    The module scope allows each test function to run against a single setup of
+    each convolution data set.
+ """
+ cdata = None
+ try:
+ cdata = ConvolutionData(request.param)
+ cdata.setUpClass()
+ yield cdata
+ finally:
+ cdata.tearDownClass()
+
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_1D(convolution_data):
+ convolution_data.template_test("test_1D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_separable_2D(convolution_data):
+ convolution_data.template_test("test_separable_2D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_separable_3D(convolution_data):
+ convolution_data.template_test("test_separable_3D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_nonseparable_2D(convolution_data):
+ convolution_data.template_test("test_nonseparable_2D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_nonseparable_3D(convolution_data):
+ convolution_data.template_test("test_nonseparable_3D")
+
+@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
+@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
+def test_batched_2D(convolution_data):
+ """
+ Test batched (nonseparable) 2D convolution on 3D data.
+ In this test: batch along "z" (axis 0)
+ """
+ data = convolution_data.data3d
+ kernel = convolution_data.kernel2d
+ conv = convolution_data.instantiate_convol(data.shape, kernel, axes=(0,))
+ res = conv(data) # 3D
+ ref = scipy_convolve(data[0], kernel, mode=convolution_data.mode) # 2D
+
+ std = np.std(res, axis=0)
+ std_max = np.max(np.abs(std))
+ assert std_max < convolution_data.tol["2D"], convolution_data.print_err(conv)
+ metric = convolution_data.compare(res[0], ref)
+ logger.info("test_nonseparable_3D: max error = %.2e" % metric)
+ assert metric < convolution_data.tol["2D"], convolution_data.print_err(conv)
diff --git a/src/silx/opencl/test/test_doubleword.py b/src/silx/opencl/test/test_doubleword.py
new file mode 100644
index 0000000..a33cf5a
--- /dev/null
+++ b/src/silx/opencl/test/test_doubleword.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python
+# coding: utf-8
+#
+# Project: The silx project
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2021-2021 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"test suite for OpenCL code"
+
+__author__ = "Jérôme Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "31/05/2021"
+
+import unittest
+import numpy
+import logging
+import platform
+
+logger = logging.getLogger(__name__)
+try:
+ import pyopencl
+except ImportError as error:
+ logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
+ pyopencl = None
+
+from .. import ocl
+if ocl is not None:
+ from ..utils import read_cl_file
+ from .. import pyopencl
+ import pyopencl.array
+ from pyopencl.elementwise import ElementwiseKernel
+
+EPS32 = numpy.finfo("float32").eps
+EPS64 = numpy.finfo("float64").eps
+
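+# In these tests a float64 reference value is split into a float32 high part
+# plus a float32 low part (value ~ high + low); the double-word kernels under
+# test are expected to keep sums and products exact when both parts are used.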
+
+@unittest.skipUnless(ocl, "PyOpenCl is missing")
+class TestDoubleWord(unittest.TestCase):
+ """
+ Test the kernels for compensated math in OpenCL
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ if pyopencl is None or ocl is None:
+ raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+
+ cls.ctx = ocl.create_context(devicetype="GPU")
+ cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+
+        # workaround needed when running 32-bit OpenCL with POCL on x86
+ if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
+ cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+ cls.args = "-DX87_VOLATILE=volatile"
+ else:
+ cls.args = ""
+ size = 1024
+ cls.a = 1.0 + numpy.random.random(size)
+ cls.b = 1.0 + numpy.random.random(size)
+ cls.ah = cls.a.astype(numpy.float32)
+ cls.bh = cls.b.astype(numpy.float32)
+ cls.al = (cls.a - cls.ah).astype(numpy.float32)
+ cls.bl = (cls.b - cls.bh).astype(numpy.float32)
+ cls.doubleword = read_cl_file("doubleword.cl")
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.queue = None
+ cls.ctx = None
+ cls.a = cls.al = cls.ah = None
+ cls.b = cls.bl = cls.bh = None
+ cls.doubleword = None
+
+ def test_fast_sum2(self):
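+        # fast_fp_plus_fp is expected to be Dekker's Fast2Sum (valid when |a| >= |b|):
+        # it returns the rounded float32 sum and its exact rounding error as (high, low).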
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
+ preamble=self.doubleword)
+ a_g = pyopencl.array.to_device(self.queue, self.ah)
+ b_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(a_g)
+ res_h = pyopencl.array.empty_like(a_g)
+ test_kernel(a_g, b_g, res_h, res_l)
+ self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches")
+ self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches")
+ self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+ def test_sum2(self):
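+        # fp_plus_fp is presumably Knuth's 2Sum: same output as above,
+        # but without any assumption on the operands' magnitudes.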
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ a_g = pyopencl.array.to_device(self.queue, self.ah)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(a_g)
+ res_h = pyopencl.array.empty_like(a_g)
+ test_kernel(a_g, b_g, res_h, res_l)
+ self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches")
+ self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches")
+ self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+
+ def test_prod2(self):
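+        # fp_times_fp returns the rounded float32 product together with its exact
+        # rounding error (a 2Prod-style product), checked below for exactness.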
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ a_g = pyopencl.array.to_device(self.queue, self.ah)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(a_g)
+ res_h = pyopencl.array.empty_like(a_g)
+ test_kernel(a_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches")
+ self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches")
+ self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches")
+
+ def test_dw_plus_fp(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(b_g)
+ res_h = pyopencl.array.empty_like(b_g)
+ test_kernel(ah_g, al_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_plus_dw(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ bh_g = pyopencl.array.to_device(self.queue, self.bh)
+ bl_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(bh_g)
+ res_h = pyopencl.array.empty_like(bh_g)
+ test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_times_fp(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(b_g)
+ res_h = pyopencl.array.empty_like(b_g)
+ test_kernel(ah_g, al_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_times_dw(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ bh_g = pyopencl.array.to_device(self.queue, self.bh)
+ bl_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(bh_g)
+ res_h = pyopencl.array.empty_like(bh_g)
+ test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_div_fp(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ b_g = pyopencl.array.to_device(self.queue, self.bh)
+ res_l = pyopencl.array.empty_like(b_g)
+ res_h = pyopencl.array.empty_like(b_g)
+ test_kernel(ah_g, al_g, b_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches")
+
+ def test_dw_div_dw(self):
+ test_kernel = ElementwiseKernel(self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword)
+ ah_g = pyopencl.array.to_device(self.queue, self.ah)
+ al_g = pyopencl.array.to_device(self.queue, self.al)
+ bh_g = pyopencl.array.to_device(self.queue, self.bh)
+ bl_g = pyopencl.array.to_device(self.queue, self.bl)
+ res_l = pyopencl.array.empty_like(bh_g)
+ res_h = pyopencl.array.empty_like(bh_g)
+ test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
+ res_m = res_h.get()
+ res = res_h.get().astype(numpy.float64) + res_l.get()
+ self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches")
+ self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches")
+ self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches")
diff --git a/src/silx/opencl/test/test_image.py b/src/silx/opencl/test/test_image.py
new file mode 100644
index 0000000..73c771b
--- /dev/null
+++ b/src/silx/opencl/test/test_image.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: image manipulation in OpenCL
+# https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of image manipulation
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "13/02/2018"
+
+import logging
+import numpy
+
+import unittest
+from ..common import ocl, _measure_workgroup_size
+if ocl:
+ import pyopencl
+ import pyopencl.array
+from ...test.utils import utilstest
+from ..image import ImageProcessing
+logger = logging.getLogger(__name__)
+try:
+ from PIL import Image
+except ImportError:
+ Image = None
+
+
+@unittest.skipUnless(ocl and Image, "PyOpenCl/Image is missing")
+class TestImage(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ super(TestImage, cls).setUpClass()
+ if ocl:
+ cls.ctx = ocl.create_context()
+ cls.lena = utilstest.getfile("lena.png")
+ cls.data = numpy.asarray(Image.open(cls.lena))
+ cls.ip = ImageProcessing(ctx=cls.ctx, template=cls.data, profile=True)
+
+ @classmethod
+ def tearDownClass(cls):
+ super(TestImage, cls).tearDownClass()
+ cls.ctx = None
+ cls.lena = None
+ cls.data = None
+ if logger.level <= logging.INFO:
+ logger.warning("\n".join(cls.ip.log_profile()))
+ cls.ip = None
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.data = numpy.asarray(Image.open(self.lena))
+
+ def tearDown(self):
+ self.img = self.data = None
+
+ @unittest.skipUnless(ocl, "pyopencl is missing")
+ def test_cast(self):
+ """
+ tests the cast kernel
+ """
+ res = self.ip.to_float(self.data)
+ self.assertEqual(res.shape, self.data.shape, "shape")
+ self.assertEqual(res.dtype, numpy.float32, "dtype")
+ self.assertEqual(abs(res - self.data).max(), 0, "content")
+
+ @unittest.skipUnless(ocl, "pyopencl is missing")
+ def test_normalize(self):
+ """
+        tests the normalize kernel (rescale to the [-100, 100] range)
+ """
+ tmp = pyopencl.array.empty(self.ip.ctx, self.data.shape, "float32")
+ res = self.ip.to_float(self.data, out=tmp)
+ res2 = self.ip.normalize(tmp, -100, 100, copy=False)
+ norm = (self.data.astype(numpy.float32) - self.data.min()) / (self.data.max() - self.data.min())
+ ref2 = 200 * norm - 100
+ self.assertLess(abs(res2 - ref2).max(), 3e-5, "content")
+
+ @unittest.skipUnless(ocl, "pyopencl is missing")
+ def test_histogram(self):
+ """
+ Test on a greyscaled image ... of Lena :)
+ """
+ lena_bw = (0.2126 * self.data[:, :, 0] +
+ 0.7152 * self.data[:, :, 1] +
+ 0.0722 * self.data[:, :, 2]).astype("int32")
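+        # Rec. 709 luma weights convert the RGB image to greyscale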
+ ref = numpy.histogram(lena_bw, 255)
+ ip = ImageProcessing(ctx=self.ctx, template=lena_bw, profile=True)
+ res = ip.histogram(lena_bw, 255)
+ ip.log_profile()
+ delta = (ref[0] - res[0])
+ deltap = (ref[1] - res[1])
+ self.assertEqual(delta.sum(), 0, "errors are self-compensated")
+ self.assertLessEqual(abs(delta).max(), 1, "errors are small")
+ self.assertLessEqual(abs(deltap).max(), 3e-5, "errors on position are small: %s" % (abs(deltap).max()))
diff --git a/src/silx/opencl/test/test_kahan.py b/src/silx/opencl/test/test_kahan.py
new file mode 100644
index 0000000..9e4a1e3
--- /dev/null
+++ b/src/silx/opencl/test/test_kahan.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+# coding: utf-8
+#
+# Project: OpenCL numerical library
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2015-2021 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"test suite for OpenCL code"
+
+__author__ = "Jérôme Kieffer"
+__contact__ = "Jerome.Kieffer@ESRF.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "17/05/2021"
+
+
+import unittest
+import numpy
+import logging
+import platform
+
+logger = logging.getLogger(__name__)
+try:
+ import pyopencl
+except ImportError as error:
+ logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
+ pyopencl = None
+
+from .. import ocl
+if ocl is not None:
+ from ..utils import read_cl_file
+ from .. import pyopencl
+ import pyopencl.array
+
+
+class TestKahan(unittest.TestCase):
+ """
+ Test the kernels for compensated math in OpenCL
+ """
+
+ @classmethod
+ def setUpClass(cls):
+ if pyopencl is None or ocl is None:
+ raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+
+ cls.ctx = ocl.create_context(devicetype="GPU")
+ cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+
+        # check whether this is running 32-bit OpenCL with POCL
+ if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
+ cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+ cls.args = "-DX87_VOLATILE=volatile"
+ else:
+ cls.args = ""
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.queue = None
+ cls.ctx = None
+
+ @staticmethod
+ def dummy_sum(ary, dtype=None):
+ "perform the actual sum in a dummy way "
+ if dtype is None:
+ dtype = ary.dtype.type
+ sum_ = dtype(0)
+ for i in ary:
+ sum_ += i
+ return sum_
+
+ def test_kahan(self):
+ # simple test
+ N = 26
+ data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
+
+ ref64 = numpy.sum(data, dtype=numpy.float64)
+ ref32 = self.dummy_sum(data)
+ if (ref64 == ref32):
+ logger.warning("Kahan: invalid tests as float32 provides the same result as float64")
+ # Dummy kernel to evaluate
+ src = """
+ kernel void summation(global float* data,
+ int size,
+ global float* result)
+ {
+ float2 acc = (float2)(0.0f, 0.0f);
+ for (int i=0; i<size; i++)
+ {
+ acc = kahan_sum(acc, data[i]);
+ }
+ result[0] = acc.s0;
+ result[1] = acc.s1;
+ }
+ """
+ prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
+ ones_d = pyopencl.array.to_device(self.queue, data)
+ res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
+ res_d.fill(0)
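+        # Launch a single work-item: the kernel loops over the whole array sequentially,
+        # accumulating with kahan_sum.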
+ evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt.wait()
+ res = res_d.get().sum(dtype=numpy.float64)
+ self.assertEqual(ref64, res, "test_kahan")
+
+ def test_dot16(self):
+ # simple test
+ N = 16
+ data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
+
+ ref64 = numpy.dot(data.astype(numpy.float64), data.astype(numpy.float64))
+ ref32 = numpy.dot(data, data)
+ if (ref64 == ref32):
+ logger.warning("dot16: invalid tests as float32 provides the same result as float64")
+ # Dummy kernel to evaluate
+ src = """
+ kernel void test_dot16(global float* data,
+ int size,
+ global float* result)
+ {
+ float2 acc = (float2)(0.0f, 0.0f);
+ float16 data16 = (float16) (data[0],data[1],data[2],data[3],data[4],
+ data[5],data[6],data[7],data[8],data[9],
+ data[10],data[11],data[12],data[13],data[14],data[15]);
+ acc = comp_dot16(data16, data16);
+ result[0] = acc.s0;
+ result[1] = acc.s1;
+ }
+
+ kernel void test_dot8(global float* data,
+ int size,
+ global float* result)
+ {
+ float2 acc = (float2)(0.0f, 0.0f);
+ float8 data0 = (float8) (data[0],data[2],data[4],data[6],data[8],data[10],data[12],data[14]);
+ float8 data1 = (float8) (data[1],data[3],data[5],data[7],data[9],data[11],data[13],data[15]);
+ acc = comp_dot8(data0, data1);
+ result[0] = acc.s0;
+ result[1] = acc.s1;
+ }
+
+ kernel void test_dot4(global float* data,
+ int size,
+ global float* result)
+ {
+ float2 acc = (float2)(0.0f, 0.0f);
+ float4 data0 = (float4) (data[0],data[4],data[8],data[12]);
+ float4 data1 = (float4) (data[3],data[7],data[11],data[15]);
+ acc = comp_dot4(data0, data1);
+ result[0] = acc.s0;
+ result[1] = acc.s1;
+ }
+
+ kernel void test_dot3(global float* data,
+ int size,
+ global float* result)
+ {
+ float2 acc = (float2)(0.0f, 0.0f);
+ float3 data0 = (float3) (data[0],data[4],data[12]);
+ float3 data1 = (float3) (data[3],data[11],data[15]);
+ acc = comp_dot3(data0, data1);
+ result[0] = acc.s0;
+ result[1] = acc.s1;
+ }
+
+ kernel void test_dot2(global float* data,
+ int size,
+ global float* result)
+ {
+ float2 acc = (float2)(0.0f, 0.0f);
+ float2 data0 = (float2) (data[0],data[14]);
+ float2 data1 = (float2) (data[1],data[15]);
+ acc = comp_dot2(data0, data1);
+ result[0] = acc.s0;
+ result[1] = acc.s1;
+ }
+
+ """
+
+ prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
+ ones_d = pyopencl.array.to_device(self.queue, data)
+ res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
+ res_d.fill(0)
+ evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt.wait()
+ res = res_d.get().sum(dtype="float64")
+ self.assertEqual(ref64, res, "test_dot16")
+
+ res_d.fill(0)
+ data0 = data[0::2]
+ data1 = data[1::2]
+ ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+ ref32 = numpy.dot(data0, data1)
+ if (ref64 == ref32):
+ logger.warning("dot8: invalid tests as float32 provides the same result as float64")
+ evt = prg.test_dot8(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt.wait()
+ res = res_d.get().sum(dtype="float64")
+ self.assertEqual(ref64, res, "test_dot8")
+
+ res_d.fill(0)
+ data0 = data[0::4]
+ data1 = data[3::4]
+ ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+ ref32 = numpy.dot(data0, data1)
+ if (ref64 == ref32):
+ logger.warning("dot4: invalid tests as float32 provides the same result as float64")
+ evt = prg.test_dot4(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt.wait()
+ res = res_d.get().sum(dtype="float64")
+ self.assertEqual(ref64, res, "test_dot4")
+
+ res_d.fill(0)
+ data0 = numpy.array([data[0], data[4], data[12]])
+ data1 = numpy.array([data[3], data[11], data[15]])
+ ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+ ref32 = numpy.dot(data0, data1)
+ if (ref64 == ref32):
+ logger.warning("dot3: invalid tests as float32 provides the same result as float64")
+ evt = prg.test_dot3(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt.wait()
+ res = res_d.get().sum(dtype="float64")
+ self.assertEqual(ref64, res, "test_dot3")
+
+ res_d.fill(0)
+ data0 = numpy.array([data[0], data[14]])
+ data1 = numpy.array([data[1], data[15]])
+ ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
+ ref32 = numpy.dot(data0, data1)
+ if (ref64 == ref32):
+ logger.warning("dot2: invalid tests as float32 provides the same result as float64")
+ evt = prg.test_dot2(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt.wait()
+ res = res_d.get().sum(dtype="float64")
+ self.assertEqual(ref64, res, "test_dot2")
diff --git a/src/silx/opencl/test/test_linalg.py b/src/silx/opencl/test/test_linalg.py
new file mode 100644
index 0000000..a997a36
--- /dev/null
+++ b/src/silx/opencl/test/test_linalg.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the linalg module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "01/08/2019"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ import pyopencl as cl
+ import pyopencl.array as parray
+ from .. import linalg
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+try:
+ from scipy.ndimage.filters import laplace
+ _has_scipy = True
+except ImportError:
+ _has_scipy = False
+
+
+# TODO move this function in math or image ?
+def gradient(img):
+ '''
+ Compute the gradient of an image as a numpy array
+ Code from https://github.com/emmanuelle/tomo-tv/
+ '''
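+    # gradient[d] stores the forward difference of img along axis d (the last slice stays 0)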
+ shape = [img.ndim, ] + list(img.shape)
+ gradient = np.zeros(shape, dtype=img.dtype)
+ slice_all = [0, slice(None, -1),]
+ for d in range(img.ndim):
+ gradient[tuple(slice_all)] = np.diff(img, axis=d)
+ slice_all[0] = d + 1
+ slice_all.insert(1, slice(None))
+ return gradient
+
+
+# TODO move this function in math or image ?
+def divergence(grad):
+ '''
+ Compute the divergence of a gradient
+ Code from https://github.com/emmanuelle/tomo-tv/
+ '''
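+    # Backward-difference divergence, the negative adjoint of the gradient above;
+    # divergence(gradient(img)) gives a discrete Laplacian (see test_laplacian).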
+ res = np.zeros(grad.shape[1:])
+ for d in range(grad.shape[0]):
+ this_grad = np.rollaxis(grad[d], d)
+ this_res = np.rollaxis(res, d)
+ this_res[:-1] += this_grad[:-1]
+ this_res[1:-1] -= this_grad[:-2]
+ this_res[-1] -= this_grad[-2]
+ return res
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestLinAlg(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.getfiles()
+ self.la = linalg.LinAlg(self.image.shape)
+ self.allocate_arrays()
+
+ def allocate_arrays(self):
+ """
+ Allocate various types of arrays for the tests
+ """
+ # numpy images
+ self.grad = np.zeros(self.image.shape, dtype=np.complex64)
+ self.grad2 = np.zeros((2,) + self.image.shape, dtype=np.float32)
+ self.grad_ref = gradient(self.image)
+ self.div_ref = divergence(self.grad_ref)
+ self.image2 = np.zeros_like(self.image)
+ # Device images
+ self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64)
+ self.gradient_parray.fill(0)
+ # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
+        # but platforms not supporting OpenCL 1.2 have a problem with enqueue_fill_buffer,
+ # so we use the parray "fill" utility
+ self.gradient_buffer = self.gradient_parray.data
+ # Do the same for image
+ self.image_parray = parray.to_device(self.la.queue, self.image)
+ self.image_buffer = self.image_parray.data
+ # Refs
+ tmp = np.zeros(self.image.shape, dtype=np.complex64)
+ tmp.real = np.copy(self.grad_ref[0])
+ tmp.imag = np.copy(self.grad_ref[1])
+ self.grad_ref_parray = parray.to_device(self.la.queue, tmp)
+ self.grad_ref_buffer = self.grad_ref_parray.data
+
+ def tearDown(self):
+ self.image = None
+ self.image2 = None
+ self.grad = None
+ self.grad2 = None
+ self.grad_ref = None
+ self.div_ref = None
+ self.gradient_parray.data.release()
+ self.gradient_parray = None
+ self.gradient_buffer = None
+ self.image_parray.data.release()
+ self.image_parray = None
+ self.image_buffer = None
+ self.grad_ref_parray.data.release()
+ self.grad_ref_parray = None
+ self.grad_ref_buffer = None
+
+ def getfiles(self):
+ # load 512x512 MRI phantom - TODO include Lena or ascent once a .npz is available
+ self.image = np.load(utilstest.getfile("Brain512.npz"))["data"]
+
+ def compare(self, result, reference, abstol, name):
+ errmax = np.max(np.abs(result - reference))
+ logger.info("%s: Max error = %e" % (name, errmax))
+ self.assertTrue(errmax < abstol, str("%s: Max error is too high" % name))
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_gradient(self):
+ arrays = {
+ "numpy.ndarray": self.image,
+ "buffer": self.image_buffer,
+ "parray": self.image_parray
+ }
+ for desc, image in arrays.items():
+ # Test with dst on host (numpy.ndarray)
+ res = self.la.gradient(image, return_to_host=True)
+ self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc))
+ # Test with dst on device (pyopencl.Buffer)
+ self.la.gradient(image, dst=self.gradient_buffer)
+ cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer)
+ self.grad2[0] = self.grad.real
+ self.grad2[1] = self.grad.imag
+ self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc))
+ # Test with dst on device (pyopencl.Array)
+ self.la.gradient(image, dst=self.gradient_parray)
+ self.grad = self.gradient_parray.get()
+ self.grad2[0] = self.grad.real
+ self.grad2[1] = self.grad.imag
+ self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc))
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_divergence(self):
+ arrays = {
+ "numpy.ndarray": self.grad_ref,
+ "buffer": self.grad_ref_buffer,
+ "parray": self.grad_ref_parray
+ }
+ for desc, grad in arrays.items():
+ # Test with dst on host (numpy.ndarray)
+ res = self.la.divergence(grad, return_to_host=True)
+ self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc))
+ # Test with dst on device (pyopencl.Buffer)
+ self.la.divergence(grad, dst=self.image_buffer)
+ cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer)
+ self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc))
+ # Test with dst on device (pyopencl.Array)
+ self.la.divergence(grad, dst=self.image_parray)
+ self.image2 = self.image_parray.get()
+ self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc))
+
+ @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing")
+ def test_laplacian(self):
+ laplacian_ref = laplace(self.image)
+ # Laplacian = div(grad)
+ self.la.gradient(self.image)
+ laplacian_ocl = self.la.divergence(self.la.d_gradient, return_to_host=True)
+ self.compare(laplacian_ocl, laplacian_ref, 1e-6, "laplacian")
diff --git a/src/silx/opencl/test/test_medfilt.py b/src/silx/opencl/test/test_medfilt.py
new file mode 100644
index 0000000..339e0f2
--- /dev/null
+++ b/src/silx/opencl/test/test_medfilt.py
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: Median filter of images + OpenCL
+# https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of the median filter
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "05/07/2018"
+
+
+import sys
+import time
+import logging
+import numpy
+import unittest
+from collections import namedtuple
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ import pyopencl
+ import pyopencl.array
+ from .. import medfilt
+
+logger = logging.getLogger(__name__)
+
+Result = namedtuple("Result", ["size", "error", "sp_time", "oc_time"])
+
+try:
+ from scipy.misc import ascent
+except ImportError:
+ def ascent():
+ """Dummy image from random data"""
+ return numpy.random.random((512, 512))
+try:
+ from scipy.ndimage import filters
+ median_filter = filters.median_filter
+ HAS_SCIPY = True
+except ImportError:
+ HAS_SCIPY = False
+ from silx.math import medfilt2d as median_filter
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestMedianFilter(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.data = ascent().astype(numpy.float32)
+ self.medianfilter = medfilt.MedianFilter2D(self.data.shape, devicetype="gpu")
+
+ def tearDown(self):
+ self.data = None
+ self.medianfilter = None
+
+ def measure(self, size):
+ "Common measurement of accuracy and timings"
+ t0 = time.time()
+ if HAS_SCIPY:
+ ref = median_filter(self.data, size, mode="nearest")
+ else:
+ ref = median_filter(self.data, size)
+ t1 = time.time()
+ try:
+ got = self.medianfilter.medfilt2d(self.data, size)
+ except RuntimeError as msg:
+ logger.error(msg)
+ return
+ t2 = time.time()
+ delta = abs(got - ref).max()
+ return Result(size, delta, t1 - t0, t2 - t1)
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_medfilt(self):
+ """
+ tests the median filter kernel
+ """
+ r = self.measure(size=11)
+ if r is None:
+ logger.info("test_medfilt: size: %s: skipped")
+ else:
+ logger.info("test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r)
+ self.assertEqual(r.error, 0, 'Results are correct')
+
+ def benchmark(self, limit=36):
+ "Run some benchmarking"
+ try:
+ import PyQt5
+ from ...gui.matplotlib import pylab
+ from ...gui.utils import update_fig
+        except Exception:
+ pylab = None
+
+ def update_fig(*ag, **kwarg):
+ pass
+
+ fig = pylab.figure()
+ fig.suptitle("Median filter of an image 512x512")
+ sp = fig.add_subplot(1, 1, 1)
+ sp.set_title(self.medianfilter.ctx.devices[0].name)
+ sp.set_xlabel("Window width & height")
+ sp.set_ylabel("Execution time (s)")
+ sp.set_xlim(2, limit + 1)
+ sp.set_ylim(0, 4)
+ data_size = []
+ data_scipy = []
+ data_opencl = []
+ plot_sp = sp.plot(data_size, data_scipy, "-or", label="scipy")[0]
+ plot_opencl = sp.plot(data_size, data_opencl, "-ob", label="opencl")[0]
+ sp.legend(loc=2)
+ fig.show()
+ update_fig(fig)
+ for s in range(3, limit, 2):
+ r = self.measure(s)
+ print(r)
+ if r.error == 0:
+ data_size.append(s)
+ data_scipy.append(r.sp_time)
+ data_opencl.append(r.oc_time)
+ plot_sp.set_data(data_size, data_scipy)
+ plot_opencl.set_data(data_size, data_opencl)
+ update_fig(fig)
+ fig.show()
+ input()
+
+
+def benchmark():
+ testSuite = unittest.TestSuite()
+ testSuite.addTest(TestMedianFilter("benchmark"))
+ return testSuite
diff --git a/src/silx/opencl/test/test_projection.py b/src/silx/opencl/test/test_projection.py
new file mode 100644
index 0000000..13db5f4
--- /dev/null
+++ b/src/silx/opencl/test/test_projection.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the forward projection module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "19/01/2018"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ from .. import projection
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestProj(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ # ~ if sys.platform.startswith('darwin'):
+ # ~ self.skipTest("Projection is not implemented on CPU for OS X yet")
+ self.getfiles()
+ n_angles = self.sino.shape[0]
+ self.proj = projection.Projection(self.phantom.shape, n_angles)
+ if self.proj.compiletime_workgroup_size < 16 * 16:
+ self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet")
+
+ def tearDown(self):
+ self.phantom = None
+ self.sino = None
+ self.proj = None
+
+ def getfiles(self):
+ # load 512x512 MRI phantom
+ self.phantom = np.load(utilstest.getfile("Brain512.npz"))["data"]
+ # load sinogram computed with PyHST
+ self.sino = np.load(utilstest.getfile("sino500_pyhst.npz"))["data"]
+
+ def measure(self):
+ "Common measurement of timings"
+ t1 = time.time()
+ try:
+ result = self.proj.projection(self.phantom)
+ except RuntimeError as msg:
+ logger.error(msg)
+ return
+ t2 = time.time()
+ return t2 - t1, result
+
+ def compare(self, res):
+ """
+        Compare a computed projection with the reference sinogram (computed with PyHST).
+        """
+        # TODO: compare against a standard analytical projection
+ ref = self.sino
+ return np.max(np.abs(res - ref))
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_proj(self):
+ """
+ tests Projection
+ """
+ # Test single reconstruction
+ # --------------------------
+ t, res = self.measure()
+ if t is None:
+ logger.info("test_proj: skipped")
+ else:
+ logger.info("test_proj: time = %.3fs" % t)
+ err = self.compare(res)
+ msg = str("Max error = %e" % err)
+ logger.info(msg)
+ # Interpolation differs at some lines, giving relative error of 10/50000
+ self.assertTrue(err < 20., "Max error is too high")
+ # Test multiple reconstructions
+ # -----------------------------
+ res0 = np.copy(res)
+ for i in range(10):
+ res = self.proj.projection(self.phantom)
+ errmax = np.max(np.abs(res - res0))
+ self.assertTrue(errmax < 1.e-6, "Max error is too high")
diff --git a/src/silx/opencl/test/test_sparse.py b/src/silx/opencl/test/test_sparse.py
new file mode 100644
index 0000000..1d26b36
--- /dev/null
+++ b/src/silx/opencl/test/test_sparse.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2018-2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the sparse module"""
+
+import numpy as np
+import unittest
+import logging
+from itertools import product
+from ..common import ocl
+if ocl:
+ import pyopencl.array as parray
+ from silx.opencl.sparse import CSR
+try:
+ import scipy.sparse as sp
+except ImportError:
+ sp = None
+logger = logging.getLogger(__name__)
+
+
+
+def generate_sparse_random_data(
+ shape=(1000,),
+ data_min=0, data_max=100,
+ density=0.1,
+ use_only_integers=True,
+ dtype="f"):
+ """
+    Generate random sparse data.
+
+ Parameters
+ ------------
+ shape: tuple
+ Output data shape.
+ data_min: int or float
+ Minimum value of data
+ data_max: int or float
+ Maximum value of data
+ density: float
+ Density of non-zero elements in the output data.
+        A low density means few non-zero elements.
+ use_only_integers: bool
+ If set to True, the output data items will be primarily integers,
+        possibly cast to float if dtype is a floating-point type.
+ This can be used for ease of debugging.
+ dtype: str or numpy.dtype
+ Output data type
+ """
+ mask = np.random.binomial(1, density, size=shape)
+ if use_only_integers:
+ d = np.random.randint(data_min, high=data_max, size=shape)
+ else:
+ d = data_min + (data_max - data_min) * np.random.rand(*shape)
+ return (d * mask).astype(dtype)
+
+
+
+@unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing")
+class TestCSR(unittest.TestCase):
+ """Test CSR format"""
+
+ def setUp(self):
+ # Test possible configurations
+ input_on_device = [False, True]
+ output_on_device = [False, True]
+ dtypes = [np.float32, np.int32, np.uint16]
+ self._test_configs = list(product(input_on_device, output_on_device, dtypes))
+
+
+ def compute_ref_sparsification(self, array):
+ ref_sparse = sp.csr_matrix(array)
+ return ref_sparse
+
+
+ def test_sparsification(self):
+ for input_on_device, output_on_device, dtype in self._test_configs:
+ self._test_sparsification(input_on_device, output_on_device, dtype)
+
+
+ def _test_sparsification(self, input_on_device, output_on_device, dtype):
+ current_config = "input on device: %s, output on device: %s, dtype: %s" % (
+ str(input_on_device), str(output_on_device), str(dtype)
+ )
+ logger.debug("CSR: %s" % current_config)
+ # Generate data and reference CSR
+ array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
+ ref_sparse = self.compute_ref_sparsification(array)
+ # Sparsify on device
+ csr = CSR(array.shape, dtype=dtype)
+ if input_on_device:
+ # The array has to be flattened
+ arr = parray.to_device(csr.queue, array.ravel())
+ else:
+ arr = array
+ if output_on_device:
+ d_data = parray.empty_like(csr.data)
+ d_indices = parray.empty_like(csr.indices)
+ d_indptr = parray.empty_like(csr.indptr)
+ d_data.fill(0)
+ d_indices.fill(0)
+ d_indptr.fill(0)
+ output = (d_data, d_indices, d_indptr)
+ else:
+ output = None
+ data, indices, indptr = csr.sparsify(arr, output=output)
+ if output_on_device:
+ data = data.get()
+ indices = indices.get()
+ indptr = indptr.get()
+ # Compare
+ nnz = ref_sparse.nnz
+ self.assertTrue(
+ np.allclose(data[:nnz], ref_sparse.data),
+ "something wrong with sparsified data (%s)"
+ % current_config
+ )
+ self.assertTrue(
+ np.allclose(indices[:nnz], ref_sparse.indices),
+ "something wrong with sparsified indices (%s)"
+ % current_config
+ )
+ self.assertTrue(
+ np.allclose(indptr, ref_sparse.indptr),
+ "something wrong with sparsified indices pointers (indptr) (%s)"
+ % current_config
+ )
+
+
+ def test_desparsification(self):
+ for input_on_device, output_on_device, dtype in self._test_configs:
+ self._test_desparsification(input_on_device, output_on_device, dtype)
+
+
+ def _test_desparsification(self, input_on_device, output_on_device, dtype):
+ current_config = "input on device: %s, output on device: %s, dtype: %s" % (
+ str(input_on_device), str(output_on_device), str(dtype)
+ )
+ logger.debug("CSR: %s" % current_config)
+ # Generate data and reference CSR
+ array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
+ ref_sparse = self.compute_ref_sparsification(array)
+ # De-sparsify on device
+ csr = CSR(array.shape, dtype=dtype, max_nnz=ref_sparse.nnz)
+ if input_on_device:
+ data = parray.to_device(csr.queue, ref_sparse.data)
+ indices = parray.to_device(csr.queue, ref_sparse.indices)
+ indptr = parray.to_device(csr.queue, ref_sparse.indptr)
+ else:
+ data = ref_sparse.data
+ indices = ref_sparse.indices
+ indptr = ref_sparse.indptr
+ if output_on_device:
+ d_arr = parray.empty_like(csr.array)
+ d_arr.fill(0)
+ output = d_arr
+ else:
+ output = None
+ arr = csr.densify(data, indices, indptr, output=output)
+ if output_on_device:
+ arr = arr.get()
+ # Compare
+ self.assertTrue(
+ np.allclose(arr.reshape(array.shape), array),
+ "something wrong with densified data (%s)"
+ % current_config
+ )
diff --git a/src/silx/opencl/test/test_stats.py b/src/silx/opencl/test/test_stats.py
new file mode 100644
index 0000000..859271d
--- /dev/null
+++ b/src/silx/opencl/test/test_stats.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Project: Sift implementation in Python + OpenCL
+# https://github.com/silx-kit/silx
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Simple test of the statistics (min/max/mean/std) OpenCL kernels
+"""
+__authors__ = ["Henri Payno, Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "19/05/2021"
+
+import logging
+import time
+import numpy
+
+import unittest
+from ..common import ocl
+if ocl:
+ import pyopencl
+ import pyopencl.array
+ from ..statistics import StatResults, Statistics
+from ..utils import get_opencl_code
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl, "PyOpenCl is missing")
+class TestStatistics(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ cls.size = 1 << 20 # 1 million elements
+ cls.data = numpy.random.randint(0, 65000, cls.size).astype("uint16")
+ fdata = cls.data.astype("float64")
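+        # Reference statistics computed with numpy in float64; the elapsed time
+        # serves as a rough speed baseline for the OpenCL implementation.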
+ t0 = time.perf_counter()
+ std = fdata.std()
+ cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size),
+ fdata.sum(), fdata.mean(), std ** 2,
+ std)
+ t1 = time.perf_counter()
+ cls.ref_time = t1 - t0
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.size = cls.ref = cls.data = cls.ref_time = None
+
+ @classmethod
+ def validate(cls, res):
+ return (
+ (res.min == cls.ref.min) and
+ (res.max == cls.ref.max) and
+ (res.cnt == cls.ref.cnt) and
+ abs(res.mean - cls.ref.mean) < 0.01 and
+ abs(res.std - cls.ref.std) < 0.1)
+
+ def test_measurement(self):
+ """
+ tests that all devices are working properly ...
+ """
+ logger.info("Reference results: %s", self.ref)
+ for pid, platform in enumerate(ocl.platforms):
+ for did, device in enumerate(platform.devices):
+ try:
+ s = Statistics(template=self.data, platformid=pid, deviceid=did)
+ except Exception as err:
+ failed_init = True
+ res = StatResults(0, 0, 0, 0, 0, 0, 0)
+ print(err)
+ else:
+ failed_init = False
+ for comp in ("single", "double", "comp"):
+ t0 = time.perf_counter()
+ res = s(self.data, comp=comp)
+ t1 = time.perf_counter()
+ logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
+
+ if failed_init or not self.validate(res):
+ logger.error("failed_init %s; Computation modes %s", failed_init, comp)
+ logger.error("Failed on platform %s device %s", platform, device)
+ logger.error("Reference results: %s", self.ref)
+ logger.error("Faulty results: %s", res)
+ self.assertTrue(False, f"Stat calculation failed on {platform},{device} in mode {comp}")
diff --git a/src/silx/opencl/utils.py b/src/silx/opencl/utils.py
new file mode 100644
index 0000000..575e018
--- /dev/null
+++ b/src/silx/opencl/utils.py
@@ -0,0 +1,214 @@
+# -*- coding: utf-8 -*-
+# /*##########################################################################
+# Copyright (C) 2017 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ############################################################################*/
+"""
+Project: Sift implementation in Python + OpenCL
+ https://github.com/silx-kit/silx
+"""
+
+from __future__ import division
+
+__authors__ = ["Jérôme Kieffer", "Pierre Paleo"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "06/09/2017"
+__status__ = "Production"
+
+import os
+import numpy
+from .. import resources
+from math import log, ceil
+
+
+def calc_size(shape, blocksize):
+ """
+ Calculate the optimal size for a kernel according to the workgroup size
+ """
+ if "__len__" in dir(blocksize):
+ return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize))
+ else:
+ return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape)
+
+
+def nextpower(n):
+ """Calculate the power of two
+
+ :param n: an integer, for example 100
+ :return: another integer, 100-> 128
+ """
+ return 1 << int(ceil(log(n, 2)))
+
+
+def sizeof(shape, dtype="uint8"):
+ """
+ Calculate the number of bytes needed to allocate for a given structure
+
+ :param shape: size or tuple of sizes
+ :param dtype: data type
+ """
+ itemsize = numpy.dtype(dtype).itemsize
+ cnt = 1
+ if "__len__" in dir(shape):
+ for dim in shape:
+ cnt *= dim
+ else:
+ cnt = int(shape)
+ return cnt * itemsize
+
+
+def get_cl_file(resource):
+ """get the full path of a openCL resource file
+
+ The resource name can be prefixed by the name of a resource directory. For
+ example "silx:foo.png" identify the resource "foo.png" from the resource
+ directory "silx".
+ See also :func:`silx.resources.register_resource_directory`.
+
+    :param str resource: Resource name. File name contained in the `opencl`
+        directory of the resources.
+    :return: the full path of the OpenCL source file
+ """
+ if not resource.endswith(".cl"):
+ resource += ".cl"
+ return resources._resource_filename(resource,
+ default_directory="opencl")
+
+
+def read_cl_file(filename):
+ """
+ :param filename: read an OpenCL file and apply a preprocessor
+ :return: preprocessed source code
+ """
+ with open(get_cl_file(filename), "r") as f:
+ # Dummy preprocessor which removes the #include
+ lines = [i for i in f.readlines() if not i.startswith("#include ")]
+ return "".join(lines)
+
+
+get_opencl_code = read_cl_file
+
+
+def concatenate_cl_kernel(filenames):
+ """Concatenates all the kernel from the list of files
+
+ :param filenames: filenames containing the kernels
+ :type filenames: list of str which can be filename of kernel as a string.
+ :return: a string with all kernels concatenated
+
+ this method concatenates all the kernel from the list
+ """
+ return os.linesep.join(read_cl_file(fn) for fn in filenames)
+
+
+
+
+class ConvolutionInfos(object):
+ allowed_axes = {
+ "1D": [None],
+ "separable_2D_1D_2D": [None, (0, 1), (1, 0)],
+ "batched_1D_2D": [(0,), (1,)],
+ "separable_3D_1D_3D": [
+ None,
+ (0, 1, 2),
+ (1, 2, 0),
+ (2, 0, 1),
+ (2, 1, 0),
+ (1, 0, 2),
+ (0, 2, 1)
+ ],
+ "batched_1D_3D": [(0,), (1,), (2,)],
+ "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
+ "2D": [None],
+ "batched_2D_3D": [(0,), (1,), (2,)],
+ "separable_3D_2D_3D": [
+ (1, 0),
+ (0, 1),
+ (2, 0),
+ (0, 2),
+ (1, 2),
+ (2, 1),
+ ],
+ "3D": [None],
+ }
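+    # use_cases maps (data dimensionality, kernel dimensionality) to the available
+    # convolution modes and the OpenCL kernels they rely on.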
+ use_cases = {
+ (1, 1): {
+ "1D": {
+ "name": "1D convolution on 1D data",
+ "kernels": ["convol_1D_X"],
+ },
+ },
+ (2, 2): {
+ "2D": {
+ "name": "2D convolution on 2D data",
+ "kernels": ["convol_2D_XY"],
+ },
+ },
+ (3, 3): {
+ "3D": {
+ "name": "3D convolution on 3D data",
+ "kernels": ["convol_3D_XYZ"],
+ },
+ },
+ (2, 1): {
+ "separable_2D_1D_2D": {
+ "name": "Separable (2D->1D) convolution on 2D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y"],
+ },
+ "batched_1D_2D": {
+ "name": "Batched 1D convolution on 2D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y"],
+ },
+ },
+ (3, 1): {
+ "separable_3D_1D_3D": {
+ "name": "Separable (3D->1D) convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ "batched_1D_3D": {
+ "name": "Batched 1D convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ "batched_separable_2D_1D_3D": {
+ "name": "Batched separable (2D->1D) convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ },
+ (3, 2): {
+ "separable_3D_2D_3D": {
+ "name": "Separable (3D->2D) convolution on 3D data",
+ "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
+ },
+ "batched_2D_3D": {
+ "name": "Batched 2D convolution on 3D data",
+ "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
+ },
+ },
+ }
+
+
+
+
+
+
+