diff options
Diffstat (limited to 'silx/opencl')
-rw-r--r-- | silx/opencl/backprojection.py | 11 | ||||
-rw-r--r-- | silx/opencl/common.py | 52 | ||||
-rw-r--r-- | silx/opencl/convolution.py | 555 | ||||
-rw-r--r-- | silx/opencl/processing.py | 57 | ||||
-rw-r--r-- | silx/opencl/sinofilter.py | 205 | ||||
-rw-r--r-- | silx/opencl/sparse.py | 331 | ||||
-rw-r--r-- | silx/opencl/test/__init__.py | 4 | ||||
-rw-r--r-- | silx/opencl/test/test_backprojection.py | 21 | ||||
-rw-r--r-- | silx/opencl/test/test_convolution.py | 263 | ||||
-rw-r--r-- | silx/opencl/test/test_sparse.py | 192 |
10 files changed, 1507 insertions, 184 deletions
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py index c257872..5a4087b 100644 --- a/silx/opencl/backprojection.py +++ b/silx/opencl/backprojection.py @@ -154,6 +154,9 @@ class Backprojection(OpenclProcessing): self.extra_options = { "cutoff": 1., "use_numpy_fft": False, + # It is axis_pos - (num_bins-1)/2 in PyHST + "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2., + "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2. } if extra_options is not None: self.extra_options.update(extra_options) @@ -221,10 +224,10 @@ class Backprojection(OpenclProcessing): self.cl_mem["_d_slice"].data, # d_sino (__read_only image2d_t or float*) d_sino_ref, - # gpu_offset_x (float32) # TODO custom ? - -np.float32((self.num_bins - 1) / 2. - self.axis_pos), - # gpu_offset_y (float32) # TODO custom ? - -np.float32((self.num_bins - 1) / 2. - self.axis_pos), + # gpu_offset_x (float32) + np.float32(self.extra_options["gpu_offset_x"]), + # gpu_offset_y (float32) + np.float32(self.extra_options["gpu_offset_y"]), # d_cos (__global float32*) self.cl_mem["d_cos"].data, # d_sin (__global float32*) diff --git a/silx/opencl/common.py b/silx/opencl/common.py index 9a04035..8d31c8a 100644 --- a/silx/opencl/common.py +++ b/silx/opencl/common.py @@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "20/09/2018" +__date__ = "07/06/2019" __status__ = "stable" __all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers", "measure_workgroup_size", "kernel_workgroup_size"] @@ -158,6 +158,11 @@ class Device(object): "Available\t:\t%s" % self.available] return os.linesep.join(lst) + def set_unavailable(self): + """Use this method to flag a faulty device + """ + self.available = False + class Platform(object): """ @@ -220,8 +225,16 @@ def _measure_workgroup_size(device_or_context, fast=False): :return: maximum size for the workgroup """ if isinstance(device_or_context, pyopencl.Device): - ctx = pyopencl.Context(devices=[device_or_context]) - device = device_or_context + try: + ctx = pyopencl.Context(devices=[device_or_context]) + except pyopencl._cl.LogicError as error: + platform = device_or_context.platform + platformid = pyopencl.get_platforms().index(platform) + deviceid = platform.get_devices().index(device_or_context) + ocl.platforms[platformid].devices[deviceid].set_unavailable() + raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error)) + else: + device = device_or_context elif isinstance(device_or_context, pyopencl.Context): ctx = device_or_context device = device_or_context.devices[0] @@ -379,6 +392,8 @@ class OpenCL(object): best_found = None for platformid, platform in enumerate(self.platforms): for deviceid, device in enumerate(platform.devices): + if not device.available: + continue if (dtype in ["ALL", "DEF"]) or (device.type == dtype): if (memory is None) or (memory <= device.memory): found = True @@ -400,7 +415,7 @@ class OpenCL(object): return None def create_context(self, devicetype="ALL", useFp64=False, platformid=None, - deviceid=None, cached=True, memory=None): + deviceid=None, cached=True, memory=None, extensions=None): """ Choose a device and initiate a context. @@ -410,13 +425,20 @@ class OpenCL(object): E.g.: If Nvidia driver is installed, GPU will succeed but CPU will fail. The AMD SDK kit is required for CPU via OpenCL. :param devicetype: string in ["cpu","gpu", "all", "acc"] - :param useFp64: boolean specifying if double precision will be used + :param useFp64: boolean specifying if double precision will be used: deprecated use extensions=["cl_khr_fp64"] :param platformid: integer :param deviceid: integer :param cached: True if we want to cache the context :param memory: minimum amount of memory of the device + :param extensions: list of extensions to be present :return: OpenCL context on the selected device """ + if extensions is None: + extensions = [] + if useFp64: + logger.warning("Deprecation: please select your device using the extension name!, i.e. extensions=['cl_khr_fp64']") + extensions.append('cl_khr_fp64') + if (platformid is not None) and (deviceid is not None): platformid = int(platformid) deviceid = int(deviceid) @@ -425,20 +447,24 @@ class OpenCL(object): pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0 platformid, deviceid = pyopencl_ctx else: - if useFp64: - ids = ocl.select_device(type=devicetype, extensions=["cl_khr_int64_base_atomics"]) - else: - ids = ocl.select_device(dtype=devicetype) + ids = ocl.select_device(type=devicetype, extensions=extensions) if ids: platformid, deviceid = ids + ctx = None if (platformid is not None) and (deviceid is not None): if (platformid, deviceid) in self.context_cache: ctx = self.context_cache[(platformid, deviceid)] else: - ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]]) - if cached: - self.context_cache[(platformid, deviceid)] = ctx - else: + try: + ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]]) + except pyopencl._cl.LogicError as error: + self.platforms[platformid].devices[deviceid].set_unavailable() + logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error) + ctx = None + else: + if cached: + self.context_cache[(platformid, deviceid)] = ctx + if ctx is None: logger.warning("Last chance to get an OpenCL device ... probably not the one requested") ctx = pyopencl.create_some_context(interactive=False) return ctx diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py new file mode 100644 index 0000000..18232f4 --- /dev/null +++ b/silx/opencl/convolution.py @@ -0,0 +1,555 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for convolution on CPU/GPU.""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["P. Paleo"] +__license__ = "MIT" +__date__ = "11/02/2019" + +import numpy as np +from math import ceil +from copy import copy # python2 +from .common import pyopencl as cl +import pyopencl.array as parray +from .processing import OpenclProcessing, EventDescription + + +class ConvolutionInfos(object): + allowed_axes = { + "1D": [None], + "separable_2D_1D_2D": [None, (0, 1), (1, 0)], + "batched_1D_2D": [(0,), (1,)], + "separable_3D_1D_3D": [ + None, + (0, 1, 2), + (1, 2, 0), + (2, 0, 1), + (2, 1, 0), + (1, 0, 2), + (0, 2, 1) + ], + "batched_1D_3D": [(0,), (1,), (2,)], + "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?) + "2D": [None], + "batched_2D_3D": [(0,), (1,), (2,)], + "separable_3D_2D_3D": [ + (1, 0), + (0, 1), + (2, 0), + (0, 2), + (1, 2), + (2, 1), + ], + "3D": [None], + } + use_cases = { + (1, 1): { + "1D": { + "name": "1D convolution on 1D data", + "kernels": ["convol_1D_X"], + }, + }, + (2, 2): { + "2D": { + "name": "2D convolution on 2D data", + "kernels": ["convol_2D_XY"], + }, + }, + (3, 3): { + "3D": { + "name": "3D convolution on 3D data", + "kernels": ["convol_3D_XYZ"], + }, + }, + (2, 1): { + "separable_2D_1D_2D": { + "name": "Separable (2D->1D) convolution on 2D data", + "kernels": ["convol_1D_X", "convol_1D_Y"], + }, + "batched_1D_2D": { + "name": "Batched 1D convolution on 2D data", + "kernels": ["convol_1D_X", "convol_1D_Y"], + }, + }, + (3, 1): { + "separable_3D_1D_3D": { + "name": "Separable (3D->1D) convolution on 3D data", + "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], + }, + "batched_1D_3D": { + "name": "Batched 1D convolution on 3D data", + "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], + }, + "batched_separable_2D_1D_3D": { + "name": "Batched separable (2D->1D) convolution on 3D data", + "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], + }, + }, + (3, 2): { + "separable_3D_2D_3D": { + "name": "Separable (3D->2D) convolution on 3D data", + "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"], + }, + "batched_2D_3D": { + "name": "Batched 2D convolution on 3D data", + "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"], + }, + }, + } + + +class Convolution(OpenclProcessing): + """ + A class for performing convolution on CPU/GPU with OpenCL. + """ + + def __init__(self, shape, kernel, axes=None, mode=None, ctx=None, + devicetype="all", platformid=None, deviceid=None, + profile=False, extra_options=None): + """Constructor of OpenCL Convolution. + + :param shape: shape of the array. + :param kernel: convolution kernel (1D, 2D or 3D). + :param axes: axes along which the convolution is performed, + for batched convolutions. + :param mode: Boundary handling mode. Available modes are: + "reflect": cba|abcd|dcb + "nearest": aaa|abcd|ddd + "wrap": bcd|abcd|abc + "constant": 000|abcd|000 + Default is "reflect". + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by + clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly + slower) + :param extra_options: Advanced options (dict). Current options are: + "allocate_input_array": True, + "allocate_output_array": True, + "allocate_tmp_array": True, + "dont_use_textures": False, + """ + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + + self._configure_extra_options(extra_options) + self._determine_use_case(shape, kernel, axes) + self._allocate_memory(mode) + self._init_kernels() + + def _configure_extra_options(self, extra_options): + self.extra_options = { + "allocate_input_array": True, + "allocate_output_array": True, + "allocate_tmp_array": True, + "dont_use_textures": False, + } + extra_opts = extra_options or {} + self.extra_options.update(extra_opts) + self.is_cpu = (self.device.type == "CPU") + self.use_textures = not(self.extra_options["dont_use_textures"]) + self.use_textures *= not(self.is_cpu) + + def _get_dimensions(self, shape, kernel): + self.shape = shape + self.data_ndim = self._check_dimensions(shape=shape, name="Data") + self.kernel_ndim = self._check_dimensions(arr=kernel, name="Kernel") + Nx = shape[-1] + if self.data_ndim >= 2: + Ny = shape[-2] + else: + Ny = 1 + if self.data_ndim >= 3: + Nz = shape[-3] + else: + Nz = 1 + self.Nx = np.int32(Nx) + self.Ny = np.int32(Ny) + self.Nz = np.int32(Nz) + + def _determine_use_case(self, shape, kernel, axes): + """ + Determine the convolution use case from the input/kernel shape, and axes. + """ + self._get_dimensions(shape, kernel) + if self.kernel_ndim > self.data_ndim: + raise ValueError("Kernel dimensions cannot exceed data dimensions") + data_ndim = self.data_ndim + kernel_ndim = self.kernel_ndim + self.kernel = kernel.astype("f") + + convol_infos = ConvolutionInfos() + k = (data_ndim, kernel_ndim) + if k not in convol_infos.use_cases: + raise ValueError( + "Cannot find a use case for data ndim = %d and kernel ndim = %d" + % (data_ndim, kernel_ndim) + ) + possible_use_cases = convol_infos.use_cases[k] + + self.use_case_name = None + for uc_name, uc_params in possible_use_cases.items(): + if axes in convol_infos.allowed_axes[uc_name]: + self.use_case_name = uc_name + self.use_case_desc = uc_params["name"] + #~ self.use_case_kernels = uc_params["kernels"].copy() + self.use_case_kernels = copy(uc_params["kernels"]) # TODO use the above line once we get rid of python2 + if self.use_case_name is None: + raise ValueError( + "Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s" + % (data_ndim, kernel_ndim, str(axes)) + ) + # TODO implement this use case + if self.use_case_name == "batched_separable_2D_1D_3D": + raise NotImplementedError( + "The use case %s is not implemented" + % self.use_case_name + ) + # + self.axes = axes + # Replace "axes=None" with an actual value (except for ND-ND) + allowed_axes = convol_infos.allowed_axes[self.use_case_name] + if len(allowed_axes) > 1: + # The default choice might impact perfs + self.axes = allowed_axes[0] or allowed_axes[1] + self.separable = self.use_case_name.startswith("separable") + self.batched = self.use_case_name.startswith("batched") + # Update kernel names when using textures + if self.use_textures: + for i, kern_name in enumerate(self.use_case_kernels): + self.use_case_kernels[i] = kern_name + "_tex" + + def _allocate_memory(self, mode): + self.mode = mode or "reflect" + option_array_names = { + "allocate_input_array": "data_in", + "allocate_output_array": "data_out", + "allocate_tmp_array": "data_tmp", + } + # Nonseparable transforms do not need tmp array + if not(self.separable): + self.extra_options["allocate_tmp_array"] = False + # Allocate arrays + for option_name, array_name in option_array_names.items(): + if self.extra_options[option_name]: + value = parray.zeros(self.queue, self.shape, "f") + else: + value = None + setattr(self, array_name, value) + + if isinstance(self.kernel, np.ndarray): + self.d_kernel = parray.to_device(self.queue, self.kernel) + else: + if not(isinstance(self.kernel, parray.Array)): + raise ValueError("kernel must be either numpy array or pyopencl array") + self.d_kernel = self.kernel + self._old_input_ref = None + self._old_output_ref = None + if self.use_textures: + self._allocate_textures() + self._c_modes_mapping = { + "periodic": 2, + "wrap": 2, + "nearest": 1, + "replicate": 1, + "reflect": 0, + "constant": 3, + } + mp = self._c_modes_mapping + if self.mode.lower() not in mp: + raise ValueError( + """ + Mode %s is not available for textures. Available modes are: + %s + """ + % (self.mode, str(mp.keys())) + ) + # TODO + if not(self.use_textures) and self.mode.lower() == "constant": + raise NotImplementedError( + "mode='constant' is not implemented without textures yet" + ) + # + self._c_conv_mode = mp[self.mode] + + def _allocate_textures(self): + self.data_in_tex = self.allocate_texture(self.shape) + self.d_kernel_tex = self.allocate_texture(self.kernel.shape) + self.transfer_to_texture(self.d_kernel, self.d_kernel_tex) + + def _init_kernels(self): + if self.kernel_ndim > 1: + if np.abs(np.diff(self.kernel.shape)).max() > 0: + raise NotImplementedError( + "Non-separable convolution with non-square kernels is not implemented yet" + ) + compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)] + if self.use_textures: + kernel_files = ["convolution_textures.cl"] + compile_options.extend([ + str("-DIMAGE_DIMS=%d" % self.data_ndim), + str("-DFILTER_DIMS=%d" % self.kernel_ndim), + ]) + data_in_ref = self.data_in_tex + d_kernel_ref = self.d_kernel_tex + else: + kernel_files = ["convolution.cl"] + data_in_ref = self.data_in.data + d_kernel_ref = self.d_kernel.data + self.compile_kernels( + kernel_files=kernel_files, + compile_options=compile_options + ) + self.ndrange = self.shape[::-1] + self.wg = None + kernel_args = [ + self.queue, + self.ndrange, self.wg, + data_in_ref, + self.data_out.data, + d_kernel_ref, + np.int32(self.kernel.shape[0]), + self.Nx, self.Ny, self.Nz + ] + if self.kernel_ndim == 2: + kernel_args.insert(6, np.int32(self.kernel.shape[1])) + if self.kernel_ndim == 3: + kernel_args.insert(6, np.int32(self.kernel.shape[2])) + kernel_args.insert(7, np.int32(self.kernel.shape[1])) + self.kernel_args = tuple(kernel_args) + # If self.data_tmp is allocated, separable transforms can be performed + # by a series of batched transforms, without any copy, by swapping refs. + self.swap_pattern = None + if self.separable: + if self.data_tmp is not None: + self.swap_pattern = { + 2: [ + ("data_in", "data_tmp"), + ("data_tmp", "data_out") + ], + 3: [ + ("data_in", "data_out"), + ("data_out", "data_tmp"), + ("data_tmp", "data_out"), + ], + } + else: + # TODO + raise NotImplementedError("For now, data_tmp has to be allocated") + + def _get_swapped_arrays(self, i): + """ + Get the input and output arrays to use when using a "swap pattern". + Swapping refs enables to avoid copies between temp. array and output. + For example, a separable 2D->1D convolution on 2D data reads: + data_tmp = convol(data_input, kernel, axis=1) # step i=0 + data_out = convol(data_tmp, kernel, axis=0) # step i=1 + + :param i: current step number of the separable convolution + """ + if self.use_textures: + # copy is needed when using texture, as data_out is a Buffer + if i > 0: + self.transfer_to_texture(self.data_out, self.data_in_tex) + return self.data_in_tex, self.data_out + n_batchs = len(self.axes) + in_ref, out_ref = self.swap_pattern[n_batchs][i] + d_in = getattr(self, in_ref) + d_out = getattr(self, out_ref) + return d_in, d_out + + def _configure_kernel_args(self, opencl_kernel_args, input_ref, output_ref): + # TODO more elegant + if isinstance(input_ref, parray.Array): + input_ref = input_ref.data + if isinstance(output_ref, parray.Array): + output_ref = output_ref.data + if input_ref is not None or output_ref is not None: + opencl_kernel_args = list(opencl_kernel_args) + if input_ref is not None: + opencl_kernel_args[3] = input_ref + if output_ref is not None: + opencl_kernel_args[4] = output_ref + opencl_kernel_args = tuple(opencl_kernel_args) + return opencl_kernel_args + + @staticmethod + def _check_dimensions(arr=None, shape=None, name="", dim_min=1, dim_max=3): + if shape is not None: + ndim = len(shape) + elif arr is not None: + ndim = arr.ndim + else: + raise ValueError("Please provide either arr= or shape=") + if ndim < dim_min or ndim > dim_max: + raise ValueError("%s dimensions should be between %d and %d" + % (name, dim_min, dim_max) + ) + return ndim + + def _check_array(self, arr): + # TODO allow cl.Buffer + if not(isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)): + raise TypeError("Expected either pyopencl.array.Array or numpy.ndarray") + # TODO composition with ImageProcessing/cast + if arr.dtype != np.float32: + raise TypeError("Data must be float32") + if arr.shape != self.shape: + raise ValueError("Expected data shape = %s" % str(self.shape)) + + def _set_arrays(self, array, output=None): + # When using textures: copy + if self.use_textures: + self.transfer_to_texture(array, self.data_in_tex) + data_in_ref = self.data_in_tex + else: + # Otherwise: copy H->D or update references. + if isinstance(array, np.ndarray): + self.data_in[:] = array[:] + else: + self._old_input_ref = self.data_in + self.data_in = array + data_in_ref = self.data_in + if output is not None: + if not(isinstance(output, np.ndarray)): + self._old_output_ref = self.data_out + self.data_out = output + # Update OpenCL kernel arguments with new array references + self.kernel_args = self._configure_kernel_args( + self.kernel_args, + data_in_ref, + self.data_out + ) + + def _separable_convolution(self): + assert len(self.axes) == len(self.use_case_kernels) + # Separable: one kernel call per data dimension + for i, axis in enumerate(self.axes): + in_ref, out_ref = self._get_swapped_arrays(i) + self._batched_convolution(axis, input_ref=in_ref, output_ref=out_ref) + + def _batched_convolution(self, axis, input_ref=None, output_ref=None): + # Batched: one kernel call in total + opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[axis]) + opencl_kernel_args = self._configure_kernel_args( + self.kernel_args, + input_ref, + output_ref + ) + ev = opencl_kernel(*opencl_kernel_args) + if self.profile: + self.events.append(EventDescription("batched convolution", ev)) + + def _nd_convolution(self): + assert len(self.use_case_kernels) == 1 + opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[0]) + ev = opencl_kernel(*self.kernel_args) + if self.profile: + self.events.append(EventDescription("ND convolution", ev)) + + def _recover_arrays_references(self): + if self._old_input_ref is not None: + self.data_in = self._old_input_ref + self._old_input_ref = None + if self._old_output_ref is not None: + self.data_out = self._old_output_ref + self._old_output_ref = None + self.kernel_args = self._configure_kernel_args( + self.kernel_args, + self.data_in, + self.data_out + ) + + def _get_output(self, output): + if output is None: + res = self.data_out.get() + else: + res = output + if isinstance(output, np.ndarray): + output[:] = self.data_out[:] + self._recover_arrays_references() + return res + + def convolve(self, array, output=None): + """ + Convolve an array with the class kernel. + + :param array: Input array. Can be numpy.ndarray or pyopencl.array.Array. + :param output: Output array. Can be numpy.ndarray or pyopencl.array.Array. + """ + self._check_array(array) + self._set_arrays(array, output=output) + if self.axes is not None: + if self.separable: + self._separable_convolution() + elif self.batched: + assert len(self.axes) == 1 + self._batched_convolution(self.axes[0]) + # else: ND-ND convol + else: + # ND-ND convol + self._nd_convolution() + + res = self._get_output(output) + return res + + + __call__ = convolve + + +def gaussian_kernel(sigma, cutoff=4, force_odd_size=False): + """ + Generates a Gaussian convolution kernel. + + :param sigma: Standard Deviation of the Gaussian curve. + :param cutoff: Parameter tuning the truncation of the Gaussian. + The higher cutoff, the biggest the array will be (and the closest to + a "true" Gaussian function). + :param force_odd_size: when set to True, the resulting array will always + have an odd size, regardless of the values of "sigma" and "cutoff". + :return: a numpy.ndarray containing the truncated Gaussian function. + The array size is 2*c*s+1 where c=cutoff, s=sigma. + + Nota: due to the quick decay of the Gaussian function, small values of the + "cutoff" parameter are usually fine. The energy difference between a + Gaussian truncated to [-c, c] and a "true" one is + erfc(c/(sqrt(2)*s)) + so choosing cutoff=4*sigma keeps the truncation error below 1e-4. + """ + size = int(ceil(2 * cutoff * sigma + 1)) + if force_odd_size and size % 2 == 0: + size += 1 + x = np.arange(size) - (size - 1.0) / 2.0 + g = np.exp(-(x / sigma) ** 2 / 2.0) + g /= g.sum() + return g diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py index 045a9d3..707aa72 100644 --- a/silx/opencl/processing.py +++ b/silx/opencl/processing.py @@ -168,7 +168,7 @@ class OpenclProcessing(object): paramatrized buffers. :param use_array: allocate memory as pyopencl.array.Array instead of pyopencl.Buffer - + Note that an OpenCL context also requires some memory, as well as Event and other OpenCL functionalities which cannot and are not taken into account here. The memory required by a context @@ -290,6 +290,59 @@ class OpenclProcessing(object): else: self.queue = pyopencl.CommandQueue(self.ctx) + def profile_add(self, event, desc): + """ + Add an OpenCL event to the events lists, if profiling is enabled. + + :param event: silx.opencl.processing.EventDescription. + :param desc: event description + """ + if self.profile: + self.events.append(EventDescription(desc, event)) + + def allocate_texture(self, shape, hostbuf=None, support_1D=False): + """ + Allocate an OpenCL image ("texture"). + + :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2 + do not support 1D images, so 1D images are handled as 2D with one row + :param support_1D: force the image to be 1D if the shape has only one dim + """ + if len(shape) == 1 and not(support_1D): + shape = (1,) + shape + return pyopencl.Image( + self.ctx, + pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, + pyopencl.channel_type.FLOAT + ), + hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32) + ) + + def transfer_to_texture(self, arr, tex_ref): + """ + Transfer an array to a texture. + + :param arr: Input array. Can be a numpy array or a pyopencl array. + :param tex_ref: texture reference (pyopencl._cl.Image). + """ + copy_args = [self.queue, tex_ref, arr] + shp = arr.shape + ndim = arr.ndim + if ndim == 1: + # pyopencl and OpenCL < 1.2 do not support image1d_t + # force 2D with one row in this case + #~ ndim = 2 + shp = (1,) + shp + copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]} + if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array + # D->D copy + copy_args[2] = arr.data + copy_kwargs["offset"] = 0 + ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs) + self.profile_add(ev, "Transfer to texture") + def log_profile(self): """If we are in profiling mode, prints out all timing for every single OpenCL call """ @@ -328,7 +381,7 @@ class OpenclProcessing(object): def get_compiler_options(self, x87_volatile=False): """Provide the default OpenCL compiler options - + :param x87_volatile: needed for Kahan summation :return: string with compiler option """ diff --git a/silx/opencl/sinofilter.py b/silx/opencl/sinofilter.py index 3e4d69c..ad17acb 100644 --- a/silx/opencl/sinofilter.py +++ b/silx/opencl/sinofilter.py @@ -29,135 +29,21 @@ from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["P. Paleo"] __license__ = "MIT" -__date__ = "01/02/2019" +__date__ = "07/06/2019" import numpy as np from math import pi -from itertools import product -from bisect import bisect -from .common import pyopencl as cl + import pyopencl.array as parray +from .common import pyopencl as cl from .processing import OpenclProcessing from ..math.fft import FFT from ..math.fft.clfft import __have_clfft__ +from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter from ..utils.deprecation import deprecated -def nextpow2(N): - p = 1 - while p < N: - p *= 2 - return p - -def compute_ramlak_filter(dwidth_padded, dtype=np.float32): - """ - Compute the Ramachandran-Lakshminarayanan (Ram-Lak) filter, used in - filtered backprojection. - - :param dwidth_padded: width of the 2D sinogram after padding - :param dtype: data type - """ - #~ dwidth_padded = dwidth * 2 - L = dwidth_padded - h = np.zeros(L, dtype=dtype) - L2 = L//2+1 - h[0] = 1/4. - j = np.linspace(1, L2, L2//2, False).astype(dtype) # np < 1.9.0 - # h[2::2] = 0 - h[1:L2:2] = -1./(pi**2 * j**2) - # h[-1:L2-1:-2] = -1./(pi**2 * j**2) - h[L2:] = np.copy(h[1:L2-1][::-1]) - return h - - -def tukey(N, alpha=0.5): - """ - Compute the Tukey apodization window. - - :param int N: Number of points. - :param float alpha: - """ - apod = np.zeros(N) - x = np.arange(N)/(N-1) - r = alpha - M1 = (0 <= x) * (x < r/2) - M2 = (r/2 <= x) * (x <= 1 - r/2) - M3 = (1 - r/2 < x) * (x <= 1) - apod[M1] = (1 + np.cos(2*pi/r * (x[M1] - r/2)))/2. - apod[M2] = 1. - apod[M3] = (1 + np.cos(2*pi/r * (x[M3] - 1 + r/2)))/2. - return apod - - -def lanczos(N): - """ - Compute the Lanczos window (truncated sinc) of width N. - - :param int N: window width - """ - x = np.arange(N)/(N-1) - return np.sin(pi*(2*x-1))/(pi*(2*x-1)) - - -def compute_fourier_filter(dwidth_padded, filter_name, cutoff=1.): - """ - Compute the filter used for FBP. - - :param dwidth_padded: padded detector width. As the filtering is done by the - Fourier convolution theorem, dwidth_padded should be at least 2*dwidth. - :param filter_name: Name of the filter. Available filters are: - Ram-Lak, Shepp-Logan, Cosine, Hamming, Hann, Tukey, Lanczos. - :param cutoff: Cut-off frequency, if relevant. - """ - Nf = dwidth_padded - #~ filt_f = np.abs(np.fft.fftfreq(Nf)) - rl = compute_ramlak_filter(Nf, dtype=np.float64) - filt_f = np.fft.fft(rl) - - filter_name = filter_name.lower() - if filter_name in ["ram-lak", "ramlak"]: - return filt_f - - w = 2 * pi * np.fft.fftfreq(dwidth_padded) - d = cutoff - apodization = { - # ~OK - "shepp-logan": np.sin(w[1:Nf]/(2*d))/(w[1:Nf]/(2*d)), - # ~OK - "cosine": np.cos(w[1:Nf]/(2*d)), - # OK - "hamming": 0.54*np.ones_like(filt_f)[1:Nf] + .46 * np.cos(w[1:Nf]/d), - # OK - "hann": (np.ones_like(filt_f)[1:Nf] + np.cos(w[1:Nf]/d))/2., - # These one is not compatible with Astra - TODO investigate why - "tukey": np.fft.fftshift(tukey(dwidth_padded, alpha=d/2.))[1:Nf], - "lanczos": np.fft.fftshift(lanczos(dwidth_padded))[1:Nf], - } - if filter_name not in apodization: - raise ValueError("Unknown filter %s. Available filters are %s" % - (filter_name, str(apodization.keys()))) - filt_f[1:Nf] *= apodization[filter_name] - return filt_f - - -def generate_powers(): - """ - Generate a list of powers of [2, 3, 5, 7], - up to (2**15)*(3**9)*(5**6)*(7**5). - """ - primes = [2, 3, 5, 7] - maxpow = {2: 15, 3: 9, 5: 6, 7: 5} - valuations = [] - for prime in primes: - valuations.append(range(maxpow[prime]+1)) - powers = product(*valuations) - res = [] - for pw in powers: - res.append(np.prod(list(map(lambda x : x[0]**x[1], zip(primes, pw))))) - return np.unique(res) - - class SinoFilter(OpenclProcessing): """ @@ -213,10 +99,10 @@ class SinoFilter(OpenclProcessing): self.sino_shape = sino_shape self.n_angles = n_angles self.dwidth = dwidth - self.dwidth_padded = self.get_next_power(2*self.dwidth) + self.dwidth_padded = get_next_power(2 * self.dwidth, powers=self.powers) self.sino_padded_shape = (n_angles, self.dwidth_padded) sino_f_shape = list(self.sino_padded_shape) - sino_f_shape[-1] = sino_f_shape[-1]//2+1 + sino_f_shape[-1] = sino_f_shape[-1] // 2 + 1 self.sino_f_shape = tuple(sino_f_shape) def _init_extra_options(self, extra_options): @@ -268,16 +154,6 @@ class SinoFilter(OpenclProcessing): self.tmp_sino_device = parray.zeros(self.queue, self.sino_shape, "f") self.tmp_sino_host = np.zeros(self.sino_shape, "f") - def get_next_power(self, n): - """ - Given a number, get the closest (upper) number p such that - p is a power of 2, 3, 5 and 7. - """ - idx = bisect(self.powers, n) - if self.powers[idx-1] == n: - return n - return self.powers[idx] - def _compute_filter(self, filter_name): """ @@ -286,9 +162,9 @@ class SinoFilter(OpenclProcessing): self.filter_name = filter_name or "ram-lak" filter_f = compute_fourier_filter( self.dwidth_padded, - filter_name, + self.filter_name, cutoff=self.extra_options["cutoff"], - )[:self.dwidth_padded//2+1] # R2C + )[:self.dwidth_padded // 2 + 1] # R2C self.set_filter(filter_f, normalize=True) def set_filter(self, h_filt, normalize=True): @@ -313,22 +189,18 @@ class SinoFilter(OpenclProcessing): print("Warning: expected a complex Fourier filter") self.filter_f = h_filt if normalize: - self.filter_f *= pi/self.n_angles + self.filter_f *= pi / self.n_angles self.filter_f = self.filter_f.astype(np.complex64) self.d_filter_f[:] = self.filter_f[:] def _init_kernels(self): OpenclProcessing.compile_kernels(self, self.kernel_files) h, w = self.d_sino_f.shape - self.mult_kern_args = ( - self.queue, - np.int32(self.d_sino_f.shape[::-1]), - None, - self.d_sino_f.data, - self.d_filter_f.data, - np.int32(w), - np.int32(h) - ) + self.mult_kern_args = (self.queue, (int(w), (int(h))), None, + self.d_sino_f.data, + self.d_filter_f.data, + np.int32(w), + np.int32(h)) def check_array(self, arr): if arr.dtype != np.float32: @@ -350,18 +222,16 @@ class SinoFilter(OpenclProcessing): :param dst_offset: :param src_offset: """ - self.kernels.cpy2d( - self.queue, - np.int32(transfer_shape[::-1]), - None, - dst.data, - src.data, - np.int32(dst.shape[1]), - np.int32(src.shape[1]), - np.int32(dst_offset), - np.int32(src_offset), - np.int32(transfer_shape[::-1]) - ) + shape = tuple(int(i) for i in transfer_shape[::-1]) + ev = self.kernels.cpy2d(self.queue, shape, None, + dst.data, + src.data, + np.int32(dst.shape[1]), + np.int32(src.shape[1]), + np.int32(dst_offset), + np.int32(src_offset), + np.int32(transfer_shape[::-1])) + ev.wait() def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0), src_offset=(0, 0)): @@ -376,11 +246,10 @@ class SinoFilter(OpenclProcessing): s = transfer_shape do = dst_offset so = src_offset - dst[do[0]:do[0]+s[0], do[1]:do[1]+s[1]] = src[so[0]:so[0]+s[0], so[1]:so[1]+s[1]] + dst[do[0]:do[0] + s[0], do[1]:do[1] + s[1]] = src[so[0]:so[0] + s[0], so[1]:so[1] + s[1]] def _prepare_input_sino(self, sino): """ - :param sino: sinogram """ self.check_array(sino) @@ -401,7 +270,7 @@ class SinoFilter(OpenclProcessing): # Rectangular copy D->D self.copy2d(self.d_sino_padded, d_sino_ref, self.sino_shape) if self.is_cpu: - self.d_sino_padded.finish() + self.d_sino_padded.finish() # should not be required here else: # Numpy backend: FFT/mult/IFFT are done on host. if not(isinstance(sino, np.ndarray)): @@ -415,7 +284,6 @@ class SinoFilter(OpenclProcessing): def _get_output_sino(self, output): """ - :param Union[numpy.dtype,None] output: sinogram output. :return: sinogram """ @@ -433,14 +301,14 @@ class SinoFilter(OpenclProcessing): src=self.d_sino_padded, transfer_shape=self.sino_shape) if self.is_cpu: - self.tmp_sino_device.finish() - res[:] = self.tmp_sino_device[:] + self.tmp_sino_device.finish() # should not be required here + res[:] = self.tmp_sino_device.get()[:] else: if self.is_cpu: self.d_sino_padded.finish() self.copy2d(res, self.d_sino_padded, self.sino_shape) if self.is_cpu: - res.finish() + res.finish() # should not be required here else: if not(isinstance(res, np.ndarray)): # Numpy backend + pyopencl output: rect copy H->H + copy H->D @@ -467,11 +335,12 @@ class SinoFilter(OpenclProcessing): def _multiply_fourier(self): if self.fft_backend == "opencl": # Everything is on device. Call the multiplication kernel. - self.kernels.inplace_complex_mul_2Dby1D( + ev = self.kernels.inplace_complex_mul_2Dby1D( *self.mult_kern_args ) + ev.wait() if self.is_cpu: - self.d_sino_f.finish() + self.d_sino_f.finish() # should not be required here else: # Everything is on host. self.d_sino_f *= self.filter_f @@ -508,7 +377,7 @@ class SinoFilter(OpenclProcessing): # return res = self._get_output_sino(output) return res - #~ return output + # ~ return output __call__ = filter_sino @@ -519,6 +388,14 @@ class SinoFilter(OpenclProcessing): # - Compatibility - # ------------------- + +def nextpow2(N): + p = 1 + while p < N: + p *= 2 + return p + + @deprecated(replacement="Backprojection.sino_filter", since_version="0.10") def fourier_filter(sino, filter_=None, fft_size=None): """Simple np based implementation of fourier space filter. diff --git a/silx/opencl/sparse.py b/silx/opencl/sparse.py new file mode 100644 index 0000000..8bfaea8 --- /dev/null +++ b/silx/opencl/sparse.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for data sparsification on CPU/GPU.""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["P. Paleo"] +__license__ = "MIT" +__date__ = "07/06/2019" + +import numpy +import pyopencl.array as parray +from collections import namedtuple +from pyopencl.scan import GenericScanKernel +from .common import pyopencl as cl +from .processing import OpenclProcessing, EventDescription, BufferDescription +mf = cl.mem_flags + + +CSRData = namedtuple("CSRData", ["data", "indices", "indptr"]) + +def tuple_to_csrdata(arrs): + """ + Converts a 3-tuple to a CSRData namedtuple. + """ + if arrs is None: + return None + return CSRData(data=arrs[0], indices=arrs[1], indptr=arrs[2]) + + + +# only float32 arrays are supported for now +class CSR(OpenclProcessing): + kernel_files = ["sparse.cl"] + + def __init__(self, shape, max_nnz=None, ctx=None, devicetype="all", + platformid=None, deviceid=None, block_size=None, memory=None, + profile=False): + """ + Compute Compressed Sparse Row format of an image (2D matrix). + It is designed to be compatible with scipy.sparse.csr_matrix. + + :param shape: tuple + Matrix shape. + :param max_nnz: int, optional + Maximum number of non-zero elements. By default, the arrays "data" + and "indices" are allocated with prod(shape) elements, but + in practice a much lesser space is needed. + The number of non-zero items cannot be known in advance, but one can + estimate an upper-bound with this parameter to save memory. + + Opencl processing parameters + ----------------------------- + Please refer to the documentation of silx.opencl.processing.OpenclProcessing + for information on the other parameters. + """ + + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + self._set_parameters(shape, max_nnz) + self._allocate_memory() + self._setup_kernels() + + # -------------------------------------------------------------------------- + # -------------------------- Initialization -------------------------------- + # -------------------------------------------------------------------------- + + def _set_parameters(self, shape, max_nnz): + self.shape = shape + self.size = numpy.prod(shape) + self.indice_dtype = numpy.int32 # + assert len(shape) == 2 # + if max_nnz is None: + self.max_nnz = numpy.prod(shape) # worst case + else: + self.max_nnz = int(max_nnz) + + + def _allocate_memory(self): + self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ? + self.buffers = [ + BufferDescription("array", (self.size,), numpy.float32, mf.READ_ONLY), + BufferDescription("data", (self.max_nnz,), numpy.float32, mf.READ_WRITE), + BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE), + BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE), + ] + self.allocate_buffers(use_array=True) + for arr_name in ["array", "data", "indices", "indptr"]: + setattr(self, arr_name, self.cl_mem[arr_name]) + self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty() + self._old_array = self.array + self._old_data = self.data + self._old_indices = self.indices + self._old_indptr = self.indptr + + + def _setup_kernels(self): + self._setup_compaction_kernel() + self._setup_decompaction_kernel() + + + def _setup_compaction_kernel(self): + self.scan_kernel = GenericScanKernel( + self.ctx, self.indice_dtype, + arguments="__global float* data, __global float *data_compacted, __global int *indices, __global int* indptr", + input_expr="(fabs(data[i]) > 0.0f) ? 1 : 0", + scan_expr="a+b", neutral="0", + output_statement=""" + // item is the running sum of input_expr(i), i.e the cumsum of "nonzero" + if (prev_item != item) { + data_compacted[item-1] = data[i]; + indices[item-1] = GET_INDEX(i); + } + // The last cumsum element of each line of "nonzero" goes to inptr[i] + if ((i+1) % IMAGE_WIDTH == 0) { + indptr[(i/IMAGE_WIDTH)+1] = item; + } + """, + options="-DIMAGE_WIDTH=%d" % self.shape[1], + preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)", + ) + + + def _setup_decompaction_kernel(self): + OpenclProcessing.compile_kernels( + self, + self.kernel_files, + compile_options=["-DIMAGE_WIDTH=%d" % self.shape[1]] + ) + device = self.ctx.devices[0] + wg_x = min( + device.max_work_group_size, + 32, + self.kernels.max_workgroup_size("densify_csr") + ) + self._decomp_wg = (wg_x, 1) + self._decomp_grid = (self._decomp_wg[0], self.shape[0]) + + + # -------------------------------------------------------------------------- + # -------------------------- Array utils ----------------------------------- + # -------------------------------------------------------------------------- + + # TODO handle pyopencl Buffer + def check_array(self, arr): + """ + Check that provided array is compatible with current context. + + :param arr: numpy.ndarray or pyopencl.array.Array + 2D array in dense format. + """ + assert arr.size == self.size + assert arr.dtype == numpy.float32 + + + # TODO handle pyopencl Buffer + def check_sparse_arrays(self, csr_data): + """ + Check that the provided sparse arrays are compatible with the current + context. + + :param arrays: namedtuple CSRData. + It contains the arrays "data", "indices", "indptr" + """ + assert isinstance(csr_data, CSRData) + for arr in [csr_data.data, csr_data.indices, csr_data.indptr]: + assert arr.ndim == 1 + assert csr_data.data.size == self.max_nnz + assert csr_data.indices.size == self.max_nnz + assert csr_data.indptr.size == self.shape[0]+1 + assert csr_data.data.dtype == numpy.float32 + assert csr_data.indices.dtype == self.indice_dtype + assert csr_data.indptr.dtype == self.indice_dtype + + + def set_array(self, arr): + """ + Set the provided array as the current context 2D matrix. + + :param arr: numpy.ndarray or pyopencl.array.Array + 2D array in dense format. + """ + if arr is None: + return + self.check_array(arr) + # GenericScanKernel only supports 1D data + if isinstance(arr, parray.Array): + self._old_array = self.array + self.array = arr + elif isinstance(arr, numpy.ndarray): + self.array[:] = arr.ravel()[:] + else: + raise ValueError("Expected pyopencl array or numpy array") + + + def set_sparse_arrays(self, csr_data): + if csr_data is None: + return + self.check_sparse_arrays(csr_data) + for name, arr in {"data": csr_data.data, "indices": csr_data.indices, "indptr": csr_data.indptr}.items(): + # The current array is a device array. Don't copy, use it directly + if isinstance(arr, parray.Array): + setattr(self, "_old_" + name, getattr(self, name)) + setattr(self, name, arr) + # The current array is a numpy.ndarray: copy H2D + elif isinstance(arr, numpy.ndarray): + getattr(self, name)[:] = arr[:] + else: + raise ValueError("Unsupported array type: %s" % type(arr)) + + + def _recover_arrays_references(self): + """ + Recover the previous arrays references, and return the references of the + "current" arrays. + """ + array = self.array + data = self.data + indices = self.indices + indptr = self.indptr + for name in ["array", "data", "indices", "indptr"]: + # self.X = self._old_X + setattr(self, name, getattr(self, "_old_" + name)) + return array, (data, indices, indptr) + + + def get_sparse_arrays(self, output): + """ + Get the 2D dense array of the current context. + + :param output: tuple or None + tuple in the form (data, indices, indptr). These arrays have to be + compatible with the current context (size and data type). + The content of these arrays will be overwritten with the result of + the previous computation. + """ + numels = self.max_nnz + if output is None: + data = self.data.get()[:numels] + ind = self.indices.get()[:numels] + indptr = self.indptr.get() + res = (data, ind, indptr) + else: + res = output + return res + + + def get_array(self, output): + if output is None: + res = self.array.get().reshape(self.shape) + else: + res = output + return res + + # -------------------------------------------------------------------------- + # -------------------------- Compaction ------------------------------------ + # -------------------------------------------------------------------------- + + def sparsify(self, arr, output=None): + """ + Convert an image (2D matrix) into a CSR representation. + + :param arr: numpy.ndarray or pyopencl.array.Array + Input array. + :param output: tuple of pyopencl.array.Array, optional + If provided, this must be a tuple of 3 arrays (data, indices, indptr). + The content of each array is overwritten by the computation result. + """ + self.set_array(arr) + self.set_sparse_arrays(tuple_to_csrdata(output)) + evt = self.scan_kernel( + self.array, + self.data, + self.indices, + self.indptr, + ) + #~ evt.wait() + self.profile_add(evt, "sparsification kernel") + res = self.get_sparse_arrays(output) + self._recover_arrays_references() + return res + + # -------------------------------------------------------------------------- + # -------------------------- Decompaction ---------------------------------- + # -------------------------------------------------------------------------- + + def densify(self, data, indices, indptr, output=None): + self.set_sparse_arrays( + CSRData(data=data, indices=indices, indptr=indptr) + ) + self.set_array(output) + evt = self.kernels.densify_csr( + self.queue, + self._decomp_grid, + self._decomp_wg, + self.data.data, + self.indices.data, + self.indptr.data, + self.array.data, + numpy.int32(self.shape[0]), + ) + #~ evt.wait() + self.profile_add(evt, "desparsification kernel") + res = self.get_array(output) + self._recover_arrays_references() + return res + diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py index f3121d5..2e90e66 100644 --- a/silx/opencl/test/__init__.py +++ b/silx/opencl/test/__init__.py @@ -38,6 +38,8 @@ from ..codec import test as test_codec from . import test_image from . import test_kahan from . import test_stats +from . import test_convolution +from . import test_sparse def suite(): @@ -52,6 +54,8 @@ def suite(): test_suite.addTests(test_image.suite()) test_suite.addTests(test_kahan.suite()) test_suite.addTests(test_stats.suite()) + test_suite.addTests(test_convolution.suite()) + test_suite.addTests(test_sparse.suite()) # Allow to remove sift from the project test_base_dir = os.path.dirname(__file__) sift_dir = os.path.join(test_base_dir, "..", "sift") diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py index 7ab416d..b2f2070 100644 --- a/silx/opencl/test/test_backprojection.py +++ b/silx/opencl/test/test_backprojection.py @@ -45,7 +45,7 @@ except ImportError: from ..common import ocl if ocl: from .. import backprojection - from ..sinofilter import compute_fourier_filter + from ...image.tomography import compute_fourier_filter from silx.test.utils import utilstest logger = logging.getLogger(__name__) @@ -199,11 +199,30 @@ class TestFBP(unittest.TestCase): "Something wrong with FBP(filter=%s)" % filter_name ) + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_fbp_oddsize(self): + # Generate a 513-sinogram. + # The padded width will be nextpow(513*2). + # silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail. + sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge') + B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1) + res = B(sino) + # Compare with self.reference_rec. Tolerance is high as backprojector + # is not fully shift-invariant. + errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec))) + self.assertLess( + errmax, 1.e-1, + "Something wrong with FBP on odd-sized sinogram" + ) + + + def suite(): testSuite = unittest.TestSuite() testSuite.addTest(TestFBP("test_fbp")) testSuite.addTest(TestFBP("test_fbp_filters")) + testSuite.addTest(TestFBP("test_fbp_oddsize")) return testSuite diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py new file mode 100644 index 0000000..8acd385 --- /dev/null +++ b/silx/opencl/test/test_convolution.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ + +""" +Test of the Convolution class. +""" + +from __future__ import division, print_function + +__authors__ = ["Pierre Paleo"] +__contact__ = "pierre.paleo@esrf.fr" +__license__ = "MIT" +__copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "15/02/2019" + +import logging +from itertools import product +import numpy as np +from silx.utils.testutils import parameterize +try: + from scipy.ndimage import convolve, convolve1d + from scipy.misc import ascent + scipy_convolve = convolve + scipy_convolve1d = convolve1d +except ImportError: + scipy_convolve = None +import unittest +from ..common import ocl +if ocl: + import pyopencl as cl + import pyopencl.array as parray + from ..convolution import Convolution, gaussian_kernel +logger = logging.getLogger(__name__) + + +@unittest.skipUnless(ocl and scipy_convolve, "PyOpenCl/scipy is missing") +class TestConvolution(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super(TestConvolution, cls).setUpClass() + cls.image = np.ascontiguousarray(ascent()[:, :511], dtype="f") + cls.data1d = cls.image[0] + cls.data2d = cls.image + cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1)) + cls.kernel1d = gaussian_kernel(1.) + cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d) + cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d) + cls.ctx = ocl.create_context() + cls.tol = { + "1D": 1e-4, + "2D": 1e-3, + "3D": 1e-3, + } + + @classmethod + def tearDownClass(cls): + cls.data1d = cls.data2d = cls.data3d = cls.image = None + cls.kernel1d = cls.kernel2d = cls.kernel3d = None + + @staticmethod + def compare(arr1, arr2): + return np.max(np.abs(arr1 - arr2)) + + @staticmethod + def print_err(conv): + errmsg = str( + """ + Something wrong with %s + mode=%s, texture=%s + """ + % (conv.use_case_desc, conv.mode, conv.use_textures) + ) + return errmsg + + def __init__(self, methodName='runTest', param=None): + unittest.TestCase.__init__(self, methodName) + self.param = param + self.mode = param["boundary_handling"] + logger.debug( + """ + Testing convolution with boundary_handling=%s, + use_textures=%s, input_device=%s, output_device=%s + """ + % ( + self.mode, param["use_textures"], + param["input_on_device"], param["output_on_device"] + ) + ) + + def instantiate_convol(self, shape, kernel, axes=None): + if (self.mode == "constant") and ( + not(self.param["use_textures"]) + or (self.ctx.devices[0].type == cl._cl.device_type.CPU) + ): + self.skipTest("mode=constant not implemented without textures") + C = Convolution( + shape, kernel, + mode=self.mode, + ctx=self.ctx, + axes=axes, + extra_options={"dont_use_textures": not(self.param["use_textures"])} + ) + return C + + def get_data_and_kernel(self, test_name): + dims = { + "test_1D": (1, 1), + "test_separable_2D": (2, 1), + "test_separable_3D": (3, 1), + "test_nonseparable_2D": (2, 2), + "test_nonseparable_3D": (3, 3), + } + dim_data = { + 1: self.data1d, + 2: self.data2d, + 3: self.data3d + } + dim_kernel = { + 1: self.kernel1d, + 2: self.kernel2d, + 3: self.kernel3d, + } + dd, kd = dims[test_name] + return dim_data[dd], dim_kernel[kd] + + def get_reference_function(self, test_name): + ref_func = { + "test_1D": + lambda x, y : scipy_convolve1d(x, y, mode=self.mode), + "test_separable_2D": + lambda x, y : scipy_convolve1d( + scipy_convolve1d(x, y, mode=self.mode, axis=1), + y, mode=self.mode, axis=0 + ), + "test_separable_3D": + lambda x, y: scipy_convolve1d( + scipy_convolve1d( + scipy_convolve1d(x, y, mode=self.mode, axis=2), + y, mode=self.mode, axis=1), + y, mode=self.mode, axis=0 + ), + "test_nonseparable_2D": + lambda x, y: scipy_convolve(x, y, mode=self.mode), + "test_nonseparable_3D": + lambda x, y : scipy_convolve(x, y, mode=self.mode), + } + return ref_func[test_name] + + def template_test(self, test_name): + data, kernel = self.get_data_and_kernel(test_name) + conv = self.instantiate_convol(data.shape, kernel) + if self.param["input_on_device"]: + data_ref = parray.to_device(conv.queue, data) + else: + data_ref = data + if self.param["output_on_device"]: + d_res = parray.zeros_like(conv.data_out) + res = d_res + else: + res = None + res = conv(data_ref, output=res) + if self.param["output_on_device"]: + res = res.get() + ref_func = self.get_reference_function(test_name) + ref = ref_func(data, kernel) + metric = self.compare(res, ref) + logger.info("%s: max error = %.2e" % (test_name, metric)) + tol = self.tol[str("%dD" % kernel.ndim)] + self.assertLess(metric, tol, self.print_err(conv)) + + def test_1D(self): + self.template_test("test_1D") + + def test_separable_2D(self): + self.template_test("test_separable_2D") + + def test_separable_3D(self): + self.template_test("test_separable_3D") + + def test_nonseparable_2D(self): + self.template_test("test_nonseparable_2D") + + def test_nonseparable_3D(self): + self.template_test("test_nonseparable_3D") + + def test_batched_2D(self): + """ + Test batched (nonseparable) 2D convolution on 3D data. + In this test: batch along "z" (axis 0) + """ + data = self.data3d + kernel = self.kernel2d + conv = self.instantiate_convol(data.shape, kernel, axes=(0,)) + res = conv(data) # 3D + ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D + + std = np.std(res, axis=0) + std_max = np.max(np.abs(std)) + self.assertLess(std_max, self.tol["2D"], self.print_err(conv)) + metric = self.compare(res[0], ref) + logger.info("test_nonseparable_3D: max error = %.2e" % metric) + self.assertLess(metric, self.tol["2D"], self.print_err(conv)) + + +def test_convolution(): + boundary_handling_ = ["reflect", "nearest", "wrap", "constant"] + use_textures_ = [True, False] + input_on_device_ = [True, False] + output_on_device_ = [True, False] + testSuite = unittest.TestSuite() + + param_vals = list(product( + boundary_handling_, + use_textures_, + input_on_device_, + output_on_device_ + )) + for boundary_handling, use_textures, input_dev, output_dev in param_vals: + testcase = parameterize( + TestConvolution, + param={ + "boundary_handling": boundary_handling, + "input_on_device": input_dev, + "output_on_device": output_dev, + "use_textures": use_textures, + } + ) + testSuite.addTest(testcase) + return testSuite + + + +def suite(): + testSuite = test_convolution() + return testSuite + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") diff --git a/silx/opencl/test/test_sparse.py b/silx/opencl/test/test_sparse.py new file mode 100644 index 0000000..56f1ba4 --- /dev/null +++ b/silx/opencl/test/test_sparse.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2018-2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Test of the sparse module""" + +import numpy as np +import unittest +import logging +from itertools import product +from ..common import ocl +if ocl: + import pyopencl.array as parray + from silx.opencl.sparse import CSR +try: + import scipy.sparse as sp +except ImportError: + sp = None +logger = logging.getLogger(__name__) + + + +def generate_sparse_random_data( + shape=(1000,), + data_min=0, data_max=100, + density=0.1, + use_only_integers=True, + dtype="f"): + """ + Generate random sparse data where. + + Parameters + ------------ + shape: tuple + Output data shape. + data_min: int or float + Minimum value of data + data_max: int or float + Maximum value of data + density: float + Density of non-zero elements in the output data. + Low value of density mean low number of non-zero elements. + use_only_integers: bool + If set to True, the output data items will be primarily integers, + possibly casted to float if dtype is a floating-point type. + This can be used for ease of debugging. + dtype: str or numpy.dtype + Output data type + """ + mask = np.random.binomial(1, density, size=shape) + if use_only_integers: + d = np.random.randint(data_min, high=data_max, size=shape) + else: + d = data_min + (data_max - data_min) * np.random.rand(*shape) + return (d * mask).astype(dtype) + + + +@unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing") +class TestCSR(unittest.TestCase): + """Test CSR format""" + + def setUp(self): + self.array = generate_sparse_random_data(shape=(512, 511)) + # Compute reference sparsification + a_s = sp.csr_matrix(self.array) + self.ref_data = a_s.data + self.ref_indices = a_s.indices + self.ref_indptr = a_s.indptr + self.ref_nnz = a_s.nnz + # Test possible configurations + input_on_device = [False, True] + output_on_device = [False, True] + self._test_configs = list(product(input_on_device, output_on_device)) + + + def test_sparsification(self): + for input_on_device, output_on_device in self._test_configs: + self._test_sparsification(input_on_device, output_on_device) + + + def _test_sparsification(self, input_on_device, output_on_device): + current_config = "input on device: %s, output on device: %s" % ( + str(input_on_device), str(output_on_device) + ) + # Sparsify on device + csr = CSR(self.array.shape) + if input_on_device: + # The array has to be flattened + arr = parray.to_device(csr.queue, self.array.ravel()) + else: + arr = self.array + if output_on_device: + d_data = parray.zeros_like(csr.data) + d_indices = parray.zeros_like(csr.indices) + d_indptr = parray.zeros_like(csr.indptr) + output = (d_data, d_indices, d_indptr) + else: + output = None + data, indices, indptr = csr.sparsify(arr, output=output) + if output_on_device: + data = data.get() + indices = indices.get() + indptr = indptr.get() + # Compare + nnz = self.ref_nnz + self.assertTrue( + np.allclose(data[:nnz], self.ref_data), + "something wrong with sparsified data (%s)" + % current_config + ) + self.assertTrue( + np.allclose(indices[:nnz], self.ref_indices), + "something wrong with sparsified indices (%s)" + % current_config + ) + self.assertTrue( + np.allclose(indptr, self.ref_indptr), + "something wrong with sparsified indices pointers (indptr) (%s)" + % current_config + ) + + + def test_desparsification(self): + for input_on_device, output_on_device in self._test_configs: + self._test_desparsification(input_on_device, output_on_device) + + + def _test_desparsification(self, input_on_device, output_on_device): + current_config = "input on device: %s, output on device: %s" % ( + str(input_on_device), str(output_on_device) + ) + # De-sparsify on device + csr = CSR(self.array.shape, max_nnz=self.ref_nnz) + if input_on_device: + data = parray.to_device(csr.queue, self.ref_data) + indices = parray.to_device(csr.queue, self.ref_indices) + indptr = parray.to_device(csr.queue, self.ref_indptr) + else: + data = self.ref_data + indices = self.ref_indices + indptr = self.ref_indptr + if output_on_device: + d_arr = parray.zeros_like(csr.array) + output = d_arr + else: + output = None + arr = csr.densify(data, indices, indptr, output=output) + if output_on_device: + arr = arr.get() + # Compare + self.assertTrue( + np.allclose(arr.reshape(self.array.shape), self.array), + "something wrong with densified data (%s)" + % current_config + ) + + + +def suite(): + suite = unittest.TestSuite() + suite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase(TestCSR) + ) + return suite + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") + + |