summaryrefslogtreecommitdiff
path: root/silx/opencl
diff options
context:
space:
mode:
Diffstat (limited to 'silx/opencl')
-rw-r--r--silx/opencl/backprojection.py11
-rw-r--r--silx/opencl/common.py52
-rw-r--r--silx/opencl/convolution.py555
-rw-r--r--silx/opencl/processing.py57
-rw-r--r--silx/opencl/sinofilter.py205
-rw-r--r--silx/opencl/sparse.py331
-rw-r--r--silx/opencl/test/__init__.py4
-rw-r--r--silx/opencl/test/test_backprojection.py21
-rw-r--r--silx/opencl/test/test_convolution.py263
-rw-r--r--silx/opencl/test/test_sparse.py192
10 files changed, 1507 insertions, 184 deletions
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py
index c257872..5a4087b 100644
--- a/silx/opencl/backprojection.py
+++ b/silx/opencl/backprojection.py
@@ -154,6 +154,9 @@ class Backprojection(OpenclProcessing):
self.extra_options = {
"cutoff": 1.,
"use_numpy_fft": False,
+ # It is axis_pos - (num_bins-1)/2 in PyHST
+ "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2.,
+ "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2.
}
if extra_options is not None:
self.extra_options.update(extra_options)
@@ -221,10 +224,10 @@ class Backprojection(OpenclProcessing):
self.cl_mem["_d_slice"].data,
# d_sino (__read_only image2d_t or float*)
d_sino_ref,
- # gpu_offset_x (float32) # TODO custom ?
- -np.float32((self.num_bins - 1) / 2. - self.axis_pos),
- # gpu_offset_y (float32) # TODO custom ?
- -np.float32((self.num_bins - 1) / 2. - self.axis_pos),
+ # gpu_offset_x (float32) 
+ np.float32(self.extra_options["gpu_offset_x"]),
+ # gpu_offset_y (float32)
+ np.float32(self.extra_options["gpu_offset_y"]),
# d_cos (__global float32*)
self.cl_mem["d_cos"].data,
# d_sin (__global float32*)
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 9a04035..8d31c8a 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "20/09/2018"
+__date__ = "07/06/2019"
__status__ = "stable"
__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
"measure_workgroup_size", "kernel_workgroup_size"]
@@ -158,6 +158,11 @@ class Device(object):
"Available\t:\t%s" % self.available]
return os.linesep.join(lst)
+ def set_unavailable(self):
+ """Use this method to flag a faulty device
+ """
+ self.available = False
+
class Platform(object):
"""
@@ -220,8 +225,16 @@ def _measure_workgroup_size(device_or_context, fast=False):
:return: maximum size for the workgroup
"""
if isinstance(device_or_context, pyopencl.Device):
- ctx = pyopencl.Context(devices=[device_or_context])
- device = device_or_context
+ try:
+ ctx = pyopencl.Context(devices=[device_or_context])
+ except pyopencl._cl.LogicError as error:
+ platform = device_or_context.platform
+ platformid = pyopencl.get_platforms().index(platform)
+ deviceid = platform.get_devices().index(device_or_context)
+ ocl.platforms[platformid].devices[deviceid].set_unavailable()
+ raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error))
+ else:
+ device = device_or_context
elif isinstance(device_or_context, pyopencl.Context):
ctx = device_or_context
device = device_or_context.devices[0]
@@ -379,6 +392,8 @@ class OpenCL(object):
best_found = None
for platformid, platform in enumerate(self.platforms):
for deviceid, device in enumerate(platform.devices):
+ if not device.available:
+ continue
if (dtype in ["ALL", "DEF"]) or (device.type == dtype):
if (memory is None) or (memory <= device.memory):
found = True
@@ -400,7 +415,7 @@ class OpenCL(object):
return None
def create_context(self, devicetype="ALL", useFp64=False, platformid=None,
- deviceid=None, cached=True, memory=None):
+ deviceid=None, cached=True, memory=None, extensions=None):
"""
Choose a device and initiate a context.
@@ -410,13 +425,20 @@ class OpenCL(object):
E.g.: If Nvidia driver is installed, GPU will succeed but CPU will fail.
The AMD SDK kit is required for CPU via OpenCL.
:param devicetype: string in ["cpu","gpu", "all", "acc"]
- :param useFp64: boolean specifying if double precision will be used
+ :param useFp64: boolean specifying if double precision will be used: deprecated use extensions=["cl_khr_fp64"]
:param platformid: integer
:param deviceid: integer
:param cached: True if we want to cache the context
:param memory: minimum amount of memory of the device
+ :param extensions: list of extensions to be present
:return: OpenCL context on the selected device
"""
+ if extensions is None:
+ extensions = []
+ if useFp64:
+ logger.warning("Deprecation: please select your device using the extension name!, i.e. extensions=['cl_khr_fp64']")
+ extensions.append('cl_khr_fp64')
+
if (platformid is not None) and (deviceid is not None):
platformid = int(platformid)
deviceid = int(deviceid)
@@ -425,20 +447,24 @@ class OpenCL(object):
pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0
platformid, deviceid = pyopencl_ctx
else:
- if useFp64:
- ids = ocl.select_device(type=devicetype, extensions=["cl_khr_int64_base_atomics"])
- else:
- ids = ocl.select_device(dtype=devicetype)
+ ids = ocl.select_device(type=devicetype, extensions=extensions)
if ids:
platformid, deviceid = ids
+ ctx = None
if (platformid is not None) and (deviceid is not None):
if (platformid, deviceid) in self.context_cache:
ctx = self.context_cache[(platformid, deviceid)]
else:
- ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
- if cached:
- self.context_cache[(platformid, deviceid)] = ctx
- else:
+ try:
+ ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
+ except pyopencl._cl.LogicError as error:
+ self.platforms[platformid].devices[deviceid].set_unavailable()
+ logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error)
+ ctx = None
+ else:
+ if cached:
+ self.context_cache[(platformid, deviceid)] = ctx
+ if ctx is None:
logger.warning("Last chance to get an OpenCL device ... probably not the one requested")
ctx = pyopencl.create_some_context(interactive=False)
return ctx
diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py
new file mode 100644
index 0000000..18232f4
--- /dev/null
+++ b/silx/opencl/convolution.py
@@ -0,0 +1,555 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for convolution on CPU/GPU."""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "11/02/2019"
+
+import numpy as np
+from math import ceil
+from copy import copy # python2
+from .common import pyopencl as cl
+import pyopencl.array as parray
+from .processing import OpenclProcessing, EventDescription
+
+
+class ConvolutionInfos(object):
+ allowed_axes = {
+ "1D": [None],
+ "separable_2D_1D_2D": [None, (0, 1), (1, 0)],
+ "batched_1D_2D": [(0,), (1,)],
+ "separable_3D_1D_3D": [
+ None,
+ (0, 1, 2),
+ (1, 2, 0),
+ (2, 0, 1),
+ (2, 1, 0),
+ (1, 0, 2),
+ (0, 2, 1)
+ ],
+ "batched_1D_3D": [(0,), (1,), (2,)],
+ "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
+ "2D": [None],
+ "batched_2D_3D": [(0,), (1,), (2,)],
+ "separable_3D_2D_3D": [
+ (1, 0),
+ (0, 1),
+ (2, 0),
+ (0, 2),
+ (1, 2),
+ (2, 1),
+ ],
+ "3D": [None],
+ }
+ use_cases = {
+ (1, 1): {
+ "1D": {
+ "name": "1D convolution on 1D data",
+ "kernels": ["convol_1D_X"],
+ },
+ },
+ (2, 2): {
+ "2D": {
+ "name": "2D convolution on 2D data",
+ "kernels": ["convol_2D_XY"],
+ },
+ },
+ (3, 3): {
+ "3D": {
+ "name": "3D convolution on 3D data",
+ "kernels": ["convol_3D_XYZ"],
+ },
+ },
+ (2, 1): {
+ "separable_2D_1D_2D": {
+ "name": "Separable (2D->1D) convolution on 2D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y"],
+ },
+ "batched_1D_2D": {
+ "name": "Batched 1D convolution on 2D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y"],
+ },
+ },
+ (3, 1): {
+ "separable_3D_1D_3D": {
+ "name": "Separable (3D->1D) convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ "batched_1D_3D": {
+ "name": "Batched 1D convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ "batched_separable_2D_1D_3D": {
+ "name": "Batched separable (2D->1D) convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ },
+ (3, 2): {
+ "separable_3D_2D_3D": {
+ "name": "Separable (3D->2D) convolution on 3D data",
+ "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
+ },
+ "batched_2D_3D": {
+ "name": "Batched 2D convolution on 3D data",
+ "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
+ },
+ },
+ }
+
+
+class Convolution(OpenclProcessing):
+ """
+ A class for performing convolution on CPU/GPU with OpenCL.
+ """
+
+ def __init__(self, shape, kernel, axes=None, mode=None, ctx=None,
+ devicetype="all", platformid=None, deviceid=None,
+ profile=False, extra_options=None):
+ """Constructor of OpenCL Convolution.
+
+ :param shape: shape of the array.
+ :param kernel: convolution kernel (1D, 2D or 3D).
+ :param axes: axes along which the convolution is performed,
+ for batched convolutions.
+ :param mode: Boundary handling mode. Available modes are:
+ "reflect": cba|abcd|dcb
+ "nearest": aaa|abcd|ddd
+ "wrap": bcd|abcd|abc
+ "constant": 000|abcd|000
+ Default is "reflect".
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ :param extra_options: Advanced options (dict). Current options are:
+ "allocate_input_array": True,
+ "allocate_output_array": True,
+ "allocate_tmp_array": True,
+ "dont_use_textures": False,
+ """
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ self._configure_extra_options(extra_options)
+ self._determine_use_case(shape, kernel, axes)
+ self._allocate_memory(mode)
+ self._init_kernels()
+
+ def _configure_extra_options(self, extra_options):
+ self.extra_options = {
+ "allocate_input_array": True,
+ "allocate_output_array": True,
+ "allocate_tmp_array": True,
+ "dont_use_textures": False,
+ }
+ extra_opts = extra_options or {}
+ self.extra_options.update(extra_opts)
+ self.is_cpu = (self.device.type == "CPU")
+ self.use_textures = not(self.extra_options["dont_use_textures"])
+ self.use_textures *= not(self.is_cpu)
+
+ def _get_dimensions(self, shape, kernel):
+ self.shape = shape
+ self.data_ndim = self._check_dimensions(shape=shape, name="Data")
+ self.kernel_ndim = self._check_dimensions(arr=kernel, name="Kernel")
+ Nx = shape[-1]
+ if self.data_ndim >= 2:
+ Ny = shape[-2]
+ else:
+ Ny = 1
+ if self.data_ndim >= 3:
+ Nz = shape[-3]
+ else:
+ Nz = 1
+ self.Nx = np.int32(Nx)
+ self.Ny = np.int32(Ny)
+ self.Nz = np.int32(Nz)
+
+ def _determine_use_case(self, shape, kernel, axes):
+ """
+ Determine the convolution use case from the input/kernel shape, and axes.
+ """
+ self._get_dimensions(shape, kernel)
+ if self.kernel_ndim > self.data_ndim:
+ raise ValueError("Kernel dimensions cannot exceed data dimensions")
+ data_ndim = self.data_ndim
+ kernel_ndim = self.kernel_ndim
+ self.kernel = kernel.astype("f")
+
+ convol_infos = ConvolutionInfos()
+ k = (data_ndim, kernel_ndim)
+ if k not in convol_infos.use_cases:
+ raise ValueError(
+ "Cannot find a use case for data ndim = %d and kernel ndim = %d"
+ % (data_ndim, kernel_ndim)
+ )
+ possible_use_cases = convol_infos.use_cases[k]
+
+ self.use_case_name = None
+ for uc_name, uc_params in possible_use_cases.items():
+ if axes in convol_infos.allowed_axes[uc_name]:
+ self.use_case_name = uc_name
+ self.use_case_desc = uc_params["name"]
+ #~ self.use_case_kernels = uc_params["kernels"].copy()
+ self.use_case_kernels = copy(uc_params["kernels"]) # TODO use the above line once we get rid of python2
+ if self.use_case_name is None:
+ raise ValueError(
+ "Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s"
+ % (data_ndim, kernel_ndim, str(axes))
+ )
+ # TODO implement this use case
+ if self.use_case_name == "batched_separable_2D_1D_3D":
+ raise NotImplementedError(
+ "The use case %s is not implemented"
+ % self.use_case_name
+ )
+ #
+ self.axes = axes
+ # Replace "axes=None" with an actual value (except for ND-ND)
+ allowed_axes = convol_infos.allowed_axes[self.use_case_name]
+ if len(allowed_axes) > 1:
+ # The default choice might impact perfs
+ self.axes = allowed_axes[0] or allowed_axes[1]
+ self.separable = self.use_case_name.startswith("separable")
+ self.batched = self.use_case_name.startswith("batched")
+ # Update kernel names when using textures
+ if self.use_textures:
+ for i, kern_name in enumerate(self.use_case_kernels):
+ self.use_case_kernels[i] = kern_name + "_tex"
+
+ def _allocate_memory(self, mode):
+ self.mode = mode or "reflect"
+ option_array_names = {
+ "allocate_input_array": "data_in",
+ "allocate_output_array": "data_out",
+ "allocate_tmp_array": "data_tmp",
+ }
+ # Nonseparable transforms do not need tmp array
+ if not(self.separable):
+ self.extra_options["allocate_tmp_array"] = False
+ # Allocate arrays
+ for option_name, array_name in option_array_names.items():
+ if self.extra_options[option_name]:
+ value = parray.zeros(self.queue, self.shape, "f")
+ else:
+ value = None
+ setattr(self, array_name, value)
+
+ if isinstance(self.kernel, np.ndarray):
+ self.d_kernel = parray.to_device(self.queue, self.kernel)
+ else:
+ if not(isinstance(self.kernel, parray.Array)):
+ raise ValueError("kernel must be either numpy array or pyopencl array")
+ self.d_kernel = self.kernel
+ self._old_input_ref = None
+ self._old_output_ref = None
+ if self.use_textures:
+ self._allocate_textures()
+ self._c_modes_mapping = {
+ "periodic": 2,
+ "wrap": 2,
+ "nearest": 1,
+ "replicate": 1,
+ "reflect": 0,
+ "constant": 3,
+ }
+ mp = self._c_modes_mapping
+ if self.mode.lower() not in mp:
+ raise ValueError(
+ """
+ Mode %s is not available for textures. Available modes are:
+ %s
+ """
+ % (self.mode, str(mp.keys()))
+ )
+ # TODO
+ if not(self.use_textures) and self.mode.lower() == "constant":
+ raise NotImplementedError(
+ "mode='constant' is not implemented without textures yet"
+ )
+ #
+ self._c_conv_mode = mp[self.mode]
+
+ def _allocate_textures(self):
+ self.data_in_tex = self.allocate_texture(self.shape)
+ self.d_kernel_tex = self.allocate_texture(self.kernel.shape)
+ self.transfer_to_texture(self.d_kernel, self.d_kernel_tex)
+
+ def _init_kernels(self):
+ if self.kernel_ndim > 1:
+ if np.abs(np.diff(self.kernel.shape)).max() > 0:
+ raise NotImplementedError(
+ "Non-separable convolution with non-square kernels is not implemented yet"
+ )
+ compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)]
+ if self.use_textures:
+ kernel_files = ["convolution_textures.cl"]
+ compile_options.extend([
+ str("-DIMAGE_DIMS=%d" % self.data_ndim),
+ str("-DFILTER_DIMS=%d" % self.kernel_ndim),
+ ])
+ data_in_ref = self.data_in_tex
+ d_kernel_ref = self.d_kernel_tex
+ else:
+ kernel_files = ["convolution.cl"]
+ data_in_ref = self.data_in.data
+ d_kernel_ref = self.d_kernel.data
+ self.compile_kernels(
+ kernel_files=kernel_files,
+ compile_options=compile_options
+ )
+ self.ndrange = self.shape[::-1]
+ self.wg = None
+ kernel_args = [
+ self.queue,
+ self.ndrange, self.wg,
+ data_in_ref,
+ self.data_out.data,
+ d_kernel_ref,
+ np.int32(self.kernel.shape[0]),
+ self.Nx, self.Ny, self.Nz
+ ]
+ if self.kernel_ndim == 2:
+ kernel_args.insert(6, np.int32(self.kernel.shape[1]))
+ if self.kernel_ndim == 3:
+ kernel_args.insert(6, np.int32(self.kernel.shape[2]))
+ kernel_args.insert(7, np.int32(self.kernel.shape[1]))
+ self.kernel_args = tuple(kernel_args)
+ # If self.data_tmp is allocated, separable transforms can be performed
+ # by a series of batched transforms, without any copy, by swapping refs.
+ self.swap_pattern = None
+ if self.separable:
+ if self.data_tmp is not None:
+ self.swap_pattern = {
+ 2: [
+ ("data_in", "data_tmp"),
+ ("data_tmp", "data_out")
+ ],
+ 3: [
+ ("data_in", "data_out"),
+ ("data_out", "data_tmp"),
+ ("data_tmp", "data_out"),
+ ],
+ }
+ else:
+ # TODO
+ raise NotImplementedError("For now, data_tmp has to be allocated")
+
+ def _get_swapped_arrays(self, i):
+ """
+ Get the input and output arrays to use when using a "swap pattern".
+ Swapping refs enables to avoid copies between temp. array and output.
+ For example, a separable 2D->1D convolution on 2D data reads:
+ data_tmp = convol(data_input, kernel, axis=1) # step i=0
+ data_out = convol(data_tmp, kernel, axis=0) # step i=1
+
+ :param i: current step number of the separable convolution
+ """
+ if self.use_textures:
+ # copy is needed when using texture, as data_out is a Buffer
+ if i > 0:
+ self.transfer_to_texture(self.data_out, self.data_in_tex)
+ return self.data_in_tex, self.data_out
+ n_batchs = len(self.axes)
+ in_ref, out_ref = self.swap_pattern[n_batchs][i]
+ d_in = getattr(self, in_ref)
+ d_out = getattr(self, out_ref)
+ return d_in, d_out
+
+ def _configure_kernel_args(self, opencl_kernel_args, input_ref, output_ref):
+ # TODO more elegant
+ if isinstance(input_ref, parray.Array):
+ input_ref = input_ref.data
+ if isinstance(output_ref, parray.Array):
+ output_ref = output_ref.data
+ if input_ref is not None or output_ref is not None:
+ opencl_kernel_args = list(opencl_kernel_args)
+ if input_ref is not None:
+ opencl_kernel_args[3] = input_ref
+ if output_ref is not None:
+ opencl_kernel_args[4] = output_ref
+ opencl_kernel_args = tuple(opencl_kernel_args)
+ return opencl_kernel_args
+
+ @staticmethod
+ def _check_dimensions(arr=None, shape=None, name="", dim_min=1, dim_max=3):
+ if shape is not None:
+ ndim = len(shape)
+ elif arr is not None:
+ ndim = arr.ndim
+ else:
+ raise ValueError("Please provide either arr= or shape=")
+ if ndim < dim_min or ndim > dim_max:
+ raise ValueError("%s dimensions should be between %d and %d"
+ % (name, dim_min, dim_max)
+ )
+ return ndim
+
+ def _check_array(self, arr):
+ # TODO allow cl.Buffer
+ if not(isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)):
+ raise TypeError("Expected either pyopencl.array.Array or numpy.ndarray")
+ # TODO composition with ImageProcessing/cast
+ if arr.dtype != np.float32:
+ raise TypeError("Data must be float32")
+ if arr.shape != self.shape:
+ raise ValueError("Expected data shape = %s" % str(self.shape))
+
+ def _set_arrays(self, array, output=None):
+ # When using textures: copy
+ if self.use_textures:
+ self.transfer_to_texture(array, self.data_in_tex)
+ data_in_ref = self.data_in_tex
+ else:
+ # Otherwise: copy H->D or update references.
+ if isinstance(array, np.ndarray):
+ self.data_in[:] = array[:]
+ else:
+ self._old_input_ref = self.data_in
+ self.data_in = array
+ data_in_ref = self.data_in
+ if output is not None:
+ if not(isinstance(output, np.ndarray)):
+ self._old_output_ref = self.data_out
+ self.data_out = output
+ # Update OpenCL kernel arguments with new array references
+ self.kernel_args = self._configure_kernel_args(
+ self.kernel_args,
+ data_in_ref,
+ self.data_out
+ )
+
+ def _separable_convolution(self):
+ assert len(self.axes) == len(self.use_case_kernels)
+ # Separable: one kernel call per data dimension
+ for i, axis in enumerate(self.axes):
+ in_ref, out_ref = self._get_swapped_arrays(i)
+ self._batched_convolution(axis, input_ref=in_ref, output_ref=out_ref)
+
+ def _batched_convolution(self, axis, input_ref=None, output_ref=None):
+ # Batched: one kernel call in total
+ opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[axis])
+ opencl_kernel_args = self._configure_kernel_args(
+ self.kernel_args,
+ input_ref,
+ output_ref
+ )
+ ev = opencl_kernel(*opencl_kernel_args)
+ if self.profile:
+ self.events.append(EventDescription("batched convolution", ev))
+
+ def _nd_convolution(self):
+ assert len(self.use_case_kernels) == 1
+ opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[0])
+ ev = opencl_kernel(*self.kernel_args)
+ if self.profile:
+ self.events.append(EventDescription("ND convolution", ev))
+
+ def _recover_arrays_references(self):
+ if self._old_input_ref is not None:
+ self.data_in = self._old_input_ref
+ self._old_input_ref = None
+ if self._old_output_ref is not None:
+ self.data_out = self._old_output_ref
+ self._old_output_ref = None
+ self.kernel_args = self._configure_kernel_args(
+ self.kernel_args,
+ self.data_in,
+ self.data_out
+ )
+
+ def _get_output(self, output):
+ if output is None:
+ res = self.data_out.get()
+ else:
+ res = output
+ if isinstance(output, np.ndarray):
+ output[:] = self.data_out[:]
+ self._recover_arrays_references()
+ return res
+
+ def convolve(self, array, output=None):
+ """
+ Convolve an array with the class kernel.
+
+ :param array: Input array. Can be numpy.ndarray or pyopencl.array.Array.
+ :param output: Output array. Can be numpy.ndarray or pyopencl.array.Array.
+ """
+ self._check_array(array)
+ self._set_arrays(array, output=output)
+ if self.axes is not None:
+ if self.separable:
+ self._separable_convolution()
+ elif self.batched:
+ assert len(self.axes) == 1
+ self._batched_convolution(self.axes[0])
+ # else: ND-ND convol
+ else:
+ # ND-ND convol
+ self._nd_convolution()
+
+ res = self._get_output(output)
+ return res
+
+
+ __call__ = convolve
+
+
+def gaussian_kernel(sigma, cutoff=4, force_odd_size=False):
+ """
+ Generates a Gaussian convolution kernel.
+
+ :param sigma: Standard Deviation of the Gaussian curve.
+ :param cutoff: Parameter tuning the truncation of the Gaussian.
+ The higher cutoff, the biggest the array will be (and the closest to
+ a "true" Gaussian function).
+ :param force_odd_size: when set to True, the resulting array will always
+ have an odd size, regardless of the values of "sigma" and "cutoff".
+ :return: a numpy.ndarray containing the truncated Gaussian function.
+ The array size is 2*c*s+1 where c=cutoff, s=sigma.
+
+ Nota: due to the quick decay of the Gaussian function, small values of the
+ "cutoff" parameter are usually fine. The energy difference between a
+ Gaussian truncated to [-c, c] and a "true" one is
+ erfc(c/(sqrt(2)*s))
+ so choosing cutoff=4*sigma keeps the truncation error below 1e-4.
+ """
+ size = int(ceil(2 * cutoff * sigma + 1))
+ if force_odd_size and size % 2 == 0:
+ size += 1
+ x = np.arange(size) - (size - 1.0) / 2.0
+ g = np.exp(-(x / sigma) ** 2 / 2.0)
+ g /= g.sum()
+ return g
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index 045a9d3..707aa72 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -168,7 +168,7 @@ class OpenclProcessing(object):
paramatrized buffers.
:param use_array: allocate memory as pyopencl.array.Array
instead of pyopencl.Buffer
-
+
Note that an OpenCL context also requires some memory, as well
as Event and other OpenCL functionalities which cannot and are
not taken into account here. The memory required by a context
@@ -290,6 +290,59 @@ class OpenclProcessing(object):
else:
self.queue = pyopencl.CommandQueue(self.ctx)
+ def profile_add(self, event, desc):
+ """
+ Add an OpenCL event to the events lists, if profiling is enabled.
+
+ :param event: silx.opencl.processing.EventDescription.
+ :param desc: event description
+ """
+ if self.profile:
+ self.events.append(EventDescription(desc, event))
+
+ def allocate_texture(self, shape, hostbuf=None, support_1D=False):
+ """
+ Allocate an OpenCL image ("texture").
+
+ :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
+ do not support 1D images, so 1D images are handled as 2D with one row
+ :param support_1D: force the image to be 1D if the shape has only one dim
+ """
+ if len(shape) == 1 and not(support_1D):
+ shape = (1,) + shape
+ return pyopencl.Image(
+ self.ctx,
+ pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY,
+ pyopencl.channel_type.FLOAT
+ ),
+ hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
+ )
+
+ def transfer_to_texture(self, arr, tex_ref):
+ """
+ Transfer an array to a texture.
+
+ :param arr: Input array. Can be a numpy array or a pyopencl array.
+ :param tex_ref: texture reference (pyopencl._cl.Image).
+ """
+ copy_args = [self.queue, tex_ref, arr]
+ shp = arr.shape
+ ndim = arr.ndim
+ if ndim == 1:
+ # pyopencl and OpenCL < 1.2 do not support image1d_t
+ # force 2D with one row in this case
+ #~ ndim = 2
+ shp = (1,) + shp
+ copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
+ if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
+ # D->D copy
+ copy_args[2] = arr.data
+ copy_kwargs["offset"] = 0
+ ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
+ self.profile_add(ev, "Transfer to texture")
+
def log_profile(self):
"""If we are in profiling mode, prints out all timing for every single OpenCL call
"""
@@ -328,7 +381,7 @@ class OpenclProcessing(object):
def get_compiler_options(self, x87_volatile=False):
"""Provide the default OpenCL compiler options
-
+
:param x87_volatile: needed for Kahan summation
:return: string with compiler option
"""
diff --git a/silx/opencl/sinofilter.py b/silx/opencl/sinofilter.py
index 3e4d69c..ad17acb 100644
--- a/silx/opencl/sinofilter.py
+++ b/silx/opencl/sinofilter.py
@@ -29,135 +29,21 @@ from __future__ import absolute_import, print_function, with_statement, division
__authors__ = ["P. Paleo"]
__license__ = "MIT"
-__date__ = "01/02/2019"
+__date__ = "07/06/2019"
import numpy as np
from math import pi
-from itertools import product
-from bisect import bisect
-from .common import pyopencl as cl
+
import pyopencl.array as parray
+from .common import pyopencl as cl
from .processing import OpenclProcessing
from ..math.fft import FFT
from ..math.fft.clfft import __have_clfft__
+from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter
from ..utils.deprecation import deprecated
-def nextpow2(N):
- p = 1
- while p < N:
- p *= 2
- return p
-
-def compute_ramlak_filter(dwidth_padded, dtype=np.float32):
- """
- Compute the Ramachandran-Lakshminarayanan (Ram-Lak) filter, used in
- filtered backprojection.
-
- :param dwidth_padded: width of the 2D sinogram after padding
- :param dtype: data type
- """
- #~ dwidth_padded = dwidth * 2
- L = dwidth_padded
- h = np.zeros(L, dtype=dtype)
- L2 = L//2+1
- h[0] = 1/4.
- j = np.linspace(1, L2, L2//2, False).astype(dtype) # np < 1.9.0
- # h[2::2] = 0
- h[1:L2:2] = -1./(pi**2 * j**2)
- # h[-1:L2-1:-2] = -1./(pi**2 * j**2)
- h[L2:] = np.copy(h[1:L2-1][::-1])
- return h
-
-
-def tukey(N, alpha=0.5):
- """
- Compute the Tukey apodization window.
-
- :param int N: Number of points.
- :param float alpha:
- """
- apod = np.zeros(N)
- x = np.arange(N)/(N-1)
- r = alpha
- M1 = (0 <= x) * (x < r/2)
- M2 = (r/2 <= x) * (x <= 1 - r/2)
- M3 = (1 - r/2 < x) * (x <= 1)
- apod[M1] = (1 + np.cos(2*pi/r * (x[M1] - r/2)))/2.
- apod[M2] = 1.
- apod[M3] = (1 + np.cos(2*pi/r * (x[M3] - 1 + r/2)))/2.
- return apod
-
-
-def lanczos(N):
- """
- Compute the Lanczos window (truncated sinc) of width N.
-
- :param int N: window width
- """
- x = np.arange(N)/(N-1)
- return np.sin(pi*(2*x-1))/(pi*(2*x-1))
-
-
-def compute_fourier_filter(dwidth_padded, filter_name, cutoff=1.):
- """
- Compute the filter used for FBP.
-
- :param dwidth_padded: padded detector width. As the filtering is done by the
- Fourier convolution theorem, dwidth_padded should be at least 2*dwidth.
- :param filter_name: Name of the filter. Available filters are:
- Ram-Lak, Shepp-Logan, Cosine, Hamming, Hann, Tukey, Lanczos.
- :param cutoff: Cut-off frequency, if relevant.
- """
- Nf = dwidth_padded
- #~ filt_f = np.abs(np.fft.fftfreq(Nf))
- rl = compute_ramlak_filter(Nf, dtype=np.float64)
- filt_f = np.fft.fft(rl)
-
- filter_name = filter_name.lower()
- if filter_name in ["ram-lak", "ramlak"]:
- return filt_f
-
- w = 2 * pi * np.fft.fftfreq(dwidth_padded)
- d = cutoff
- apodization = {
- # ~OK
- "shepp-logan": np.sin(w[1:Nf]/(2*d))/(w[1:Nf]/(2*d)),
- # ~OK
- "cosine": np.cos(w[1:Nf]/(2*d)),
- # OK
- "hamming": 0.54*np.ones_like(filt_f)[1:Nf] + .46 * np.cos(w[1:Nf]/d),
- # OK
- "hann": (np.ones_like(filt_f)[1:Nf] + np.cos(w[1:Nf]/d))/2.,
- # These one is not compatible with Astra - TODO investigate why
- "tukey": np.fft.fftshift(tukey(dwidth_padded, alpha=d/2.))[1:Nf],
- "lanczos": np.fft.fftshift(lanczos(dwidth_padded))[1:Nf],
- }
- if filter_name not in apodization:
- raise ValueError("Unknown filter %s. Available filters are %s" %
- (filter_name, str(apodization.keys())))
- filt_f[1:Nf] *= apodization[filter_name]
- return filt_f
-
-
-def generate_powers():
- """
- Generate a list of powers of [2, 3, 5, 7],
- up to (2**15)*(3**9)*(5**6)*(7**5).
- """
- primes = [2, 3, 5, 7]
- maxpow = {2: 15, 3: 9, 5: 6, 7: 5}
- valuations = []
- for prime in primes:
- valuations.append(range(maxpow[prime]+1))
- powers = product(*valuations)
- res = []
- for pw in powers:
- res.append(np.prod(list(map(lambda x : x[0]**x[1], zip(primes, pw)))))
- return np.unique(res)
-
-
class SinoFilter(OpenclProcessing):
"""
@@ -213,10 +99,10 @@ class SinoFilter(OpenclProcessing):
self.sino_shape = sino_shape
self.n_angles = n_angles
self.dwidth = dwidth
- self.dwidth_padded = self.get_next_power(2*self.dwidth)
+ self.dwidth_padded = get_next_power(2 * self.dwidth, powers=self.powers)
self.sino_padded_shape = (n_angles, self.dwidth_padded)
sino_f_shape = list(self.sino_padded_shape)
- sino_f_shape[-1] = sino_f_shape[-1]//2+1
+ sino_f_shape[-1] = sino_f_shape[-1] // 2 + 1
self.sino_f_shape = tuple(sino_f_shape)
def _init_extra_options(self, extra_options):
@@ -268,16 +154,6 @@ class SinoFilter(OpenclProcessing):
self.tmp_sino_device = parray.zeros(self.queue, self.sino_shape, "f")
self.tmp_sino_host = np.zeros(self.sino_shape, "f")
- def get_next_power(self, n):
- """
- Given a number, get the closest (upper) number p such that
- p is a power of 2, 3, 5 and 7.
- """
- idx = bisect(self.powers, n)
- if self.powers[idx-1] == n:
- return n
- return self.powers[idx]
-
def _compute_filter(self, filter_name):
"""
@@ -286,9 +162,9 @@ class SinoFilter(OpenclProcessing):
self.filter_name = filter_name or "ram-lak"
filter_f = compute_fourier_filter(
self.dwidth_padded,
- filter_name,
+ self.filter_name,
cutoff=self.extra_options["cutoff"],
- )[:self.dwidth_padded//2+1] # R2C
+ )[:self.dwidth_padded // 2 + 1] # R2C
self.set_filter(filter_f, normalize=True)
def set_filter(self, h_filt, normalize=True):
@@ -313,22 +189,18 @@ class SinoFilter(OpenclProcessing):
print("Warning: expected a complex Fourier filter")
self.filter_f = h_filt
if normalize:
- self.filter_f *= pi/self.n_angles
+ self.filter_f *= pi / self.n_angles
self.filter_f = self.filter_f.astype(np.complex64)
self.d_filter_f[:] = self.filter_f[:]
def _init_kernels(self):
OpenclProcessing.compile_kernels(self, self.kernel_files)
h, w = self.d_sino_f.shape
- self.mult_kern_args = (
- self.queue,
- np.int32(self.d_sino_f.shape[::-1]),
- None,
- self.d_sino_f.data,
- self.d_filter_f.data,
- np.int32(w),
- np.int32(h)
- )
+ self.mult_kern_args = (self.queue, (int(w), (int(h))), None,
+ self.d_sino_f.data,
+ self.d_filter_f.data,
+ np.int32(w),
+ np.int32(h))
def check_array(self, arr):
if arr.dtype != np.float32:
@@ -350,18 +222,16 @@ class SinoFilter(OpenclProcessing):
:param dst_offset:
:param src_offset:
"""
- self.kernels.cpy2d(
- self.queue,
- np.int32(transfer_shape[::-1]),
- None,
- dst.data,
- src.data,
- np.int32(dst.shape[1]),
- np.int32(src.shape[1]),
- np.int32(dst_offset),
- np.int32(src_offset),
- np.int32(transfer_shape[::-1])
- )
+ shape = tuple(int(i) for i in transfer_shape[::-1])
+ ev = self.kernels.cpy2d(self.queue, shape, None,
+ dst.data,
+ src.data,
+ np.int32(dst.shape[1]),
+ np.int32(src.shape[1]),
+ np.int32(dst_offset),
+ np.int32(src_offset),
+ np.int32(transfer_shape[::-1]))
+ ev.wait()
def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0),
src_offset=(0, 0)):
@@ -376,11 +246,10 @@ class SinoFilter(OpenclProcessing):
s = transfer_shape
do = dst_offset
so = src_offset
- dst[do[0]:do[0]+s[0], do[1]:do[1]+s[1]] = src[so[0]:so[0]+s[0], so[1]:so[1]+s[1]]
+ dst[do[0]:do[0] + s[0], do[1]:do[1] + s[1]] = src[so[0]:so[0] + s[0], so[1]:so[1] + s[1]]
def _prepare_input_sino(self, sino):
"""
-
:param sino: sinogram
"""
self.check_array(sino)
@@ -401,7 +270,7 @@ class SinoFilter(OpenclProcessing):
# Rectangular copy D->D
self.copy2d(self.d_sino_padded, d_sino_ref, self.sino_shape)
if self.is_cpu:
- self.d_sino_padded.finish()
+ self.d_sino_padded.finish() # should not be required here
else:
# Numpy backend: FFT/mult/IFFT are done on host.
if not(isinstance(sino, np.ndarray)):
@@ -415,7 +284,6 @@ class SinoFilter(OpenclProcessing):
def _get_output_sino(self, output):
"""
-
:param Union[numpy.dtype,None] output: sinogram output.
:return: sinogram
"""
@@ -433,14 +301,14 @@ class SinoFilter(OpenclProcessing):
src=self.d_sino_padded,
transfer_shape=self.sino_shape)
if self.is_cpu:
- self.tmp_sino_device.finish()
- res[:] = self.tmp_sino_device[:]
+ self.tmp_sino_device.finish() # should not be required here
+ res[:] = self.tmp_sino_device.get()[:]
else:
if self.is_cpu:
self.d_sino_padded.finish()
self.copy2d(res, self.d_sino_padded, self.sino_shape)
if self.is_cpu:
- res.finish()
+ res.finish() # should not be required here
else:
if not(isinstance(res, np.ndarray)):
# Numpy backend + pyopencl output: rect copy H->H + copy H->D
@@ -467,11 +335,12 @@ class SinoFilter(OpenclProcessing):
def _multiply_fourier(self):
if self.fft_backend == "opencl":
# Everything is on device. Call the multiplication kernel.
- self.kernels.inplace_complex_mul_2Dby1D(
+ ev = self.kernels.inplace_complex_mul_2Dby1D(
*self.mult_kern_args
)
+ ev.wait()
if self.is_cpu:
- self.d_sino_f.finish()
+ self.d_sino_f.finish() # should not be required here
else:
# Everything is on host.
self.d_sino_f *= self.filter_f
@@ -508,7 +377,7 @@ class SinoFilter(OpenclProcessing):
# return
res = self._get_output_sino(output)
return res
- #~ return output
+ # ~ return output
__call__ = filter_sino
@@ -519,6 +388,14 @@ class SinoFilter(OpenclProcessing):
# - Compatibility -
# -------------------
+
+def nextpow2(N):
+ p = 1
+ while p < N:
+ p *= 2
+ return p
+
+
@deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
def fourier_filter(sino, filter_=None, fft_size=None):
"""Simple np based implementation of fourier space filter.
diff --git a/silx/opencl/sparse.py b/silx/opencl/sparse.py
new file mode 100644
index 0000000..8bfaea8
--- /dev/null
+++ b/silx/opencl/sparse.py
@@ -0,0 +1,331 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for data sparsification on CPU/GPU."""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "07/06/2019"
+
+import numpy
+import pyopencl.array as parray
+from collections import namedtuple
+from pyopencl.scan import GenericScanKernel
+from .common import pyopencl as cl
+from .processing import OpenclProcessing, EventDescription, BufferDescription
+mf = cl.mem_flags
+
+
+CSRData = namedtuple("CSRData", ["data", "indices", "indptr"])
+
+def tuple_to_csrdata(arrs):
+ """
+ Converts a 3-tuple to a CSRData namedtuple.
+ """
+ if arrs is None:
+ return None
+ return CSRData(data=arrs[0], indices=arrs[1], indptr=arrs[2])
+
+
+
+# only float32 arrays are supported for now
+class CSR(OpenclProcessing):
+ kernel_files = ["sparse.cl"]
+
+ def __init__(self, shape, max_nnz=None, ctx=None, devicetype="all",
+ platformid=None, deviceid=None, block_size=None, memory=None,
+ profile=False):
+ """
+ Compute Compressed Sparse Row format of an image (2D matrix).
+ It is designed to be compatible with scipy.sparse.csr_matrix.
+
+ :param shape: tuple
+ Matrix shape.
+ :param max_nnz: int, optional
+ Maximum number of non-zero elements. By default, the arrays "data"
+ and "indices" are allocated with prod(shape) elements, but
+ in practice a much lesser space is needed.
+ The number of non-zero items cannot be known in advance, but one can
+ estimate an upper-bound with this parameter to save memory.
+
+ Opencl processing parameters
+ -----------------------------
+ Please refer to the documentation of silx.opencl.processing.OpenclProcessing
+ for information on the other parameters.
+ """
+
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+ self._set_parameters(shape, max_nnz)
+ self._allocate_memory()
+ self._setup_kernels()
+
+ # --------------------------------------------------------------------------
+ # -------------------------- Initialization --------------------------------
+ # --------------------------------------------------------------------------
+
+ def _set_parameters(self, shape, max_nnz):
+ self.shape = shape
+ self.size = numpy.prod(shape)
+ self.indice_dtype = numpy.int32 #
+ assert len(shape) == 2 #
+ if max_nnz is None:
+ self.max_nnz = numpy.prod(shape) # worst case
+ else:
+ self.max_nnz = int(max_nnz)
+
+
+ def _allocate_memory(self):
+ self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ?
+ self.buffers = [
+ BufferDescription("array", (self.size,), numpy.float32, mf.READ_ONLY),
+ BufferDescription("data", (self.max_nnz,), numpy.float32, mf.READ_WRITE),
+ BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE),
+ BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE),
+ ]
+ self.allocate_buffers(use_array=True)
+ for arr_name in ["array", "data", "indices", "indptr"]:
+ setattr(self, arr_name, self.cl_mem[arr_name])
+ self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty()
+ self._old_array = self.array
+ self._old_data = self.data
+ self._old_indices = self.indices
+ self._old_indptr = self.indptr
+
+
+ def _setup_kernels(self):
+ self._setup_compaction_kernel()
+ self._setup_decompaction_kernel()
+
+
+ def _setup_compaction_kernel(self):
+ self.scan_kernel = GenericScanKernel(
+ self.ctx, self.indice_dtype,
+ arguments="__global float* data, __global float *data_compacted, __global int *indices, __global int* indptr",
+ input_expr="(fabs(data[i]) > 0.0f) ? 1 : 0",
+ scan_expr="a+b", neutral="0",
+ output_statement="""
+ // item is the running sum of input_expr(i), i.e the cumsum of "nonzero"
+ if (prev_item != item) {
+ data_compacted[item-1] = data[i];
+ indices[item-1] = GET_INDEX(i);
+ }
+ // The last cumsum element of each line of "nonzero" goes to inptr[i]
+ if ((i+1) % IMAGE_WIDTH == 0) {
+ indptr[(i/IMAGE_WIDTH)+1] = item;
+ }
+ """,
+ options="-DIMAGE_WIDTH=%d" % self.shape[1],
+ preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)",
+ )
+
+
+ def _setup_decompaction_kernel(self):
+ OpenclProcessing.compile_kernels(
+ self,
+ self.kernel_files,
+ compile_options=["-DIMAGE_WIDTH=%d" % self.shape[1]]
+ )
+ device = self.ctx.devices[0]
+ wg_x = min(
+ device.max_work_group_size,
+ 32,
+ self.kernels.max_workgroup_size("densify_csr")
+ )
+ self._decomp_wg = (wg_x, 1)
+ self._decomp_grid = (self._decomp_wg[0], self.shape[0])
+
+
+ # --------------------------------------------------------------------------
+ # -------------------------- Array utils -----------------------------------
+ # --------------------------------------------------------------------------
+
+ # TODO handle pyopencl Buffer
+ def check_array(self, arr):
+ """
+ Check that provided array is compatible with current context.
+
+ :param arr: numpy.ndarray or pyopencl.array.Array
+ 2D array in dense format.
+ """
+ assert arr.size == self.size
+ assert arr.dtype == numpy.float32
+
+
+ # TODO handle pyopencl Buffer
+ def check_sparse_arrays(self, csr_data):
+ """
+ Check that the provided sparse arrays are compatible with the current
+ context.
+
+ :param arrays: namedtuple CSRData.
+ It contains the arrays "data", "indices", "indptr"
+ """
+ assert isinstance(csr_data, CSRData)
+ for arr in [csr_data.data, csr_data.indices, csr_data.indptr]:
+ assert arr.ndim == 1
+ assert csr_data.data.size == self.max_nnz
+ assert csr_data.indices.size == self.max_nnz
+ assert csr_data.indptr.size == self.shape[0]+1
+ assert csr_data.data.dtype == numpy.float32
+ assert csr_data.indices.dtype == self.indice_dtype
+ assert csr_data.indptr.dtype == self.indice_dtype
+
+
+ def set_array(self, arr):
+ """
+ Set the provided array as the current context 2D matrix.
+
+ :param arr: numpy.ndarray or pyopencl.array.Array
+ 2D array in dense format.
+ """
+ if arr is None:
+ return
+ self.check_array(arr)
+ # GenericScanKernel only supports 1D data
+ if isinstance(arr, parray.Array):
+ self._old_array = self.array
+ self.array = arr
+ elif isinstance(arr, numpy.ndarray):
+ self.array[:] = arr.ravel()[:]
+ else:
+ raise ValueError("Expected pyopencl array or numpy array")
+
+
+ def set_sparse_arrays(self, csr_data):
+ if csr_data is None:
+ return
+ self.check_sparse_arrays(csr_data)
+ for name, arr in {"data": csr_data.data, "indices": csr_data.indices, "indptr": csr_data.indptr}.items():
+ # The current array is a device array. Don't copy, use it directly
+ if isinstance(arr, parray.Array):
+ setattr(self, "_old_" + name, getattr(self, name))
+ setattr(self, name, arr)
+ # The current array is a numpy.ndarray: copy H2D
+ elif isinstance(arr, numpy.ndarray):
+ getattr(self, name)[:] = arr[:]
+ else:
+ raise ValueError("Unsupported array type: %s" % type(arr))
+
+
+ def _recover_arrays_references(self):
+ """
+ Recover the previous arrays references, and return the references of the
+ "current" arrays.
+ """
+ array = self.array
+ data = self.data
+ indices = self.indices
+ indptr = self.indptr
+ for name in ["array", "data", "indices", "indptr"]:
+ # self.X = self._old_X
+ setattr(self, name, getattr(self, "_old_" + name))
+ return array, (data, indices, indptr)
+
+
+ def get_sparse_arrays(self, output):
+ """
+ Get the 2D dense array of the current context.
+
+ :param output: tuple or None
+ tuple in the form (data, indices, indptr). These arrays have to be
+ compatible with the current context (size and data type).
+ The content of these arrays will be overwritten with the result of
+ the previous computation.
+ """
+ numels = self.max_nnz
+ if output is None:
+ data = self.data.get()[:numels]
+ ind = self.indices.get()[:numels]
+ indptr = self.indptr.get()
+ res = (data, ind, indptr)
+ else:
+ res = output
+ return res
+
+
+ def get_array(self, output):
+ if output is None:
+ res = self.array.get().reshape(self.shape)
+ else:
+ res = output
+ return res
+
+ # --------------------------------------------------------------------------
+ # -------------------------- Compaction ------------------------------------
+ # --------------------------------------------------------------------------
+
+ def sparsify(self, arr, output=None):
+ """
+ Convert an image (2D matrix) into a CSR representation.
+
+ :param arr: numpy.ndarray or pyopencl.array.Array
+ Input array.
+ :param output: tuple of pyopencl.array.Array, optional
+ If provided, this must be a tuple of 3 arrays (data, indices, indptr).
+ The content of each array is overwritten by the computation result.
+ """
+ self.set_array(arr)
+ self.set_sparse_arrays(tuple_to_csrdata(output))
+ evt = self.scan_kernel(
+ self.array,
+ self.data,
+ self.indices,
+ self.indptr,
+ )
+ #~ evt.wait()
+ self.profile_add(evt, "sparsification kernel")
+ res = self.get_sparse_arrays(output)
+ self._recover_arrays_references()
+ return res
+
+ # --------------------------------------------------------------------------
+ # -------------------------- Decompaction ----------------------------------
+ # --------------------------------------------------------------------------
+
+ def densify(self, data, indices, indptr, output=None):
+ self.set_sparse_arrays(
+ CSRData(data=data, indices=indices, indptr=indptr)
+ )
+ self.set_array(output)
+ evt = self.kernels.densify_csr(
+ self.queue,
+ self._decomp_grid,
+ self._decomp_wg,
+ self.data.data,
+ self.indices.data,
+ self.indptr.data,
+ self.array.data,
+ numpy.int32(self.shape[0]),
+ )
+ #~ evt.wait()
+ self.profile_add(evt, "desparsification kernel")
+ res = self.get_array(output)
+ self._recover_arrays_references()
+ return res
+
diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py
index f3121d5..2e90e66 100644
--- a/silx/opencl/test/__init__.py
+++ b/silx/opencl/test/__init__.py
@@ -38,6 +38,8 @@ from ..codec import test as test_codec
from . import test_image
from . import test_kahan
from . import test_stats
+from . import test_convolution
+from . import test_sparse
def suite():
@@ -52,6 +54,8 @@ def suite():
test_suite.addTests(test_image.suite())
test_suite.addTests(test_kahan.suite())
test_suite.addTests(test_stats.suite())
+ test_suite.addTests(test_convolution.suite())
+ test_suite.addTests(test_sparse.suite())
# Allow to remove sift from the project
test_base_dir = os.path.dirname(__file__)
sift_dir = os.path.join(test_base_dir, "..", "sift")
diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py
index 7ab416d..b2f2070 100644
--- a/silx/opencl/test/test_backprojection.py
+++ b/silx/opencl/test/test_backprojection.py
@@ -45,7 +45,7 @@ except ImportError:
from ..common import ocl
if ocl:
from .. import backprojection
- from ..sinofilter import compute_fourier_filter
+ from ...image.tomography import compute_fourier_filter
from silx.test.utils import utilstest
logger = logging.getLogger(__name__)
@@ -199,11 +199,30 @@ class TestFBP(unittest.TestCase):
"Something wrong with FBP(filter=%s)" % filter_name
)
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_fbp_oddsize(self):
+ # Generate a 513-sinogram.
+ # The padded width will be nextpow(513*2).
+ # silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail.
+ sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge')
+ B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1)
+ res = B(sino)
+ # Compare with self.reference_rec. Tolerance is high as backprojector
+ # is not fully shift-invariant.
+ errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec)))
+ self.assertLess(
+ errmax, 1.e-1,
+ "Something wrong with FBP on odd-sized sinogram"
+ )
+
+
+
def suite():
testSuite = unittest.TestSuite()
testSuite.addTest(TestFBP("test_fbp"))
testSuite.addTest(TestFBP("test_fbp_filters"))
+ testSuite.addTest(TestFBP("test_fbp_oddsize"))
return testSuite
diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py
new file mode 100644
index 0000000..8acd385
--- /dev/null
+++ b/silx/opencl/test/test_convolution.py
@@ -0,0 +1,263 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+
+"""
+Test of the Convolution class.
+"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre Paleo"]
+__contact__ = "pierre.paleo@esrf.fr"
+__license__ = "MIT"
+__copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "15/02/2019"
+
+import logging
+from itertools import product
+import numpy as np
+from silx.utils.testutils import parameterize
+try:
+ from scipy.ndimage import convolve, convolve1d
+ from scipy.misc import ascent
+ scipy_convolve = convolve
+ scipy_convolve1d = convolve1d
+except ImportError:
+ scipy_convolve = None
+import unittest
+from ..common import ocl
+if ocl:
+ import pyopencl as cl
+ import pyopencl.array as parray
+ from ..convolution import Convolution, gaussian_kernel
+logger = logging.getLogger(__name__)
+
+
+@unittest.skipUnless(ocl and scipy_convolve, "PyOpenCl/scipy is missing")
+class TestConvolution(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ super(TestConvolution, cls).setUpClass()
+ cls.image = np.ascontiguousarray(ascent()[:, :511], dtype="f")
+ cls.data1d = cls.image[0]
+ cls.data2d = cls.image
+ cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1))
+ cls.kernel1d = gaussian_kernel(1.)
+ cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d)
+ cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d)
+ cls.ctx = ocl.create_context()
+ cls.tol = {
+ "1D": 1e-4,
+ "2D": 1e-3,
+ "3D": 1e-3,
+ }
+
+ @classmethod
+ def tearDownClass(cls):
+ cls.data1d = cls.data2d = cls.data3d = cls.image = None
+ cls.kernel1d = cls.kernel2d = cls.kernel3d = None
+
+ @staticmethod
+ def compare(arr1, arr2):
+ return np.max(np.abs(arr1 - arr2))
+
+ @staticmethod
+ def print_err(conv):
+ errmsg = str(
+ """
+ Something wrong with %s
+ mode=%s, texture=%s
+ """
+ % (conv.use_case_desc, conv.mode, conv.use_textures)
+ )
+ return errmsg
+
+ def __init__(self, methodName='runTest', param=None):
+ unittest.TestCase.__init__(self, methodName)
+ self.param = param
+ self.mode = param["boundary_handling"]
+ logger.debug(
+ """
+ Testing convolution with boundary_handling=%s,
+ use_textures=%s, input_device=%s, output_device=%s
+ """
+ % (
+ self.mode, param["use_textures"],
+ param["input_on_device"], param["output_on_device"]
+ )
+ )
+
+ def instantiate_convol(self, shape, kernel, axes=None):
+ if (self.mode == "constant") and (
+ not(self.param["use_textures"])
+ or (self.ctx.devices[0].type == cl._cl.device_type.CPU)
+ ):
+ self.skipTest("mode=constant not implemented without textures")
+ C = Convolution(
+ shape, kernel,
+ mode=self.mode,
+ ctx=self.ctx,
+ axes=axes,
+ extra_options={"dont_use_textures": not(self.param["use_textures"])}
+ )
+ return C
+
+ def get_data_and_kernel(self, test_name):
+ dims = {
+ "test_1D": (1, 1),
+ "test_separable_2D": (2, 1),
+ "test_separable_3D": (3, 1),
+ "test_nonseparable_2D": (2, 2),
+ "test_nonseparable_3D": (3, 3),
+ }
+ dim_data = {
+ 1: self.data1d,
+ 2: self.data2d,
+ 3: self.data3d
+ }
+ dim_kernel = {
+ 1: self.kernel1d,
+ 2: self.kernel2d,
+ 3: self.kernel3d,
+ }
+ dd, kd = dims[test_name]
+ return dim_data[dd], dim_kernel[kd]
+
+ def get_reference_function(self, test_name):
+ ref_func = {
+ "test_1D":
+ lambda x, y : scipy_convolve1d(x, y, mode=self.mode),
+ "test_separable_2D":
+ lambda x, y : scipy_convolve1d(
+ scipy_convolve1d(x, y, mode=self.mode, axis=1),
+ y, mode=self.mode, axis=0
+ ),
+ "test_separable_3D":
+ lambda x, y: scipy_convolve1d(
+ scipy_convolve1d(
+ scipy_convolve1d(x, y, mode=self.mode, axis=2),
+ y, mode=self.mode, axis=1),
+ y, mode=self.mode, axis=0
+ ),
+ "test_nonseparable_2D":
+ lambda x, y: scipy_convolve(x, y, mode=self.mode),
+ "test_nonseparable_3D":
+ lambda x, y : scipy_convolve(x, y, mode=self.mode),
+ }
+ return ref_func[test_name]
+
+ def template_test(self, test_name):
+ data, kernel = self.get_data_and_kernel(test_name)
+ conv = self.instantiate_convol(data.shape, kernel)
+ if self.param["input_on_device"]:
+ data_ref = parray.to_device(conv.queue, data)
+ else:
+ data_ref = data
+ if self.param["output_on_device"]:
+ d_res = parray.zeros_like(conv.data_out)
+ res = d_res
+ else:
+ res = None
+ res = conv(data_ref, output=res)
+ if self.param["output_on_device"]:
+ res = res.get()
+ ref_func = self.get_reference_function(test_name)
+ ref = ref_func(data, kernel)
+ metric = self.compare(res, ref)
+ logger.info("%s: max error = %.2e" % (test_name, metric))
+ tol = self.tol[str("%dD" % kernel.ndim)]
+ self.assertLess(metric, tol, self.print_err(conv))
+
+ def test_1D(self):
+ self.template_test("test_1D")
+
+ def test_separable_2D(self):
+ self.template_test("test_separable_2D")
+
+ def test_separable_3D(self):
+ self.template_test("test_separable_3D")
+
+ def test_nonseparable_2D(self):
+ self.template_test("test_nonseparable_2D")
+
+ def test_nonseparable_3D(self):
+ self.template_test("test_nonseparable_3D")
+
+ def test_batched_2D(self):
+ """
+ Test batched (nonseparable) 2D convolution on 3D data.
+ In this test: batch along "z" (axis 0)
+ """
+ data = self.data3d
+ kernel = self.kernel2d
+ conv = self.instantiate_convol(data.shape, kernel, axes=(0,))
+ res = conv(data) # 3D
+ ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D
+
+ std = np.std(res, axis=0)
+ std_max = np.max(np.abs(std))
+ self.assertLess(std_max, self.tol["2D"], self.print_err(conv))
+ metric = self.compare(res[0], ref)
+ logger.info("test_nonseparable_3D: max error = %.2e" % metric)
+ self.assertLess(metric, self.tol["2D"], self.print_err(conv))
+
+
+def test_convolution():
+ boundary_handling_ = ["reflect", "nearest", "wrap", "constant"]
+ use_textures_ = [True, False]
+ input_on_device_ = [True, False]
+ output_on_device_ = [True, False]
+ testSuite = unittest.TestSuite()
+
+ param_vals = list(product(
+ boundary_handling_,
+ use_textures_,
+ input_on_device_,
+ output_on_device_
+ ))
+ for boundary_handling, use_textures, input_dev, output_dev in param_vals:
+ testcase = parameterize(
+ TestConvolution,
+ param={
+ "boundary_handling": boundary_handling,
+ "input_on_device": input_dev,
+ "output_on_device": output_dev,
+ "use_textures": use_textures,
+ }
+ )
+ testSuite.addTest(testcase)
+ return testSuite
+
+
+
+def suite():
+ testSuite = test_convolution()
+ return testSuite
+
+
+if __name__ == '__main__':
+ unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_sparse.py b/silx/opencl/test/test_sparse.py
new file mode 100644
index 0000000..56f1ba4
--- /dev/null
+++ b/silx/opencl/test/test_sparse.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2018-2019 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the sparse module"""
+
+import numpy as np
+import unittest
+import logging
+from itertools import product
+from ..common import ocl
+if ocl:
+ import pyopencl.array as parray
+ from silx.opencl.sparse import CSR
+try:
+ import scipy.sparse as sp
+except ImportError:
+ sp = None
+logger = logging.getLogger(__name__)
+
+
+
+def generate_sparse_random_data(
+ shape=(1000,),
+ data_min=0, data_max=100,
+ density=0.1,
+ use_only_integers=True,
+ dtype="f"):
+ """
+ Generate random sparse data where.
+
+ Parameters
+ ------------
+ shape: tuple
+ Output data shape.
+ data_min: int or float
+ Minimum value of data
+ data_max: int or float
+ Maximum value of data
+ density: float
+ Density of non-zero elements in the output data.
+ Low value of density mean low number of non-zero elements.
+ use_only_integers: bool
+ If set to True, the output data items will be primarily integers,
+ possibly casted to float if dtype is a floating-point type.
+ This can be used for ease of debugging.
+ dtype: str or numpy.dtype
+ Output data type
+ """
+ mask = np.random.binomial(1, density, size=shape)
+ if use_only_integers:
+ d = np.random.randint(data_min, high=data_max, size=shape)
+ else:
+ d = data_min + (data_max - data_min) * np.random.rand(*shape)
+ return (d * mask).astype(dtype)
+
+
+
+@unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing")
+class TestCSR(unittest.TestCase):
+ """Test CSR format"""
+
+ def setUp(self):
+ self.array = generate_sparse_random_data(shape=(512, 511))
+ # Compute reference sparsification
+ a_s = sp.csr_matrix(self.array)
+ self.ref_data = a_s.data
+ self.ref_indices = a_s.indices
+ self.ref_indptr = a_s.indptr
+ self.ref_nnz = a_s.nnz
+ # Test possible configurations
+ input_on_device = [False, True]
+ output_on_device = [False, True]
+ self._test_configs = list(product(input_on_device, output_on_device))
+
+
+ def test_sparsification(self):
+ for input_on_device, output_on_device in self._test_configs:
+ self._test_sparsification(input_on_device, output_on_device)
+
+
+ def _test_sparsification(self, input_on_device, output_on_device):
+ current_config = "input on device: %s, output on device: %s" % (
+ str(input_on_device), str(output_on_device)
+ )
+ # Sparsify on device
+ csr = CSR(self.array.shape)
+ if input_on_device:
+ # The array has to be flattened
+ arr = parray.to_device(csr.queue, self.array.ravel())
+ else:
+ arr = self.array
+ if output_on_device:
+ d_data = parray.zeros_like(csr.data)
+ d_indices = parray.zeros_like(csr.indices)
+ d_indptr = parray.zeros_like(csr.indptr)
+ output = (d_data, d_indices, d_indptr)
+ else:
+ output = None
+ data, indices, indptr = csr.sparsify(arr, output=output)
+ if output_on_device:
+ data = data.get()
+ indices = indices.get()
+ indptr = indptr.get()
+ # Compare
+ nnz = self.ref_nnz
+ self.assertTrue(
+ np.allclose(data[:nnz], self.ref_data),
+ "something wrong with sparsified data (%s)"
+ % current_config
+ )
+ self.assertTrue(
+ np.allclose(indices[:nnz], self.ref_indices),
+ "something wrong with sparsified indices (%s)"
+ % current_config
+ )
+ self.assertTrue(
+ np.allclose(indptr, self.ref_indptr),
+ "something wrong with sparsified indices pointers (indptr) (%s)"
+ % current_config
+ )
+
+
+ def test_desparsification(self):
+ for input_on_device, output_on_device in self._test_configs:
+ self._test_desparsification(input_on_device, output_on_device)
+
+
+ def _test_desparsification(self, input_on_device, output_on_device):
+ current_config = "input on device: %s, output on device: %s" % (
+ str(input_on_device), str(output_on_device)
+ )
+ # De-sparsify on device
+ csr = CSR(self.array.shape, max_nnz=self.ref_nnz)
+ if input_on_device:
+ data = parray.to_device(csr.queue, self.ref_data)
+ indices = parray.to_device(csr.queue, self.ref_indices)
+ indptr = parray.to_device(csr.queue, self.ref_indptr)
+ else:
+ data = self.ref_data
+ indices = self.ref_indices
+ indptr = self.ref_indptr
+ if output_on_device:
+ d_arr = parray.zeros_like(csr.array)
+ output = d_arr
+ else:
+ output = None
+ arr = csr.densify(data, indices, indptr, output=output)
+ if output_on_device:
+ arr = arr.get()
+ # Compare
+ self.assertTrue(
+ np.allclose(arr.reshape(self.array.shape), self.array),
+ "something wrong with densified data (%s)"
+ % current_config
+ )
+
+
+
+def suite():
+ suite = unittest.TestSuite()
+ suite.addTest(
+ unittest.defaultTestLoader.loadTestsFromTestCase(TestCSR)
+ )
+ return suite
+
+
+if __name__ == '__main__':
+ unittest.main(defaultTest="suite")
+
+