diff options
Diffstat (limited to 'silx/opencl')
-rw-r--r-- | silx/opencl/common.py | 38 | ||||
-rw-r--r-- | silx/opencl/convolution.py | 124 | ||||
-rw-r--r-- | silx/opencl/linalg.py | 8 | ||||
-rw-r--r-- | silx/opencl/processing.py | 11 | ||||
-rw-r--r-- | silx/opencl/projection.py | 11 | ||||
-rw-r--r-- | silx/opencl/reconstruction.py | 33 | ||||
-rw-r--r-- | silx/opencl/sinofilter.py | 21 | ||||
-rw-r--r-- | silx/opencl/sparse.py | 82 | ||||
-rw-r--r-- | silx/opencl/test/test_addition.py | 5 | ||||
-rw-r--r-- | silx/opencl/test/test_convolution.py | 8 | ||||
-rw-r--r-- | silx/opencl/test/test_kahan.py | 8 | ||||
-rw-r--r-- | silx/opencl/test/test_linalg.py | 5 | ||||
-rw-r--r-- | silx/opencl/test/test_sparse.py | 85 | ||||
-rw-r--r-- | silx/opencl/utils.py | 93 |
14 files changed, 304 insertions, 228 deletions
diff --git a/silx/opencl/common.py b/silx/opencl/common.py index 8d31c8a..110d941 100644 --- a/silx/opencl/common.py +++ b/silx/opencl/common.py @@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "07/06/2019" +__date__ = "28/11/2019" __status__ = "stable" __all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers", "measure_workgroup_size", "kernel_workgroup_size"] @@ -60,8 +60,14 @@ else: logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl") pyopencl = None else: - import pyopencl.array as array - mf = pyopencl.mem_flags + try: + pyopencl.get_platforms() + except pyopencl.LogicError: + logger.warning("The module pyOpenCL has been imported but can't be used here") + pyopencl = None + else: + import pyopencl.array as array + mf = pyopencl.mem_flags if pyopencl is None: # Define default mem flags @@ -91,8 +97,8 @@ NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed ! (6, 0): 128, # GP100 (6, 1): 128, # GP104 (6, 2): 128, # ? - (7, 0): 256, # Volta ? - (7, 1): 256, # Volta ? + (7, 0): 128, # Volta # measured on Telsa V100 + (7, 1): 128, # Volta ? } AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops @@ -254,7 +260,8 @@ def _measure_workgroup_size(device_or_context, fast=False): max_valid_wg = 1 data = numpy.random.random(shape).astype(numpy.float32) d_data = pyopencl.array.to_device(queue, data) - d_data_1 = pyopencl.array.zeros_like(d_data) + 1 + d_data_1 = pyopencl.array.empty_like(d_data) + d_data_1.fill(numpy.float32(1.0)) program = pyopencl.Program(ctx, get_opencl_code("addition")).build() if fast: @@ -318,10 +325,21 @@ class OpenCL(object): # pocl does not describe itself as a CPU ! devtype = "CPU" if len(devtype) > 3: - devtype = devtype[:3] - if _is_nvidia_gpu(pypl.vendor, devtype) and "compute_capability_major_nv" in dir(device): - comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv - flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, min(NVIDIA_FLOP_PER_CORE.values())) + if "GPU" in devtype: + devtype = "GPU" + elif "ACC" in devtype: + devtype = "ACC" + elif "CPU" in devtype: + devtype = "CPU" + else: + devtype = devtype[:3] + if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)): + try: + comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv + except pyopencl.LogicError: + flop_core = FLOP_PER_CORE["GPU"] + else: + flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"]) elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"): flop_core = AMD_FLOP_PER_CORE elif devtype == "CPU": diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py index 18232f4..b8dd8f6 100644 --- a/silx/opencl/convolution.py +++ b/silx/opencl/convolution.py @@ -29,99 +29,14 @@ from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["P. Paleo"] __license__ = "MIT" -__date__ = "11/02/2019" +__date__ = "01/08/2019" import numpy as np -from math import ceil from copy import copy # python2 from .common import pyopencl as cl import pyopencl.array as parray from .processing import OpenclProcessing, EventDescription - - -class ConvolutionInfos(object): - allowed_axes = { - "1D": [None], - "separable_2D_1D_2D": [None, (0, 1), (1, 0)], - "batched_1D_2D": [(0,), (1,)], - "separable_3D_1D_3D": [ - None, - (0, 1, 2), - (1, 2, 0), - (2, 0, 1), - (2, 1, 0), - (1, 0, 2), - (0, 2, 1) - ], - "batched_1D_3D": [(0,), (1,), (2,)], - "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?) - "2D": [None], - "batched_2D_3D": [(0,), (1,), (2,)], - "separable_3D_2D_3D": [ - (1, 0), - (0, 1), - (2, 0), - (0, 2), - (1, 2), - (2, 1), - ], - "3D": [None], - } - use_cases = { - (1, 1): { - "1D": { - "name": "1D convolution on 1D data", - "kernels": ["convol_1D_X"], - }, - }, - (2, 2): { - "2D": { - "name": "2D convolution on 2D data", - "kernels": ["convol_2D_XY"], - }, - }, - (3, 3): { - "3D": { - "name": "3D convolution on 3D data", - "kernels": ["convol_3D_XYZ"], - }, - }, - (2, 1): { - "separable_2D_1D_2D": { - "name": "Separable (2D->1D) convolution on 2D data", - "kernels": ["convol_1D_X", "convol_1D_Y"], - }, - "batched_1D_2D": { - "name": "Batched 1D convolution on 2D data", - "kernels": ["convol_1D_X", "convol_1D_Y"], - }, - }, - (3, 1): { - "separable_3D_1D_3D": { - "name": "Separable (3D->1D) convolution on 3D data", - "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], - }, - "batched_1D_3D": { - "name": "Batched 1D convolution on 3D data", - "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], - }, - "batched_separable_2D_1D_3D": { - "name": "Batched separable (2D->1D) convolution on 3D data", - "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], - }, - }, - (3, 2): { - "separable_3D_2D_3D": { - "name": "Separable (3D->2D) convolution on 3D data", - "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"], - }, - "batched_2D_3D": { - "name": "Batched 2D convolution on 3D data", - "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"], - }, - }, - } - +from .utils import ConvolutionInfos class Convolution(OpenclProcessing): """ @@ -262,7 +177,8 @@ class Convolution(OpenclProcessing): # Allocate arrays for option_name, array_name in option_array_names.items(): if self.extra_options[option_name]: - value = parray.zeros(self.queue, self.shape, "f") + value = parray.empty(self.queue, self.shape, np.float32) + value.fill(np.float32(0.0)) else: value = None setattr(self, array_name, value) @@ -320,11 +236,9 @@ class Convolution(OpenclProcessing): str("-DIMAGE_DIMS=%d" % self.data_ndim), str("-DFILTER_DIMS=%d" % self.kernel_ndim), ]) - data_in_ref = self.data_in_tex d_kernel_ref = self.d_kernel_tex else: kernel_files = ["convolution.cl"] - data_in_ref = self.data_in.data d_kernel_ref = self.d_kernel.data self.compile_kernels( kernel_files=kernel_files, @@ -335,8 +249,8 @@ class Convolution(OpenclProcessing): kernel_args = [ self.queue, self.ndrange, self.wg, - data_in_ref, - self.data_out.data, + None, + None, d_kernel_ref, np.int32(self.kernel.shape[0]), self.Nx, self.Ny, self.Nz @@ -527,29 +441,3 @@ class Convolution(OpenclProcessing): __call__ = convolve -def gaussian_kernel(sigma, cutoff=4, force_odd_size=False): - """ - Generates a Gaussian convolution kernel. - - :param sigma: Standard Deviation of the Gaussian curve. - :param cutoff: Parameter tuning the truncation of the Gaussian. - The higher cutoff, the biggest the array will be (and the closest to - a "true" Gaussian function). - :param force_odd_size: when set to True, the resulting array will always - have an odd size, regardless of the values of "sigma" and "cutoff". - :return: a numpy.ndarray containing the truncated Gaussian function. - The array size is 2*c*s+1 where c=cutoff, s=sigma. - - Nota: due to the quick decay of the Gaussian function, small values of the - "cutoff" parameter are usually fine. The energy difference between a - Gaussian truncated to [-c, c] and a "true" one is - erfc(c/(sqrt(2)*s)) - so choosing cutoff=4*sigma keeps the truncation error below 1e-4. - """ - size = int(ceil(2 * cutoff * sigma + 1)) - if force_odd_size and size % 2 == 0: - size += 1 - x = np.arange(size) - (size - 1.0) / 2.0 - g = np.exp(-(x / sigma) ** 2 / 2.0) - g /= g.sum() - return g diff --git a/silx/opencl/linalg.py b/silx/opencl/linalg.py index fba3e9c..a64122a 100644 --- a/silx/opencl/linalg.py +++ b/silx/opencl/linalg.py @@ -29,7 +29,7 @@ from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["P. Paleo"] __license__ = "MIT" -__date__ = "10/08/2017" +__date__ = "01/08/2019" import numpy as np @@ -63,8 +63,10 @@ class LinAlg(OpenclProcessing): platformid=platformid, deviceid=deviceid, profile=profile) - self.d_gradient = parray.zeros(self.queue, shape, np.complex64) - self.d_image = parray.zeros(self.queue, shape, np.float32) + self.d_gradient = parray.empty(self.queue, shape, np.complex64) + self.d_gradient.fill(np.complex64(0.0)) + self.d_image = parray.empty(self.queue, shape, np.float32) + self.d_image.fill(np.float32(0.0)) self.add_to_cl_mem({ "d_gradient": self.d_gradient, "d_image": self.d_image diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py index 707aa72..6b475b9 100644 --- a/silx/opencl/processing.py +++ b/silx/opencl/processing.py @@ -41,7 +41,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "11/01/2019" +__date__ = "05/08/2019" __status__ = "stable" @@ -152,9 +152,12 @@ class OpenclProcessing(object): def __del__(self): """Destructor: release all buffers and programs """ - self.reset_log() - self.free_kernels() - self.free_buffers() + try: + self.reset_log() + self.free_kernels() + self.free_buffers() + except Exception: + pass self.queue = None self.device = None self.ctx = None diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py index 0505d80..da8752f 100644 --- a/silx/opencl/projection.py +++ b/silx/opencl/projection.py @@ -29,7 +29,7 @@ from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["A. Mirone, P. Paleo"] __license__ = "MIT" -__date__ = "28/02/2018" +__date__ = "01/08/2019" import logging import numpy as np @@ -141,10 +141,11 @@ class Projection(OpenclProcessing): BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY), BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY), ] + d_axis_corrections = parray.empty(self.queue, self.nprojs, np.float32) + d_axis_corrections.fill(np.float32(0.0)) self.add_to_cl_mem( { - "d_axis_corrections": parray.zeros(self.queue, - self.nprojs, np.float32) + "d_axis_corrections": d_axis_corrections } ) self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2), @@ -193,7 +194,9 @@ class Projection(OpenclProcessing): pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2) def allocate_slice(self): - self.add_to_cl_mem({"d_slice": parray.zeros(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)}) + ary = parray.zeros(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32) + ary.fill(0) + self.add_to_cl_mem({"d_slice": ary}) def allocate_textures(self): self.d_image_tex = pyopencl.Image( diff --git a/silx/opencl/reconstruction.py b/silx/opencl/reconstruction.py index cb68680..2c84aee 100644 --- a/silx/opencl/reconstruction.py +++ b/silx/opencl/reconstruction.py @@ -29,7 +29,7 @@ from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["P. Paleo"] __license__ = "MIT" -__date__ = "19/09/2017" +__date__ = "01/08/2019" import logging import numpy as np @@ -98,19 +98,23 @@ class ReconstructionAlgorithm(OpenclProcessing): self.sino_shape = sino_shape self.is_cpu = self.backprojector.is_cpu # Arrays - self.d_data = parray.zeros(self.queue, sino_shape, dtype=np.float32) - self.d_sino = parray.zeros_like(self.d_data) - self.d_x = parray.zeros(self.queue, + self.d_data = parray.empty(self.queue, sino_shape, dtype=np.float32) + self.d_data.fill(0.0) + self.d_sino = parray.empty_like(self.d_data) + self.d_sino.fill(0.0) + self.d_x = parray.empty(self.queue, self.backprojector.slice_shape, dtype=np.float32) - self.d_x_old = parray.zeros_like(self.d_x) + self.d_x.fill(0.0) + self.d_x_old = parray.empty_like(self.d_x) + self.d_x_old.fill(0.0) self.add_to_cl_mem({ - "d_data": self.d_data, - "d_sino": self.d_sino, - "d_x": self.d_x, - "d_x_old": self.d_x_old, - }) + "d_data": self.d_data, + "d_sino": self.d_sino, + "d_x": self.d_x, + "d_x_old": self.d_x_old, + }) def proj(self, d_slice, d_sino): """ @@ -276,10 +280,13 @@ class TV(ReconstructionAlgorithm): ) # Additional arrays self.linalg.gradient(self.d_x) - self.d_p = parray.zeros_like(self.linalg.cl_mem["d_gradient"]) - self.d_q = parray.zeros_like(self.d_data) + self.d_p = parray.empty_like(self.linalg.cl_mem["d_gradient"]) + self.d_q = parray.empty_like(self.d_data) self.d_g = self.linalg.d_image - self.d_tmp = parray.zeros_like(self.d_x) + self.d_tmp = parray.empty_like(self.d_x) + self.d_p.fill(0) + self.d_q.fill(0) + self.d_tmp.fill(0) self.add_to_cl_mem({ "d_p": self.d_p, "d_q": self.d_q, diff --git a/silx/opencl/sinofilter.py b/silx/opencl/sinofilter.py index ad17acb..d608744 100644 --- a/silx/opencl/sinofilter.py +++ b/silx/opencl/sinofilter.py @@ -2,7 +2,7 @@ # coding: utf-8 # /*########################################################################## # -# Copyright (c) 2016 European Synchrotron Radiation Facility +# Copyright (c) 2016-2019 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -38,19 +38,20 @@ from math import pi import pyopencl.array as parray from .common import pyopencl as cl from .processing import OpenclProcessing -from ..math.fft import FFT -from ..math.fft.clfft import __have_clfft__ +from ..math.fft.clfft import CLFFT, __have_clfft__ +from ..math.fft.npfft import NPFFT from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter from ..utils.deprecation import deprecated class SinoFilter(OpenclProcessing): - """ - A class for performing sinogram filtering on GPU using OpenCL. + """A class for performing sinogram filtering on GPU using OpenCL. + This is a convolution in the Fourier space, along one dimension: - - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x) - - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x) + + - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x) + - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x) """ kernel_files = ["array_utils.cl"] powers = generate_powers() @@ -121,11 +122,10 @@ class SinoFilter(OpenclProcessing): def _init_fft(self): if __have_clfft__ and not(self.extra_options["use_numpy_fft"]): self.fft_backend = "opencl" - self.fft = FFT( + self.fft = CLFFT( self.sino_padded_shape, dtype=np.float32, axes=(-1,), - backend="opencl", ctx=self.ctx, ) else: @@ -133,10 +133,9 @@ class SinoFilter(OpenclProcessing): print("The gpyfft module was not found. The Fourier transforms " "will be done on CPU. For more performances, it is advised " "to install gpyfft.""") - self.fft = FFT( + self.fft = NPFFT( template=np.zeros(self.sino_padded_shape, "f"), axes=(-1,), - backend="numpy", ) def _allocate_memory(self): diff --git a/silx/opencl/sparse.py b/silx/opencl/sparse.py index 8bfaea8..514589a 100644 --- a/silx/opencl/sparse.py +++ b/silx/opencl/sparse.py @@ -35,6 +35,7 @@ import numpy import pyopencl.array as parray from collections import namedtuple from pyopencl.scan import GenericScanKernel +from pyopencl.tools import dtype_to_ctype from .common import pyopencl as cl from .processing import OpenclProcessing, EventDescription, BufferDescription mf = cl.mem_flags @@ -52,19 +53,20 @@ def tuple_to_csrdata(arrs): -# only float32 arrays are supported for now class CSR(OpenclProcessing): kernel_files = ["sparse.cl"] - def __init__(self, shape, max_nnz=None, ctx=None, devicetype="all", - platformid=None, deviceid=None, block_size=None, memory=None, - profile=False): + def __init__(self, shape, dtype="f", max_nnz=None, idx_dtype=numpy.int32, + ctx=None, devicetype="all", platformid=None, deviceid=None, + block_size=None, memory=None, profile=False): """ Compute Compressed Sparse Row format of an image (2D matrix). It is designed to be compatible with scipy.sparse.csr_matrix. :param shape: tuple Matrix shape. + :param dtype: str or numpy.dtype, optional + Numeric data type. By default, sparse matrix data will be float32. :param max_nnz: int, optional Maximum number of non-zero elements. By default, the arrays "data" and "indices" are allocated with prod(shape) elements, but @@ -80,8 +82,9 @@ class CSR(OpenclProcessing): OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, platformid=platformid, deviceid=deviceid, + block_size=block_size, memory=memory, profile=profile) - self._set_parameters(shape, max_nnz) + self._set_parameters(shape, dtype, max_nnz, idx_dtype) self._allocate_memory() self._setup_kernels() @@ -89,22 +92,47 @@ class CSR(OpenclProcessing): # -------------------------- Initialization -------------------------------- # -------------------------------------------------------------------------- - def _set_parameters(self, shape, max_nnz): + def _set_parameters(self, shape, dtype, max_nnz, idx_dtype): self.shape = shape self.size = numpy.prod(shape) - self.indice_dtype = numpy.int32 # + self._set_idx_dtype(idx_dtype) assert len(shape) == 2 # if max_nnz is None: self.max_nnz = numpy.prod(shape) # worst case else: self.max_nnz = int(max_nnz) + self._set_dtype(dtype) + + + def _set_idx_dtype(self, idx_dtype): + idx_dtype = numpy.dtype(idx_dtype) + if idx_dtype.kind not in ["i", "u"]: + raise ValueError("Not an integer type: %s" % idx_dtype) + # scan value type must have size divisible by 4 bytes + if idx_dtype.itemsize % 4 != 0: + raise ValueError("Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes") + self.indice_dtype = idx_dtype # + + + def _set_dtype(self, dtype): + self.dtype = numpy.dtype(dtype) + if self.dtype.kind == "c": + raise ValueError("Complex data is not supported") + if self.dtype == numpy.dtype(numpy.float32): + self._c_zero_str = "0.0f" + elif self.dtype == numpy.dtype(numpy.float64): + self._c_zero_str = "0.0" + else: # assuming integer + self._c_zero_str = "0" + self.c_dtype = dtype_to_ctype(self.dtype) + self.idx_c_dtype = dtype_to_ctype(self.indice_dtype) def _allocate_memory(self): self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ? self.buffers = [ - BufferDescription("array", (self.size,), numpy.float32, mf.READ_ONLY), - BufferDescription("data", (self.max_nnz,), numpy.float32, mf.READ_WRITE), + BufferDescription("array", (self.size,), self.dtype, mf.READ_ONLY), + BufferDescription("data", (self.max_nnz,), self.dtype, mf.READ_WRITE), BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE), BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE), ] @@ -124,10 +152,24 @@ class CSR(OpenclProcessing): def _setup_compaction_kernel(self): + kernel_signature = str( + "__global %s *data, \ + __global %s *data_compacted, \ + __global %s *indices, \ + __global %s* indptr \ + """ % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype) + ) + if self.dtype.kind == "f": + map_nonzero_expr = "(fabs(data[i]) > %s) ? 1 : 0" % self._c_zero_str + elif self.dtype.kind in ["u", "i"]: + map_nonzero_expr = "(data[i] != %s) ? 1 : 0" % self._c_zero_str + else: + raise ValueError("Unknown data type") + self.scan_kernel = GenericScanKernel( self.ctx, self.indice_dtype, - arguments="__global float* data, __global float *data_compacted, __global int *indices, __global int* indptr", - input_expr="(fabs(data[i]) > 0.0f) ? 1 : 0", + arguments=kernel_signature, + input_expr=map_nonzero_expr, scan_expr="a+b", neutral="0", output_statement=""" // item is the running sum of input_expr(i), i.e the cumsum of "nonzero" @@ -140,7 +182,7 @@ class CSR(OpenclProcessing): indptr[(i/IMAGE_WIDTH)+1] = item; } """, - options="-DIMAGE_WIDTH=%d" % self.shape[1], + options=["-DIMAGE_WIDTH=%d" % self.shape[1]], preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)", ) @@ -149,7 +191,11 @@ class CSR(OpenclProcessing): OpenclProcessing.compile_kernels( self, self.kernel_files, - compile_options=["-DIMAGE_WIDTH=%d" % self.shape[1]] + compile_options=[ + "-DIMAGE_WIDTH=%d" % self.shape[1], + "-DDTYPE=%s" % self.c_dtype, + "-DIDX_DTYPE=%s" % self.idx_c_dtype, + ] ) device = self.ctx.devices[0] wg_x = min( @@ -174,7 +220,7 @@ class CSR(OpenclProcessing): 2D array in dense format. """ assert arr.size == self.size - assert arr.dtype == numpy.float32 + assert arr.dtype == self.dtype # TODO handle pyopencl Buffer @@ -189,10 +235,10 @@ class CSR(OpenclProcessing): assert isinstance(csr_data, CSRData) for arr in [csr_data.data, csr_data.indices, csr_data.indptr]: assert arr.ndim == 1 - assert csr_data.data.size == self.max_nnz - assert csr_data.indices.size == self.max_nnz + assert csr_data.data.size <= self.max_nnz + assert csr_data.indices.size <= self.max_nnz assert csr_data.indptr.size == self.shape[0]+1 - assert csr_data.data.dtype == numpy.float32 + assert csr_data.data.dtype == self.dtype assert csr_data.indices.dtype == self.indice_dtype assert csr_data.indptr.dtype == self.indice_dtype @@ -228,7 +274,7 @@ class CSR(OpenclProcessing): setattr(self, name, arr) # The current array is a numpy.ndarray: copy H2D elif isinstance(arr, numpy.ndarray): - getattr(self, name)[:] = arr[:] + getattr(self, name)[:arr.size] = arr[:] else: raise ValueError("Unsupported array type: %s" % type(arr)) diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py index 3283a4d..49cc0b4 100644 --- a/silx/opencl/test/test_addition.py +++ b/silx/opencl/test/test_addition.py @@ -35,7 +35,7 @@ __authors__ = ["Henri Payno, Jérôme Kieffer"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" __copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "05/07/2018" +__date__ = "01/08/2019" import logging import numpy @@ -80,7 +80,8 @@ class TestAddition(unittest.TestCase): self.shape = 4096 self.data = numpy.random.random(self.shape).astype(numpy.float32) self.d_array_img = pyopencl.array.to_device(self.queue, self.data) - self.d_array_5 = pyopencl.array.zeros_like(self.d_array_img) - 5 + self.d_array_5 = pyopencl.array.empty_like(self.d_array_img) + self.d_array_5.fill(-5) self.program = pyopencl.Program(self.ctx, get_opencl_code("addition")).build() def tearDown(self): diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py index 8acd385..c213808 100644 --- a/silx/opencl/test/test_convolution.py +++ b/silx/opencl/test/test_convolution.py @@ -34,12 +34,13 @@ __authors__ = ["Pierre Paleo"] __contact__ = "pierre.paleo@esrf.fr" __license__ = "MIT" __copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "15/02/2019" +__date__ = "01/08/2019" import logging from itertools import product import numpy as np from silx.utils.testutils import parameterize +from silx.image.utils import gaussian_kernel try: from scipy.ndimage import convolve, convolve1d from scipy.misc import ascent @@ -52,7 +53,7 @@ from ..common import ocl if ocl: import pyopencl as cl import pyopencl.array as parray - from ..convolution import Convolution, gaussian_kernel + from silx.opencl.convolution import Convolution logger = logging.getLogger(__name__) @@ -178,7 +179,8 @@ class TestConvolution(unittest.TestCase): else: data_ref = data if self.param["output_on_device"]: - d_res = parray.zeros_like(conv.data_out) + d_res = parray.empty_like(conv.data_out) + d_res.fill(0) res = d_res else: res = None diff --git a/silx/opencl/test/test_kahan.py b/silx/opencl/test/test_kahan.py index bb9ea3f..167640c 100644 --- a/silx/opencl/test/test_kahan.py +++ b/silx/opencl/test/test_kahan.py @@ -34,7 +34,7 @@ __author__ = "Jérôme Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "30/01/2019" +__date__ = "01/08/2019" import unittest @@ -120,7 +120,8 @@ class TestKahan(unittest.TestCase): """ prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args) ones_d = pyopencl.array.to_device(self.queue, data) - res_d = pyopencl.array.zeros(self.queue, 2, numpy.float32) + res_d = pyopencl.array.empty(self.queue, 2, numpy.float32) + res_d.fill(0) evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) evt.wait() res = res_d.get().sum(dtype=numpy.float64) @@ -202,7 +203,8 @@ class TestKahan(unittest.TestCase): prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args) ones_d = pyopencl.array.to_device(self.queue, data) - res_d = pyopencl.array.zeros(self.queue, 2, numpy.float32) + res_d = pyopencl.array.empty(self.queue, 2, numpy.float32) + res_d.fill(0) evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) evt.wait() res = res_d.get().sum(dtype="float64") diff --git a/silx/opencl/test/test_linalg.py b/silx/opencl/test/test_linalg.py index 7d03983..b72fa20 100644 --- a/silx/opencl/test/test_linalg.py +++ b/silx/opencl/test/test_linalg.py @@ -30,7 +30,7 @@ from __future__ import division, print_function __authors__ = ["Pierre paleo"] __license__ = "MIT" __copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "14/06/2017" +__date__ = "01/08/2019" import time @@ -109,7 +109,8 @@ class TestLinAlg(unittest.TestCase): self.div_ref = divergence(self.grad_ref) self.image2 = np.zeros_like(self.image) # Device images - self.gradient_parray = parray.zeros(self.la.queue, self.image.shape, np.complex64) + self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64) + self.gradient_parray.fill(0) # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2), # but platforms not suporting openCL 1.2 have a problem with enqueue_fill_buffer, # so we use the parray "fill" utility diff --git a/silx/opencl/test/test_sparse.py b/silx/opencl/test/test_sparse.py index 56f1ba4..76a6a0a 100644 --- a/silx/opencl/test/test_sparse.py +++ b/silx/opencl/test/test_sparse.py @@ -82,39 +82,45 @@ class TestCSR(unittest.TestCase): """Test CSR format""" def setUp(self): - self.array = generate_sparse_random_data(shape=(512, 511)) - # Compute reference sparsification - a_s = sp.csr_matrix(self.array) - self.ref_data = a_s.data - self.ref_indices = a_s.indices - self.ref_indptr = a_s.indptr - self.ref_nnz = a_s.nnz # Test possible configurations input_on_device = [False, True] output_on_device = [False, True] - self._test_configs = list(product(input_on_device, output_on_device)) + dtypes = [np.float32, np.int32, np.uint16] + self._test_configs = list(product(input_on_device, output_on_device, dtypes)) + + + def compute_ref_sparsification(self, array): + ref_sparse = sp.csr_matrix(array) + return ref_sparse def test_sparsification(self): - for input_on_device, output_on_device in self._test_configs: - self._test_sparsification(input_on_device, output_on_device) + for input_on_device, output_on_device, dtype in self._test_configs: + self._test_sparsification(input_on_device, output_on_device, dtype) - def _test_sparsification(self, input_on_device, output_on_device): - current_config = "input on device: %s, output on device: %s" % ( - str(input_on_device), str(output_on_device) + def _test_sparsification(self, input_on_device, output_on_device, dtype): + current_config = "input on device: %s, output on device: %s, dtype: %s" % ( + str(input_on_device), str(output_on_device), str(dtype) ) + logger.debug("CSR: %s" % current_config) + # Generate data and reference CSR + array = generate_sparse_random_data(shape=(512, 511), dtype=dtype) + ref_sparse = self.compute_ref_sparsification(array) # Sparsify on device - csr = CSR(self.array.shape) + csr = CSR(array.shape, dtype=dtype) if input_on_device: # The array has to be flattened - arr = parray.to_device(csr.queue, self.array.ravel()) + arr = parray.to_device(csr.queue, array.ravel()) else: - arr = self.array + arr = array if output_on_device: - d_data = parray.zeros_like(csr.data) - d_indices = parray.zeros_like(csr.indices) - d_indptr = parray.zeros_like(csr.indptr) + d_data = parray.empty_like(csr.data) + d_indices = parray.empty_like(csr.indices) + d_indptr = parray.empty_like(csr.indptr) + d_data.fill(0) + d_indices.fill(0) + d_indptr.fill(0) output = (d_data, d_indices, d_indptr) else: output = None @@ -124,45 +130,50 @@ class TestCSR(unittest.TestCase): indices = indices.get() indptr = indptr.get() # Compare - nnz = self.ref_nnz + nnz = ref_sparse.nnz self.assertTrue( - np.allclose(data[:nnz], self.ref_data), + np.allclose(data[:nnz], ref_sparse.data), "something wrong with sparsified data (%s)" % current_config ) self.assertTrue( - np.allclose(indices[:nnz], self.ref_indices), + np.allclose(indices[:nnz], ref_sparse.indices), "something wrong with sparsified indices (%s)" % current_config ) self.assertTrue( - np.allclose(indptr, self.ref_indptr), + np.allclose(indptr, ref_sparse.indptr), "something wrong with sparsified indices pointers (indptr) (%s)" % current_config ) def test_desparsification(self): - for input_on_device, output_on_device in self._test_configs: - self._test_desparsification(input_on_device, output_on_device) + for input_on_device, output_on_device, dtype in self._test_configs: + self._test_desparsification(input_on_device, output_on_device, dtype) - def _test_desparsification(self, input_on_device, output_on_device): - current_config = "input on device: %s, output on device: %s" % ( - str(input_on_device), str(output_on_device) + def _test_desparsification(self, input_on_device, output_on_device, dtype): + current_config = "input on device: %s, output on device: %s, dtype: %s" % ( + str(input_on_device), str(output_on_device), str(dtype) ) + logger.debug("CSR: %s" % current_config) + # Generate data and reference CSR + array = generate_sparse_random_data(shape=(512, 511), dtype=dtype) + ref_sparse = self.compute_ref_sparsification(array) # De-sparsify on device - csr = CSR(self.array.shape, max_nnz=self.ref_nnz) + csr = CSR(array.shape, dtype=dtype, max_nnz=ref_sparse.nnz) if input_on_device: - data = parray.to_device(csr.queue, self.ref_data) - indices = parray.to_device(csr.queue, self.ref_indices) - indptr = parray.to_device(csr.queue, self.ref_indptr) + data = parray.to_device(csr.queue, ref_sparse.data) + indices = parray.to_device(csr.queue, ref_sparse.indices) + indptr = parray.to_device(csr.queue, ref_sparse.indptr) else: - data = self.ref_data - indices = self.ref_indices - indptr = self.ref_indptr + data = ref_sparse.data + indices = ref_sparse.indices + indptr = ref_sparse.indptr if output_on_device: - d_arr = parray.zeros_like(csr.array) + d_arr = parray.empty_like(csr.array) + d_arr.fill(0) output = d_arr else: output = None @@ -171,7 +182,7 @@ class TestCSR(unittest.TestCase): arr = arr.get() # Compare self.assertTrue( - np.allclose(arr.reshape(self.array.shape), self.array), + np.allclose(arr.reshape(array.shape), array), "something wrong with densified data (%s)" % current_config ) diff --git a/silx/opencl/utils.py b/silx/opencl/utils.py index f558c1b..575e018 100644 --- a/silx/opencl/utils.py +++ b/silx/opencl/utils.py @@ -119,3 +119,96 @@ def concatenate_cl_kernel(filenames): this method concatenates all the kernel from the list """ return os.linesep.join(read_cl_file(fn) for fn in filenames) + + + + +class ConvolutionInfos(object): + allowed_axes = { + "1D": [None], + "separable_2D_1D_2D": [None, (0, 1), (1, 0)], + "batched_1D_2D": [(0,), (1,)], + "separable_3D_1D_3D": [ + None, + (0, 1, 2), + (1, 2, 0), + (2, 0, 1), + (2, 1, 0), + (1, 0, 2), + (0, 2, 1) + ], + "batched_1D_3D": [(0,), (1,), (2,)], + "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?) + "2D": [None], + "batched_2D_3D": [(0,), (1,), (2,)], + "separable_3D_2D_3D": [ + (1, 0), + (0, 1), + (2, 0), + (0, 2), + (1, 2), + (2, 1), + ], + "3D": [None], + } + use_cases = { + (1, 1): { + "1D": { + "name": "1D convolution on 1D data", + "kernels": ["convol_1D_X"], + }, + }, + (2, 2): { + "2D": { + "name": "2D convolution on 2D data", + "kernels": ["convol_2D_XY"], + }, + }, + (3, 3): { + "3D": { + "name": "3D convolution on 3D data", + "kernels": ["convol_3D_XYZ"], + }, + }, + (2, 1): { + "separable_2D_1D_2D": { + "name": "Separable (2D->1D) convolution on 2D data", + "kernels": ["convol_1D_X", "convol_1D_Y"], + }, + "batched_1D_2D": { + "name": "Batched 1D convolution on 2D data", + "kernels": ["convol_1D_X", "convol_1D_Y"], + }, + }, + (3, 1): { + "separable_3D_1D_3D": { + "name": "Separable (3D->1D) convolution on 3D data", + "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], + }, + "batched_1D_3D": { + "name": "Batched 1D convolution on 3D data", + "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], + }, + "batched_separable_2D_1D_3D": { + "name": "Batched separable (2D->1D) convolution on 3D data", + "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"], + }, + }, + (3, 2): { + "separable_3D_2D_3D": { + "name": "Separable (3D->2D) convolution on 3D data", + "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"], + }, + "batched_2D_3D": { + "name": "Batched 2D convolution on 3D data", + "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"], + }, + }, + } + + + + + + + |