summaryrefslogtreecommitdiff
path: root/silx/opencl
diff options
context:
space:
mode:
Diffstat (limited to 'silx/opencl')
-rw-r--r--silx/opencl/common.py38
-rw-r--r--silx/opencl/convolution.py124
-rw-r--r--silx/opencl/linalg.py8
-rw-r--r--silx/opencl/processing.py11
-rw-r--r--silx/opencl/projection.py11
-rw-r--r--silx/opencl/reconstruction.py33
-rw-r--r--silx/opencl/sinofilter.py21
-rw-r--r--silx/opencl/sparse.py82
-rw-r--r--silx/opencl/test/test_addition.py5
-rw-r--r--silx/opencl/test/test_convolution.py8
-rw-r--r--silx/opencl/test/test_kahan.py8
-rw-r--r--silx/opencl/test/test_linalg.py5
-rw-r--r--silx/opencl/test/test_sparse.py85
-rw-r--r--silx/opencl/utils.py93
14 files changed, 304 insertions, 228 deletions
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 8d31c8a..110d941 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "07/06/2019"
+__date__ = "28/11/2019"
__status__ = "stable"
__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
"measure_workgroup_size", "kernel_workgroup_size"]
@@ -60,8 +60,14 @@ else:
logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl")
pyopencl = None
else:
- import pyopencl.array as array
- mf = pyopencl.mem_flags
+ try:
+ pyopencl.get_platforms()
+ except pyopencl.LogicError:
+ logger.warning("The module pyOpenCL has been imported but can't be used here")
+ pyopencl = None
+ else:
+ import pyopencl.array as array
+ mf = pyopencl.mem_flags
if pyopencl is None:
# Define default mem flags
@@ -91,8 +97,8 @@ NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
(6, 0): 128, # GP100
(6, 1): 128, # GP104
(6, 2): 128, # ?
- (7, 0): 256, # Volta ?
- (7, 1): 256, # Volta ?
+ (7, 0): 128, # Volta # measured on Telsa V100
+ (7, 1): 128, # Volta ?
}
AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
@@ -254,7 +260,8 @@ def _measure_workgroup_size(device_or_context, fast=False):
max_valid_wg = 1
data = numpy.random.random(shape).astype(numpy.float32)
d_data = pyopencl.array.to_device(queue, data)
- d_data_1 = pyopencl.array.zeros_like(d_data) + 1
+ d_data_1 = pyopencl.array.empty_like(d_data)
+ d_data_1.fill(numpy.float32(1.0))
program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
if fast:
@@ -318,10 +325,21 @@ class OpenCL(object):
# pocl does not describe itself as a CPU !
devtype = "CPU"
if len(devtype) > 3:
- devtype = devtype[:3]
- if _is_nvidia_gpu(pypl.vendor, devtype) and "compute_capability_major_nv" in dir(device):
- comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
- flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, min(NVIDIA_FLOP_PER_CORE.values()))
+ if "GPU" in devtype:
+ devtype = "GPU"
+ elif "ACC" in devtype:
+ devtype = "ACC"
+ elif "CPU" in devtype:
+ devtype = "CPU"
+ else:
+ devtype = devtype[:3]
+ if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
+ try:
+ comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
+ except pyopencl.LogicError:
+ flop_core = FLOP_PER_CORE["GPU"]
+ else:
+ flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
flop_core = AMD_FLOP_PER_CORE
elif devtype == "CPU":
diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py
index 18232f4..b8dd8f6 100644
--- a/silx/opencl/convolution.py
+++ b/silx/opencl/convolution.py
@@ -29,99 +29,14 @@ from __future__ import absolute_import, print_function, with_statement, division
__authors__ = ["P. Paleo"]
__license__ = "MIT"
-__date__ = "11/02/2019"
+__date__ = "01/08/2019"
import numpy as np
-from math import ceil
from copy import copy # python2
from .common import pyopencl as cl
import pyopencl.array as parray
from .processing import OpenclProcessing, EventDescription
-
-
-class ConvolutionInfos(object):
- allowed_axes = {
- "1D": [None],
- "separable_2D_1D_2D": [None, (0, 1), (1, 0)],
- "batched_1D_2D": [(0,), (1,)],
- "separable_3D_1D_3D": [
- None,
- (0, 1, 2),
- (1, 2, 0),
- (2, 0, 1),
- (2, 1, 0),
- (1, 0, 2),
- (0, 2, 1)
- ],
- "batched_1D_3D": [(0,), (1,), (2,)],
- "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
- "2D": [None],
- "batched_2D_3D": [(0,), (1,), (2,)],
- "separable_3D_2D_3D": [
- (1, 0),
- (0, 1),
- (2, 0),
- (0, 2),
- (1, 2),
- (2, 1),
- ],
- "3D": [None],
- }
- use_cases = {
- (1, 1): {
- "1D": {
- "name": "1D convolution on 1D data",
- "kernels": ["convol_1D_X"],
- },
- },
- (2, 2): {
- "2D": {
- "name": "2D convolution on 2D data",
- "kernels": ["convol_2D_XY"],
- },
- },
- (3, 3): {
- "3D": {
- "name": "3D convolution on 3D data",
- "kernels": ["convol_3D_XYZ"],
- },
- },
- (2, 1): {
- "separable_2D_1D_2D": {
- "name": "Separable (2D->1D) convolution on 2D data",
- "kernels": ["convol_1D_X", "convol_1D_Y"],
- },
- "batched_1D_2D": {
- "name": "Batched 1D convolution on 2D data",
- "kernels": ["convol_1D_X", "convol_1D_Y"],
- },
- },
- (3, 1): {
- "separable_3D_1D_3D": {
- "name": "Separable (3D->1D) convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- "batched_1D_3D": {
- "name": "Batched 1D convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- "batched_separable_2D_1D_3D": {
- "name": "Batched separable (2D->1D) convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- },
- (3, 2): {
- "separable_3D_2D_3D": {
- "name": "Separable (3D->2D) convolution on 3D data",
- "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
- },
- "batched_2D_3D": {
- "name": "Batched 2D convolution on 3D data",
- "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
- },
- },
- }
-
+from .utils import ConvolutionInfos
class Convolution(OpenclProcessing):
"""
@@ -262,7 +177,8 @@ class Convolution(OpenclProcessing):
# Allocate arrays
for option_name, array_name in option_array_names.items():
if self.extra_options[option_name]:
- value = parray.zeros(self.queue, self.shape, "f")
+ value = parray.empty(self.queue, self.shape, np.float32)
+ value.fill(np.float32(0.0))
else:
value = None
setattr(self, array_name, value)
@@ -320,11 +236,9 @@ class Convolution(OpenclProcessing):
str("-DIMAGE_DIMS=%d" % self.data_ndim),
str("-DFILTER_DIMS=%d" % self.kernel_ndim),
])
- data_in_ref = self.data_in_tex
d_kernel_ref = self.d_kernel_tex
else:
kernel_files = ["convolution.cl"]
- data_in_ref = self.data_in.data
d_kernel_ref = self.d_kernel.data
self.compile_kernels(
kernel_files=kernel_files,
@@ -335,8 +249,8 @@ class Convolution(OpenclProcessing):
kernel_args = [
self.queue,
self.ndrange, self.wg,
- data_in_ref,
- self.data_out.data,
+ None,
+ None,
d_kernel_ref,
np.int32(self.kernel.shape[0]),
self.Nx, self.Ny, self.Nz
@@ -527,29 +441,3 @@ class Convolution(OpenclProcessing):
__call__ = convolve
-def gaussian_kernel(sigma, cutoff=4, force_odd_size=False):
- """
- Generates a Gaussian convolution kernel.
-
- :param sigma: Standard Deviation of the Gaussian curve.
- :param cutoff: Parameter tuning the truncation of the Gaussian.
- The higher cutoff, the biggest the array will be (and the closest to
- a "true" Gaussian function).
- :param force_odd_size: when set to True, the resulting array will always
- have an odd size, regardless of the values of "sigma" and "cutoff".
- :return: a numpy.ndarray containing the truncated Gaussian function.
- The array size is 2*c*s+1 where c=cutoff, s=sigma.
-
- Nota: due to the quick decay of the Gaussian function, small values of the
- "cutoff" parameter are usually fine. The energy difference between a
- Gaussian truncated to [-c, c] and a "true" one is
- erfc(c/(sqrt(2)*s))
- so choosing cutoff=4*sigma keeps the truncation error below 1e-4.
- """
- size = int(ceil(2 * cutoff * sigma + 1))
- if force_odd_size and size % 2 == 0:
- size += 1
- x = np.arange(size) - (size - 1.0) / 2.0
- g = np.exp(-(x / sigma) ** 2 / 2.0)
- g /= g.sum()
- return g
diff --git a/silx/opencl/linalg.py b/silx/opencl/linalg.py
index fba3e9c..a64122a 100644
--- a/silx/opencl/linalg.py
+++ b/silx/opencl/linalg.py
@@ -29,7 +29,7 @@ from __future__ import absolute_import, print_function, with_statement, division
__authors__ = ["P. Paleo"]
__license__ = "MIT"
-__date__ = "10/08/2017"
+__date__ = "01/08/2019"
import numpy as np
@@ -63,8 +63,10 @@ class LinAlg(OpenclProcessing):
platformid=platformid, deviceid=deviceid,
profile=profile)
- self.d_gradient = parray.zeros(self.queue, shape, np.complex64)
- self.d_image = parray.zeros(self.queue, shape, np.float32)
+ self.d_gradient = parray.empty(self.queue, shape, np.complex64)
+ self.d_gradient.fill(np.complex64(0.0))
+ self.d_image = parray.empty(self.queue, shape, np.float32)
+ self.d_image.fill(np.float32(0.0))
self.add_to_cl_mem({
"d_gradient": self.d_gradient,
"d_image": self.d_image
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index 707aa72..6b475b9 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -41,7 +41,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "11/01/2019"
+__date__ = "05/08/2019"
__status__ = "stable"
@@ -152,9 +152,12 @@ class OpenclProcessing(object):
def __del__(self):
"""Destructor: release all buffers and programs
"""
- self.reset_log()
- self.free_kernels()
- self.free_buffers()
+ try:
+ self.reset_log()
+ self.free_kernels()
+ self.free_buffers()
+ except Exception:
+ pass
self.queue = None
self.device = None
self.ctx = None
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
index 0505d80..da8752f 100644
--- a/silx/opencl/projection.py
+++ b/silx/opencl/projection.py
@@ -29,7 +29,7 @@ from __future__ import absolute_import, print_function, with_statement, division
__authors__ = ["A. Mirone, P. Paleo"]
__license__ = "MIT"
-__date__ = "28/02/2018"
+__date__ = "01/08/2019"
import logging
import numpy as np
@@ -141,10 +141,11 @@ class Projection(OpenclProcessing):
BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
]
+ d_axis_corrections = parray.empty(self.queue, self.nprojs, np.float32)
+ d_axis_corrections.fill(np.float32(0.0))
self.add_to_cl_mem(
{
- "d_axis_corrections": parray.zeros(self.queue,
- self.nprojs, np.float32)
+ "d_axis_corrections": d_axis_corrections
}
)
self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2),
@@ -193,7 +194,9 @@ class Projection(OpenclProcessing):
pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
def allocate_slice(self):
- self.add_to_cl_mem({"d_slice": parray.zeros(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)})
+ ary = parray.zeros(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
+ ary.fill(0)
+ self.add_to_cl_mem({"d_slice": ary})
def allocate_textures(self):
self.d_image_tex = pyopencl.Image(
diff --git a/silx/opencl/reconstruction.py b/silx/opencl/reconstruction.py
index cb68680..2c84aee 100644
--- a/silx/opencl/reconstruction.py
+++ b/silx/opencl/reconstruction.py
@@ -29,7 +29,7 @@ from __future__ import absolute_import, print_function, with_statement, division
__authors__ = ["P. Paleo"]
__license__ = "MIT"
-__date__ = "19/09/2017"
+__date__ = "01/08/2019"
import logging
import numpy as np
@@ -98,19 +98,23 @@ class ReconstructionAlgorithm(OpenclProcessing):
self.sino_shape = sino_shape
self.is_cpu = self.backprojector.is_cpu
# Arrays
- self.d_data = parray.zeros(self.queue, sino_shape, dtype=np.float32)
- self.d_sino = parray.zeros_like(self.d_data)
- self.d_x = parray.zeros(self.queue,
+ self.d_data = parray.empty(self.queue, sino_shape, dtype=np.float32)
+ self.d_data.fill(0.0)
+ self.d_sino = parray.empty_like(self.d_data)
+ self.d_sino.fill(0.0)
+ self.d_x = parray.empty(self.queue,
self.backprojector.slice_shape,
dtype=np.float32)
- self.d_x_old = parray.zeros_like(self.d_x)
+ self.d_x.fill(0.0)
+ self.d_x_old = parray.empty_like(self.d_x)
+ self.d_x_old.fill(0.0)
self.add_to_cl_mem({
- "d_data": self.d_data,
- "d_sino": self.d_sino,
- "d_x": self.d_x,
- "d_x_old": self.d_x_old,
- })
+ "d_data": self.d_data,
+ "d_sino": self.d_sino,
+ "d_x": self.d_x,
+ "d_x_old": self.d_x_old,
+ })
def proj(self, d_slice, d_sino):
"""
@@ -276,10 +280,13 @@ class TV(ReconstructionAlgorithm):
)
# Additional arrays
self.linalg.gradient(self.d_x)
- self.d_p = parray.zeros_like(self.linalg.cl_mem["d_gradient"])
- self.d_q = parray.zeros_like(self.d_data)
+ self.d_p = parray.empty_like(self.linalg.cl_mem["d_gradient"])
+ self.d_q = parray.empty_like(self.d_data)
self.d_g = self.linalg.d_image
- self.d_tmp = parray.zeros_like(self.d_x)
+ self.d_tmp = parray.empty_like(self.d_x)
+ self.d_p.fill(0)
+ self.d_q.fill(0)
+ self.d_tmp.fill(0)
self.add_to_cl_mem({
"d_p": self.d_p,
"d_q": self.d_q,
diff --git a/silx/opencl/sinofilter.py b/silx/opencl/sinofilter.py
index ad17acb..d608744 100644
--- a/silx/opencl/sinofilter.py
+++ b/silx/opencl/sinofilter.py
@@ -2,7 +2,7 @@
# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2019 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -38,19 +38,20 @@ from math import pi
import pyopencl.array as parray
from .common import pyopencl as cl
from .processing import OpenclProcessing
-from ..math.fft import FFT
-from ..math.fft.clfft import __have_clfft__
+from ..math.fft.clfft import CLFFT, __have_clfft__
+from ..math.fft.npfft import NPFFT
from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter
from ..utils.deprecation import deprecated
class SinoFilter(OpenclProcessing):
- """
- A class for performing sinogram filtering on GPU using OpenCL.
+ """A class for performing sinogram filtering on GPU using OpenCL.
+
This is a convolution in the Fourier space, along one dimension:
- - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x)
- - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x)
+
+ - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x)
+ - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x)
"""
kernel_files = ["array_utils.cl"]
powers = generate_powers()
@@ -121,11 +122,10 @@ class SinoFilter(OpenclProcessing):
def _init_fft(self):
if __have_clfft__ and not(self.extra_options["use_numpy_fft"]):
self.fft_backend = "opencl"
- self.fft = FFT(
+ self.fft = CLFFT(
self.sino_padded_shape,
dtype=np.float32,
axes=(-1,),
- backend="opencl",
ctx=self.ctx,
)
else:
@@ -133,10 +133,9 @@ class SinoFilter(OpenclProcessing):
print("The gpyfft module was not found. The Fourier transforms "
"will be done on CPU. For more performances, it is advised "
"to install gpyfft.""")
- self.fft = FFT(
+ self.fft = NPFFT(
template=np.zeros(self.sino_padded_shape, "f"),
axes=(-1,),
- backend="numpy",
)
def _allocate_memory(self):
diff --git a/silx/opencl/sparse.py b/silx/opencl/sparse.py
index 8bfaea8..514589a 100644
--- a/silx/opencl/sparse.py
+++ b/silx/opencl/sparse.py
@@ -35,6 +35,7 @@ import numpy
import pyopencl.array as parray
from collections import namedtuple
from pyopencl.scan import GenericScanKernel
+from pyopencl.tools import dtype_to_ctype
from .common import pyopencl as cl
from .processing import OpenclProcessing, EventDescription, BufferDescription
mf = cl.mem_flags
@@ -52,19 +53,20 @@ def tuple_to_csrdata(arrs):
-# only float32 arrays are supported for now
class CSR(OpenclProcessing):
kernel_files = ["sparse.cl"]
- def __init__(self, shape, max_nnz=None, ctx=None, devicetype="all",
- platformid=None, deviceid=None, block_size=None, memory=None,
- profile=False):
+ def __init__(self, shape, dtype="f", max_nnz=None, idx_dtype=numpy.int32,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ block_size=None, memory=None, profile=False):
"""
Compute Compressed Sparse Row format of an image (2D matrix).
It is designed to be compatible with scipy.sparse.csr_matrix.
:param shape: tuple
Matrix shape.
+ :param dtype: str or numpy.dtype, optional
+ Numeric data type. By default, sparse matrix data will be float32.
:param max_nnz: int, optional
Maximum number of non-zero elements. By default, the arrays "data"
and "indices" are allocated with prod(shape) elements, but
@@ -80,8 +82,9 @@ class CSR(OpenclProcessing):
OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
platformid=platformid, deviceid=deviceid,
+ block_size=block_size, memory=memory,
profile=profile)
- self._set_parameters(shape, max_nnz)
+ self._set_parameters(shape, dtype, max_nnz, idx_dtype)
self._allocate_memory()
self._setup_kernels()
@@ -89,22 +92,47 @@ class CSR(OpenclProcessing):
# -------------------------- Initialization --------------------------------
# --------------------------------------------------------------------------
- def _set_parameters(self, shape, max_nnz):
+ def _set_parameters(self, shape, dtype, max_nnz, idx_dtype):
self.shape = shape
self.size = numpy.prod(shape)
- self.indice_dtype = numpy.int32 #
+ self._set_idx_dtype(idx_dtype)
assert len(shape) == 2 #
if max_nnz is None:
self.max_nnz = numpy.prod(shape) # worst case
else:
self.max_nnz = int(max_nnz)
+ self._set_dtype(dtype)
+
+
+ def _set_idx_dtype(self, idx_dtype):
+ idx_dtype = numpy.dtype(idx_dtype)
+ if idx_dtype.kind not in ["i", "u"]:
+ raise ValueError("Not an integer type: %s" % idx_dtype)
+ # scan value type must have size divisible by 4 bytes
+ if idx_dtype.itemsize % 4 != 0:
+ raise ValueError("Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes")
+ self.indice_dtype = idx_dtype #
+
+
+ def _set_dtype(self, dtype):
+ self.dtype = numpy.dtype(dtype)
+ if self.dtype.kind == "c":
+ raise ValueError("Complex data is not supported")
+ if self.dtype == numpy.dtype(numpy.float32):
+ self._c_zero_str = "0.0f"
+ elif self.dtype == numpy.dtype(numpy.float64):
+ self._c_zero_str = "0.0"
+ else: # assuming integer
+ self._c_zero_str = "0"
+ self.c_dtype = dtype_to_ctype(self.dtype)
+ self.idx_c_dtype = dtype_to_ctype(self.indice_dtype)
def _allocate_memory(self):
self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ?
self.buffers = [
- BufferDescription("array", (self.size,), numpy.float32, mf.READ_ONLY),
- BufferDescription("data", (self.max_nnz,), numpy.float32, mf.READ_WRITE),
+ BufferDescription("array", (self.size,), self.dtype, mf.READ_ONLY),
+ BufferDescription("data", (self.max_nnz,), self.dtype, mf.READ_WRITE),
BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE),
BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE),
]
@@ -124,10 +152,24 @@ class CSR(OpenclProcessing):
def _setup_compaction_kernel(self):
+ kernel_signature = str(
+ "__global %s *data, \
+ __global %s *data_compacted, \
+ __global %s *indices, \
+ __global %s* indptr \
+ """ % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype)
+ )
+ if self.dtype.kind == "f":
+ map_nonzero_expr = "(fabs(data[i]) > %s) ? 1 : 0" % self._c_zero_str
+ elif self.dtype.kind in ["u", "i"]:
+ map_nonzero_expr = "(data[i] != %s) ? 1 : 0" % self._c_zero_str
+ else:
+ raise ValueError("Unknown data type")
+
self.scan_kernel = GenericScanKernel(
self.ctx, self.indice_dtype,
- arguments="__global float* data, __global float *data_compacted, __global int *indices, __global int* indptr",
- input_expr="(fabs(data[i]) > 0.0f) ? 1 : 0",
+ arguments=kernel_signature,
+ input_expr=map_nonzero_expr,
scan_expr="a+b", neutral="0",
output_statement="""
// item is the running sum of input_expr(i), i.e the cumsum of "nonzero"
@@ -140,7 +182,7 @@ class CSR(OpenclProcessing):
indptr[(i/IMAGE_WIDTH)+1] = item;
}
""",
- options="-DIMAGE_WIDTH=%d" % self.shape[1],
+ options=["-DIMAGE_WIDTH=%d" % self.shape[1]],
preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)",
)
@@ -149,7 +191,11 @@ class CSR(OpenclProcessing):
OpenclProcessing.compile_kernels(
self,
self.kernel_files,
- compile_options=["-DIMAGE_WIDTH=%d" % self.shape[1]]
+ compile_options=[
+ "-DIMAGE_WIDTH=%d" % self.shape[1],
+ "-DDTYPE=%s" % self.c_dtype,
+ "-DIDX_DTYPE=%s" % self.idx_c_dtype,
+ ]
)
device = self.ctx.devices[0]
wg_x = min(
@@ -174,7 +220,7 @@ class CSR(OpenclProcessing):
2D array in dense format.
"""
assert arr.size == self.size
- assert arr.dtype == numpy.float32
+ assert arr.dtype == self.dtype
# TODO handle pyopencl Buffer
@@ -189,10 +235,10 @@ class CSR(OpenclProcessing):
assert isinstance(csr_data, CSRData)
for arr in [csr_data.data, csr_data.indices, csr_data.indptr]:
assert arr.ndim == 1
- assert csr_data.data.size == self.max_nnz
- assert csr_data.indices.size == self.max_nnz
+ assert csr_data.data.size <= self.max_nnz
+ assert csr_data.indices.size <= self.max_nnz
assert csr_data.indptr.size == self.shape[0]+1
- assert csr_data.data.dtype == numpy.float32
+ assert csr_data.data.dtype == self.dtype
assert csr_data.indices.dtype == self.indice_dtype
assert csr_data.indptr.dtype == self.indice_dtype
@@ -228,7 +274,7 @@ class CSR(OpenclProcessing):
setattr(self, name, arr)
# The current array is a numpy.ndarray: copy H2D
elif isinstance(arr, numpy.ndarray):
- getattr(self, name)[:] = arr[:]
+ getattr(self, name)[:arr.size] = arr[:]
else:
raise ValueError("Unsupported array type: %s" % type(arr))
diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py
index 3283a4d..49cc0b4 100644
--- a/silx/opencl/test/test_addition.py
+++ b/silx/opencl/test/test_addition.py
@@ -35,7 +35,7 @@ __authors__ = ["Henri Payno, Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "05/07/2018"
+__date__ = "01/08/2019"
import logging
import numpy
@@ -80,7 +80,8 @@ class TestAddition(unittest.TestCase):
self.shape = 4096
self.data = numpy.random.random(self.shape).astype(numpy.float32)
self.d_array_img = pyopencl.array.to_device(self.queue, self.data)
- self.d_array_5 = pyopencl.array.zeros_like(self.d_array_img) - 5
+ self.d_array_5 = pyopencl.array.empty_like(self.d_array_img)
+ self.d_array_5.fill(-5)
self.program = pyopencl.Program(self.ctx, get_opencl_code("addition")).build()
def tearDown(self):
diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py
index 8acd385..c213808 100644
--- a/silx/opencl/test/test_convolution.py
+++ b/silx/opencl/test/test_convolution.py
@@ -34,12 +34,13 @@ __authors__ = ["Pierre Paleo"]
__contact__ = "pierre.paleo@esrf.fr"
__license__ = "MIT"
__copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "15/02/2019"
+__date__ = "01/08/2019"
import logging
from itertools import product
import numpy as np
from silx.utils.testutils import parameterize
+from silx.image.utils import gaussian_kernel
try:
from scipy.ndimage import convolve, convolve1d
from scipy.misc import ascent
@@ -52,7 +53,7 @@ from ..common import ocl
if ocl:
import pyopencl as cl
import pyopencl.array as parray
- from ..convolution import Convolution, gaussian_kernel
+ from silx.opencl.convolution import Convolution
logger = logging.getLogger(__name__)
@@ -178,7 +179,8 @@ class TestConvolution(unittest.TestCase):
else:
data_ref = data
if self.param["output_on_device"]:
- d_res = parray.zeros_like(conv.data_out)
+ d_res = parray.empty_like(conv.data_out)
+ d_res.fill(0)
res = d_res
else:
res = None
diff --git a/silx/opencl/test/test_kahan.py b/silx/opencl/test/test_kahan.py
index bb9ea3f..167640c 100644
--- a/silx/opencl/test/test_kahan.py
+++ b/silx/opencl/test/test_kahan.py
@@ -34,7 +34,7 @@ __author__ = "Jérôme Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "30/01/2019"
+__date__ = "01/08/2019"
import unittest
@@ -120,7 +120,8 @@ class TestKahan(unittest.TestCase):
"""
prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
ones_d = pyopencl.array.to_device(self.queue, data)
- res_d = pyopencl.array.zeros(self.queue, 2, numpy.float32)
+ res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
+ res_d.fill(0)
evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
evt.wait()
res = res_d.get().sum(dtype=numpy.float64)
@@ -202,7 +203,8 @@ class TestKahan(unittest.TestCase):
prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
ones_d = pyopencl.array.to_device(self.queue, data)
- res_d = pyopencl.array.zeros(self.queue, 2, numpy.float32)
+ res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
+ res_d.fill(0)
evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
evt.wait()
res = res_d.get().sum(dtype="float64")
diff --git a/silx/opencl/test/test_linalg.py b/silx/opencl/test/test_linalg.py
index 7d03983..b72fa20 100644
--- a/silx/opencl/test/test_linalg.py
+++ b/silx/opencl/test/test_linalg.py
@@ -30,7 +30,7 @@ from __future__ import division, print_function
__authors__ = ["Pierre paleo"]
__license__ = "MIT"
__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "14/06/2017"
+__date__ = "01/08/2019"
import time
@@ -109,7 +109,8 @@ class TestLinAlg(unittest.TestCase):
self.div_ref = divergence(self.grad_ref)
self.image2 = np.zeros_like(self.image)
# Device images
- self.gradient_parray = parray.zeros(self.la.queue, self.image.shape, np.complex64)
+ self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64)
+ self.gradient_parray.fill(0)
# we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
# but platforms not suporting openCL 1.2 have a problem with enqueue_fill_buffer,
# so we use the parray "fill" utility
diff --git a/silx/opencl/test/test_sparse.py b/silx/opencl/test/test_sparse.py
index 56f1ba4..76a6a0a 100644
--- a/silx/opencl/test/test_sparse.py
+++ b/silx/opencl/test/test_sparse.py
@@ -82,39 +82,45 @@ class TestCSR(unittest.TestCase):
"""Test CSR format"""
def setUp(self):
- self.array = generate_sparse_random_data(shape=(512, 511))
- # Compute reference sparsification
- a_s = sp.csr_matrix(self.array)
- self.ref_data = a_s.data
- self.ref_indices = a_s.indices
- self.ref_indptr = a_s.indptr
- self.ref_nnz = a_s.nnz
# Test possible configurations
input_on_device = [False, True]
output_on_device = [False, True]
- self._test_configs = list(product(input_on_device, output_on_device))
+ dtypes = [np.float32, np.int32, np.uint16]
+ self._test_configs = list(product(input_on_device, output_on_device, dtypes))
+
+
+ def compute_ref_sparsification(self, array):
+ ref_sparse = sp.csr_matrix(array)
+ return ref_sparse
def test_sparsification(self):
- for input_on_device, output_on_device in self._test_configs:
- self._test_sparsification(input_on_device, output_on_device)
+ for input_on_device, output_on_device, dtype in self._test_configs:
+ self._test_sparsification(input_on_device, output_on_device, dtype)
- def _test_sparsification(self, input_on_device, output_on_device):
- current_config = "input on device: %s, output on device: %s" % (
- str(input_on_device), str(output_on_device)
+ def _test_sparsification(self, input_on_device, output_on_device, dtype):
+ current_config = "input on device: %s, output on device: %s, dtype: %s" % (
+ str(input_on_device), str(output_on_device), str(dtype)
)
+ logger.debug("CSR: %s" % current_config)
+ # Generate data and reference CSR
+ array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
+ ref_sparse = self.compute_ref_sparsification(array)
# Sparsify on device
- csr = CSR(self.array.shape)
+ csr = CSR(array.shape, dtype=dtype)
if input_on_device:
# The array has to be flattened
- arr = parray.to_device(csr.queue, self.array.ravel())
+ arr = parray.to_device(csr.queue, array.ravel())
else:
- arr = self.array
+ arr = array
if output_on_device:
- d_data = parray.zeros_like(csr.data)
- d_indices = parray.zeros_like(csr.indices)
- d_indptr = parray.zeros_like(csr.indptr)
+ d_data = parray.empty_like(csr.data)
+ d_indices = parray.empty_like(csr.indices)
+ d_indptr = parray.empty_like(csr.indptr)
+ d_data.fill(0)
+ d_indices.fill(0)
+ d_indptr.fill(0)
output = (d_data, d_indices, d_indptr)
else:
output = None
@@ -124,45 +130,50 @@ class TestCSR(unittest.TestCase):
indices = indices.get()
indptr = indptr.get()
# Compare
- nnz = self.ref_nnz
+ nnz = ref_sparse.nnz
self.assertTrue(
- np.allclose(data[:nnz], self.ref_data),
+ np.allclose(data[:nnz], ref_sparse.data),
"something wrong with sparsified data (%s)"
% current_config
)
self.assertTrue(
- np.allclose(indices[:nnz], self.ref_indices),
+ np.allclose(indices[:nnz], ref_sparse.indices),
"something wrong with sparsified indices (%s)"
% current_config
)
self.assertTrue(
- np.allclose(indptr, self.ref_indptr),
+ np.allclose(indptr, ref_sparse.indptr),
"something wrong with sparsified indices pointers (indptr) (%s)"
% current_config
)
def test_desparsification(self):
- for input_on_device, output_on_device in self._test_configs:
- self._test_desparsification(input_on_device, output_on_device)
+ for input_on_device, output_on_device, dtype in self._test_configs:
+ self._test_desparsification(input_on_device, output_on_device, dtype)
- def _test_desparsification(self, input_on_device, output_on_device):
- current_config = "input on device: %s, output on device: %s" % (
- str(input_on_device), str(output_on_device)
+ def _test_desparsification(self, input_on_device, output_on_device, dtype):
+ current_config = "input on device: %s, output on device: %s, dtype: %s" % (
+ str(input_on_device), str(output_on_device), str(dtype)
)
+ logger.debug("CSR: %s" % current_config)
+ # Generate data and reference CSR
+ array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
+ ref_sparse = self.compute_ref_sparsification(array)
# De-sparsify on device
- csr = CSR(self.array.shape, max_nnz=self.ref_nnz)
+ csr = CSR(array.shape, dtype=dtype, max_nnz=ref_sparse.nnz)
if input_on_device:
- data = parray.to_device(csr.queue, self.ref_data)
- indices = parray.to_device(csr.queue, self.ref_indices)
- indptr = parray.to_device(csr.queue, self.ref_indptr)
+ data = parray.to_device(csr.queue, ref_sparse.data)
+ indices = parray.to_device(csr.queue, ref_sparse.indices)
+ indptr = parray.to_device(csr.queue, ref_sparse.indptr)
else:
- data = self.ref_data
- indices = self.ref_indices
- indptr = self.ref_indptr
+ data = ref_sparse.data
+ indices = ref_sparse.indices
+ indptr = ref_sparse.indptr
if output_on_device:
- d_arr = parray.zeros_like(csr.array)
+ d_arr = parray.empty_like(csr.array)
+ d_arr.fill(0)
output = d_arr
else:
output = None
@@ -171,7 +182,7 @@ class TestCSR(unittest.TestCase):
arr = arr.get()
# Compare
self.assertTrue(
- np.allclose(arr.reshape(self.array.shape), self.array),
+ np.allclose(arr.reshape(array.shape), array),
"something wrong with densified data (%s)"
% current_config
)
diff --git a/silx/opencl/utils.py b/silx/opencl/utils.py
index f558c1b..575e018 100644
--- a/silx/opencl/utils.py
+++ b/silx/opencl/utils.py
@@ -119,3 +119,96 @@ def concatenate_cl_kernel(filenames):
this method concatenates all the kernel from the list
"""
return os.linesep.join(read_cl_file(fn) for fn in filenames)
+
+
+
+
+class ConvolutionInfos(object):
+ allowed_axes = {
+ "1D": [None],
+ "separable_2D_1D_2D": [None, (0, 1), (1, 0)],
+ "batched_1D_2D": [(0,), (1,)],
+ "separable_3D_1D_3D": [
+ None,
+ (0, 1, 2),
+ (1, 2, 0),
+ (2, 0, 1),
+ (2, 1, 0),
+ (1, 0, 2),
+ (0, 2, 1)
+ ],
+ "batched_1D_3D": [(0,), (1,), (2,)],
+ "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
+ "2D": [None],
+ "batched_2D_3D": [(0,), (1,), (2,)],
+ "separable_3D_2D_3D": [
+ (1, 0),
+ (0, 1),
+ (2, 0),
+ (0, 2),
+ (1, 2),
+ (2, 1),
+ ],
+ "3D": [None],
+ }
+ use_cases = {
+ (1, 1): {
+ "1D": {
+ "name": "1D convolution on 1D data",
+ "kernels": ["convol_1D_X"],
+ },
+ },
+ (2, 2): {
+ "2D": {
+ "name": "2D convolution on 2D data",
+ "kernels": ["convol_2D_XY"],
+ },
+ },
+ (3, 3): {
+ "3D": {
+ "name": "3D convolution on 3D data",
+ "kernels": ["convol_3D_XYZ"],
+ },
+ },
+ (2, 1): {
+ "separable_2D_1D_2D": {
+ "name": "Separable (2D->1D) convolution on 2D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y"],
+ },
+ "batched_1D_2D": {
+ "name": "Batched 1D convolution on 2D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y"],
+ },
+ },
+ (3, 1): {
+ "separable_3D_1D_3D": {
+ "name": "Separable (3D->1D) convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ "batched_1D_3D": {
+ "name": "Batched 1D convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ "batched_separable_2D_1D_3D": {
+ "name": "Batched separable (2D->1D) convolution on 3D data",
+ "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
+ },
+ },
+ (3, 2): {
+ "separable_3D_2D_3D": {
+ "name": "Separable (3D->2D) convolution on 3D data",
+ "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
+ },
+ "batched_2D_3D": {
+ "name": "Batched 2D convolution on 3D data",
+ "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
+ },
+ },
+ }
+
+
+
+
+
+
+