Diffstat (limited to 'src/silx/opencl')
-rw-r--r--  src/silx/opencl/__init__.py | 1
-rw-r--r--  src/silx/opencl/atomic.py | 93
-rw-r--r--  src/silx/opencl/backprojection.py | 153
-rw-r--r--  src/silx/opencl/codec/bitshuffle_lz4.py | 214
-rw-r--r--  src/silx/opencl/codec/byte_offset.py | 335
-rw-r--r--  src/silx/opencl/codec/setup.py | 43
-rw-r--r--  src/silx/opencl/codec/test/__init__.py | 1
-rw-r--r--  src/silx/opencl/codec/test/test_bitshuffle_lz4.py | 126
-rw-r--r--  src/silx/opencl/codec/test/test_byte_offset.py | 97
-rw-r--r--  src/silx/opencl/common.py | 334
-rw-r--r--  src/silx/opencl/conftest.py | 1
-rw-r--r--  src/silx/opencl/convolution.py | 98
-rw-r--r--  src/silx/opencl/image.py | 336
-rw-r--r--  src/silx/opencl/linalg.py | 78
-rw-r--r--  src/silx/opencl/medfilt.py | 144
-rw-r--r--  src/silx/opencl/processing.py | 245
-rw-r--r--  src/silx/opencl/projection.py | 204
-rw-r--r--  src/silx/opencl/reconstruction.py | 205
-rw-r--r--  src/silx/opencl/setup.py | 48
-rw-r--r--  src/silx/opencl/sinofilter.py | 199
-rw-r--r--  src/silx/opencl/sparse.py | 106
-rw-r--r--  src/silx/opencl/statistics.py | 177
-rw-r--r--  src/silx/opencl/test/__init__.py | 1
-rw-r--r--  src/silx/opencl/test/test_addition.py | 73
-rw-r--r--  src/silx/opencl/test/test_array_utils.py | 65
-rw-r--r--  src/silx/opencl/test/test_backprojection.py | 52
-rw-r--r--  src/silx/opencl/test/test_convolution.py | 19
-rw-r--r--  src/silx/opencl/test/test_doubleword.py | 199
-rw-r--r--  src/silx/opencl/test/test_image.py | 28
-rw-r--r--  src/silx/opencl/test/test_kahan.py | 87
-rw-r--r--  src/silx/opencl/test/test_linalg.py | 85
-rw-r--r--  src/silx/opencl/test/test_medfilt.py | 38
-rw-r--r--  src/silx/opencl/test/test_projection.py | 16
-rw-r--r--  src/silx/opencl/test/test_sparse.py | 34
-rw-r--r--  src/silx/opencl/test/test_stats.py | 50
-rw-r--r--  src/silx/opencl/utils.py | 27
36 files changed, 2550 insertions, 1462 deletions
diff --git a/src/silx/opencl/__init__.py b/src/silx/opencl/__init__.py
index fbd1f88..466ffaf 100644
--- a/src/silx/opencl/__init__.py
+++ b/src/silx/opencl/__init__.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: S I L X project
# https://github.com/silx-kit/silx
diff --git a/src/silx/opencl/atomic.py b/src/silx/opencl/atomic.py
new file mode 100644
index 0000000..16d3eff
--- /dev/null
+++ b/src/silx/opencl/atomic.py
@@ -0,0 +1,93 @@
+#
+# Project: S I L X project
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# .
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+# .
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""
+Utilities around atomic operations in OpenCL
+"""
+
+__author__ = "Jerome Kieffer"
+__license__ = "MIT"
+__date__ = "14/06/2023"
+__copyright__ = "2023-2023, ESRF, Grenoble"
+__contact__ = "jerome.kieffer@esrf.fr"
+
+import numpy
+import pyopencl
+import pyopencl.array as cla
+
+
+def check_atomic32(device):
+ try:
+ ctx = pyopencl.Context(devices=[device])
+ except:
+ return False, f"Unable to create context on {device}"
+ else:
+ queue = pyopencl.CommandQueue(ctx)
+ src = """
+kernel void check_atomic32(global int* ary){
+int res = atom_inc(ary);
+}
+"""
+ try:
+ prg = pyopencl.Program(ctx, src).build()
+ except Exception as err:
+ return False, f"{type(err)}: {err}"
+ a = numpy.zeros(1, numpy.int32)
+ d = cla.to_device(queue, a)
+ prg.check_atomic32(queue, (1024,), (32,), d.data).wait()
+ value = d.get()[0]
+ return value == 1024, f"Got the proper value 1024=={value}"
+
+
+def check_atomic64(device):
+ try:
+ ctx = pyopencl.Context(devices=[device])
+ except:
+ return False, f"Unable to create context on {device}"
+ else:
+ queue = pyopencl.CommandQueue(ctx)
+ if (
+ device.platform.name == "Portable Computing Language"
+ and "GPU" in pyopencl.device_type.to_string(device.type).upper()
+ ):
+ # this configuration is known to seg-fault
+ return False, "PoCL + GPU do not support atomic64"
+ src = """
+#pragma OPENCL EXTENSION cl_khr_fp64: enable
+#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
+kernel void check_atomic64(global long* ary){
+long res = atom_inc(ary);
+}
+"""
+ try:
+ prg = pyopencl.Program(ctx, src).build()
+ except Exception as err:
+ return False, f"{type(err)}: {err}"
+ a = numpy.zeros(1, numpy.int64)
+ d = cla.to_device(queue, a)
+ prg.check_atomic64(queue, (1024,), (32,), d.data).wait()
+ value = d.get()[0]
+ return value == 1024, f"Got the proper value 1024=={value}"
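
For context, a minimal usage sketch of the two helpers added above (not part of the patch; it assumes pyopencl is installed and at least one OpenCL platform is visible):

import pyopencl
from silx.opencl.atomic import check_atomic32, check_atomic64

# Probe the first available device for 32-bit and 64-bit atomic support.
device = pyopencl.get_platforms()[0].get_devices()[0]
ok32, msg32 = check_atomic32(device)  # each helper returns (bool, message)
ok64, msg64 = check_atomic64(device)
print(f"atomic32: {ok32} ({msg32})")
print(f"atomic64: {ok64} ({msg64})")
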
diff --git a/src/silx/opencl/backprojection.py b/src/silx/opencl/backprojection.py
index 65a9836..5af2bc5 100644
--- a/src/silx/opencl/backprojection.py
+++ b/src/silx/opencl/backprojection.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2023 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Module for (filtered) backprojection on the GPU"""
-from __future__ import absolute_import, print_function, with_statement, division
-
__authors__ = ["A. Mirone, P. Paleo"]
__license__ = "MIT"
__date__ = "25/01/2019"
@@ -37,8 +34,6 @@ import numpy as np
from .common import pyopencl
from .processing import EventDescription, OpenclProcessing, BufferDescription
from .sinofilter import SinoFilter
-from .sinofilter import fourier_filter as fourier_filter_
-from ..utils.deprecation import deprecated
if pyopencl:
mf = pyopencl.mem_flags
@@ -64,12 +59,23 @@ def _idivup(a, b):
class Backprojection(OpenclProcessing):
"""A class for performing the backprojection using OpenCL"""
+
kernel_files = ["backproj.cl", "array_utils.cl"]
- def __init__(self, sino_shape, slice_shape=None, axis_position=None,
- angles=None, filter_name=None, ctx=None, devicetype="all",
- platformid=None, deviceid=None, profile=False,
- extra_options=None):
+ def __init__(
+ self,
+ sino_shape,
+ slice_shape=None,
+ axis_position=None,
+ angles=None,
+ filter_name=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ extra_options=None,
+ ):
"""Constructor of the OpenCL (filtered) backprojection
:param sino_shape: shape of the sinogram. The sinogram is in the format
@@ -101,19 +107,26 @@ class Backprojection(OpenclProcessing):
# assuming no discrete GPU
# raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
- self._init_geometry(sino_shape, slice_shape, angles, axis_position,
- extra_options)
+ self._init_geometry(
+ sino_shape, slice_shape, angles, axis_position, extra_options
+ )
self._allocate_memory()
self._compute_angles()
self._init_kernels()
self._init_filter(filter_name)
- def _init_geometry(self, sino_shape, slice_shape, angles, axis_position,
- extra_options):
+ def _init_geometry(
+ self, sino_shape, slice_shape, angles, axis_position, extra_options
+ ):
"""Geometry Initialization
:param sino_shape: shape of the sinogram. The sinogram is in the format
@@ -137,12 +150,12 @@ class Backprojection(OpenclProcessing):
self.slice_shape = slice_shape
self.dimrec_shape = (
_idivup(self.slice_shape[0], 32) * 32,
- _idivup(self.slice_shape[1], 32) * 32
+ _idivup(self.slice_shape[1], 32) * 32,
)
if axis_position:
self.axis_pos = np.float32(axis_position)
else:
- self.axis_pos = np.float32((sino_shape[1] - 1.) / 2)
+ self.axis_pos = np.float32((sino_shape[1] - 1.0) / 2)
self.axis_array = None # TODO: add axis correction front-end
self._init_extra_options(extra_options)
@@ -152,11 +165,11 @@ class Backprojection(OpenclProcessing):
:param dict extra_options: Advanced extra options
"""
self.extra_options = {
- "cutoff": 1.,
+ "cutoff": 1.0,
"use_numpy_fft": False,
# It is axis_pos - (num_bins-1)/2 in PyHST
- "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2.,
- "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2.
+ "gpu_offset_x": 0.0, # self.axis_pos - (self.num_bins - 1) / 2.,
+ "gpu_offset_y": 0.0, # self.axis_pos - (self.num_bins - 1) / 2.
}
if extra_options is not None:
self.extra_options.update(extra_options)
@@ -169,7 +182,9 @@ class Backprojection(OpenclProcessing):
# Device memory
self.buffers = [
BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE),
- BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available)
+ BufferDescription(
+ "d_sino", self.shape, np.float32, mf.READ_WRITE
+ ), # before transferring to texture (if available)
BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY),
BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY),
BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY),
@@ -194,27 +209,29 @@ class Backprojection(OpenclProcessing):
if self.axis_array:
self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:]
else:
- self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos
+ self.cl_mem["d_axes"][:] = (
+ np.ones(self.num_projs, dtype="f") * self.axis_pos
+ )
def _init_kernels(self):
compile_options = None
- if not(self._use_textures):
+ if not (self._use_textures):
compile_options = "-DDONT_USE_TEXTURES"
OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=compile_options
+ self, self.kernel_files, compile_options=compile_options
)
# check that workgroup can actually be (16, 16)
- self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel")
+ self.compiletime_workgroup_size = self.kernels.max_workgroup_size(
+ "backproj_cpu_kernel"
+ )
# Workgroup and ndrange sizes are always the same
self.wg = (16, 16)
self.ndrange = (
_idivup(int(self.dimrec_shape[1]), 32) * self.wg[0],
- _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1]
+ _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1],
)
# Prepare arguments for the kernel call
- if not(self._use_textures):
+ if not (self._use_textures):
d_sino_ref = self.d_sino.data
else:
d_sino_ref = self.d_sino_tex
@@ -229,7 +246,7 @@ class Backprojection(OpenclProcessing):
self.cl_mem["_d_slice"].data,
# d_sino (__read_only image2d_t or float*)
d_sino_ref,
- # gpu_offset_x (float32) 
+ # gpu_offset_x (float32)
np.float32(self.extra_options["gpu_offset_x"]),
# gpu_offset_y (float32)
np.float32(self.extra_options["gpu_offset_y"]),
@@ -240,7 +257,7 @@ class Backprojection(OpenclProcessing):
# d_axis (__global float32*)
self.cl_mem["d_axes"].data,
# shared mem (__local float32*)
- self._get_local_mem()
+ self._get_local_mem(),
)
def _allocate_textures(self):
@@ -276,7 +293,7 @@ class Backprojection(OpenclProcessing):
np.int32(self.dimrec_shape[1]),
np.int32((0, 0)),
np.int32((0, 0)),
- slice_shape_ocl
+ slice_shape_ocl,
)
return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
@@ -284,47 +301,39 @@ class Backprojection(OpenclProcessing):
if isinstance(sino, parray.Array):
return self._transfer_device_to_texture(sino)
sino2 = sino
- if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
+ if not (sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
sino2 = np.ascontiguousarray(sino, dtype=np.float32)
- if not(self._use_textures):
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino.data,
- sino2
- )
+ if not (self._use_textures):
+ ev = pyopencl.enqueue_copy(self.queue, self.d_sino.data, sino2)
what = "transfer filtered sino H->D buffer"
ev.wait()
else:
ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino_tex,
- sino2,
- origin=(0, 0),
- region=self.shape[::-1]
- )
+ self.queue,
+ self.d_sino_tex,
+ sino2,
+ origin=(0, 0),
+ region=self.shape[::-1],
+ )
what = "transfer filtered sino H->D texture"
return EventDescription(what, ev)
def _transfer_device_to_texture(self, d_sino):
- if not(self._use_textures):
+ if not (self._use_textures):
if id(self.d_sino) == id(d_sino):
return
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino.data,
- d_sino
- )
+ ev = pyopencl.enqueue_copy(self.queue, self.d_sino.data, d_sino)
what = "transfer filtered sino D->D buffer"
ev.wait()
else:
ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino_tex,
- d_sino.data,
- offset=0,
- origin=(0, 0),
- region=self.shape[::-1]
- )
+ self.queue,
+ self.d_sino_tex,
+ d_sino.data,
+ offset=0,
+ origin=(0, 0),
+ region=self.shape[::-1],
+ )
what = "transfer filtered sino D->D texture"
return EventDescription(what, ev)
@@ -340,20 +349,17 @@ class Backprojection(OpenclProcessing):
with self.sem:
events.append(self._transfer_to_texture(sino))
# Call the backprojection kernel
- if not(self._use_textures):
+ if not (self._use_textures):
kernel_to_call = self.kernels.backproj_cpu_kernel
else:
kernel_to_call = self.kernels.backproj_kernel
kernel_to_call(
- self.queue,
- self.ndrange,
- self.wg,
- *self._backproj_kernel_args
+ self.queue, self.ndrange, self.wg, *self._backproj_kernel_args
)
# Return
if output is None:
res = self.cl_mem["_d_slice"].get()
- res = res[:self.slice_shape[0], :self.slice_shape[1]]
+ res = res[: self.slice_shape[0], : self.slice_shape[1]]
else:
res = output
self._cpy2d_to_slice(output)
@@ -380,18 +386,3 @@ class Backprojection(OpenclProcessing):
return res
__call__ = filtered_backprojection
-
-
- # -------------------
- # - Compatibility -
- # -------------------
-
- @deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
- def filter_projections(self, sino, rescale=True):
- self.sino_filter(sino, output=self.d_sino)
-
-
-
-def fourier_filter(sino, filter_=None, fft_size=None):
- return fourier_filter_(sino, filter_=filter_, fft_size=fft_size)
-
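
The backprojection.py hunks above are essentially a Black-style reformatting plus the removal of the deprecated filter_projections wrapper and the module-level fourier_filter alias; the public API is otherwise unchanged. A hedged reminder of how that API is driven (the (n_angles, n_bins) sinogram layout follows the constructor docstring; pyopencl and an OpenCL device are assumed):

import numpy as np
from silx.opencl.backprojection import Backprojection

sino = np.random.rand(500, 512).astype(np.float32)  # (n_angles, n_bins)
fbp = Backprojection(sino.shape)  # default filter, automatic device selection
rec = fbp(sino)  # __call__ is an alias for filtered_backprojection()
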
diff --git a/src/silx/opencl/codec/bitshuffle_lz4.py b/src/silx/opencl/codec/bitshuffle_lz4.py
new file mode 100644
index 0000000..b0992b9
--- /dev/null
+++ b/src/silx/opencl/codec/bitshuffle_lz4.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python
+#
+# Project: Bitshuffle-LZ4 decompression in OpenCL
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2022-2023 European Synchrotron Radiation Facility, Grenoble, France
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+This module provides a class for bitshuffle-LZ4 decompression on the GPU.
+"""
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "09/11/2022"
+__status__ = "production"
+
+
+import os
+import struct
+import numpy
+from ..common import ocl, pyopencl, kernel_workgroup_size
+from ..processing import BufferDescription, EventDescription, OpenclProcessing
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class BitshuffleLz4(OpenclProcessing):
+ """Perform the bitshuffle-lz4 decompression on the GPU
+ See :class:`OpenclProcessing` for optional arguments description.
+ :param int cmp_size:
+ Size of the raw stream for decompression.
+ It can be (slightly) larger than the array.
+ :param int dec_size:
+ Size of the decompression output array
+ (mandatory for decompression)
+ :param dtype: dtype of decompressed data
+ """
+
+ LZ4_BLOCK_SIZE = 8192
+
+ def __init__(
+ self,
+ cmp_size,
+ dec_size,
+ dtype,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ block_size=None,
+ profile=False,
+ ):
+ """Constructor of the class:
+
+ :param cmp_size: size of the compressed data buffer (in bytes)
+ :param dec_size: size of the decompressed data buffer (in words)
+ :param dtype: data type of one word in the decompressed array
+
+ For the other parameters, see the documentation of OpenclProcessing
+ """
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ block_size=block_size,
+ profile=profile,
+ )
+ if self.block_size is None:
+ try:
+ self.block_size = self.ctx.devices[0].preferred_work_group_size_multiple
+ except:
+ self.block_size = self.device.max_work_group_size
+
+ self.cmp_size = numpy.uint64(cmp_size)
+ self.dec_size = numpy.uint64(dec_size)
+ self.dec_dtype = numpy.dtype(dtype)
+ self.num_blocks = numpy.uint32(
+ (self.dec_dtype.itemsize * self.dec_size + self.LZ4_BLOCK_SIZE - 1)
+ // self.LZ4_BLOCK_SIZE
+ )
+
+ buffers = [
+ BufferDescription("nb_blocks", 1, numpy.uint32, None),
+ BufferDescription("block_position", self.num_blocks, numpy.uint64, None),
+ BufferDescription("cmp", self.cmp_size, numpy.uint8, None),
+ BufferDescription("dec", self.dec_size, self.dec_dtype, None),
+ ]
+
+ self.allocate_buffers(buffers, use_array=True)
+
+ self.compile_kernels([os.path.join("codec", "bitshuffle_lz4")])
+ self.block_size = min(
+ self.block_size,
+ kernel_workgroup_size(self.program, "bslz4_decompress_block"),
+ )
+
+ def decompress(self, raw, out=None, wg=None, nbytes=None):
+ """This function actually performs the decompression by calling the kernels
+ :param numpy.ndarray raw: The compressed data as a 1D numpy array of char or string
+ :param pyopencl.array out: pyopencl array in which to place the result.
+ :param wg: tuneable parameter with the workgroup size.
+ :param int nbytes: (Optional) Number of bytes occupied by the chunk in raw.
+ :return: The decompressed image as a pyopencl array.
+ :rtype: pyopencl.array
+ """
+
+ events = []
+ with self.sem:
+ if nbytes is not None:
+ assert nbytes <= raw.size
+ len_raw = numpy.uint64(nbytes)
+ elif isinstance(raw, pyopencl.Buffer):
+ len_raw = numpy.uint64(raw.size)
+ else:
+ len_raw = numpy.uint64(len(raw))
+
+ if isinstance(raw, pyopencl.array.Array):
+ cmp_buffer = raw.data
+ num_blocks = self.num_blocks
+ elif isinstance(raw, pyopencl.Buffer):
+ cmp_buffer = raw
+ num_blocks = self.num_blocks
+ else:
+ if len_raw > self.cmp_size:
+ self.cmp_size = len_raw
+ logger.info("increase cmp buffer size to %s", self.cmp_size)
+ self.cl_mem["cmp"] = pyopencl.array.empty(
+ self.queue, self.cmp_size, dtype=numpy.uint8
+ )
+ evt = pyopencl.enqueue_copy(
+ self.queue, self.cl_mem["cmp"].data, raw, is_blocking=False
+ )
+ events.append(EventDescription("copy raw H -> D", evt))
+ cmp_buffer = self.cl_mem["cmp"].data
+
+ dest_size = struct.unpack(">Q", raw[:8])[0]
+ self_dest_nbyte = self.dec_size * self.dec_dtype.itemsize
+ if dest_size < self_dest_nbyte:
+ num_blocks = numpy.uint32(
+ (dest_size + self.LZ4_BLOCK_SIZE - 1) // self.LZ4_BLOCK_SIZE
+ )
+ elif dest_size > self_dest_nbyte:
+ num_blocks = numpy.uint32(
+ (dest_size + self.LZ4_BLOCK_SIZE - 1) // self.LZ4_BLOCK_SIZE
+ )
+ self.cl_mem["dec"] = pyopencl.array.empty(
+ self.queue, dest_size, self.dec_dtype
+ )
+ self.dec_size = dest_size // self.dec_dtype.itemsize
+ else:
+ num_blocks = self.num_blocks
+
+ wg = int(wg or self.block_size)
+
+ evt = self.program.lz4_unblock(
+ self.queue,
+ (1,),
+ (1,),
+ cmp_buffer,
+ len_raw,
+ self.cl_mem["block_position"].data,
+ num_blocks,
+ self.cl_mem["nb_blocks"].data,
+ )
+ events.append(EventDescription("LZ4 unblock", evt))
+
+ if out is None:
+ out = self.cl_mem["dec"]
+ else:
+ assert out.dtype == self.dec_dtype
+ assert out.size == self.dec_size
+
+ evt = self.program.bslz4_decompress_block(
+ self.queue,
+ (self.num_blocks * wg,),
+ (wg,),
+ cmp_buffer,
+ out.data,
+ self.cl_mem["block_position"].data,
+ self.cl_mem["nb_blocks"].data,
+ numpy.uint8(self.dec_dtype.itemsize),
+ )
+ events.append(EventDescription("LZ4 decompress", evt))
+ self.profile_multi(events)
+ return out
+
+ __call__ = decompress
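
A hedged round-trip sketch for the new BitshuffleLz4 class, mirroring the test added further down (it requires the optional bitshuffle package and a working OpenCL device):

import struct
import numpy
import bitshuffle
from silx.opencl.codec.bitshuffle_lz4 import BitshuffleLz4

ref = numpy.random.poisson(100, size=(103, 503)).astype("uint32")
# bslz4 chunk: 8-byte big-endian uncompressed size, 4-byte block size (0 = default),
# then the bitshuffle-LZ4 payload.
raw = struct.pack(">Q", ref.nbytes) + b"\x00" * 4 + bitshuffle.compress_lz4(ref).tobytes()

gpu = BitshuffleLz4(len(raw), ref.size, dtype=ref.dtype)
decompressed = gpu.decompress(raw).get()  # pyopencl array -> numpy
assert numpy.array_equal(decompressed, ref.ravel())
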
diff --git a/src/silx/opencl/codec/byte_offset.py b/src/silx/opencl/codec/byte_offset.py
index 9a52427..e3df9b2 100644
--- a/src/silx/opencl/codec/byte_offset.py
+++ b/src/silx/opencl/codec/byte_offset.py
@@ -1,10 +1,9 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: Sift implementation in Python + OpenCL
# https://github.com/silx-kit/silx
#
-# Copyright (C) 2013-2020 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2013-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
@@ -31,8 +30,6 @@
This module provides a class for CBF byte offset compression/decompression.
"""
-from __future__ import division, print_function, with_statement
-
__authors__ = ["Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
@@ -48,10 +45,12 @@ from ..common import ocl, pyopencl
from ..processing import BufferDescription, EventDescription, OpenclProcessing
import logging
+
logger = logging.getLogger(__name__)
if pyopencl:
import pyopencl.version
+
if pyopencl.version.VERSION < (2016, 0):
from pyopencl.scan import GenericScanKernel, GenericDebugScanKernel
else:
@@ -64,23 +63,36 @@ else:
class ByteOffset(OpenclProcessing):
"""Perform the byte offset compression/decompression on the GPU
- See :class:`OpenclProcessing` for optional arguments description.
-
- :param int raw_size:
- Size of the raw stream for decompression.
- It can be (slightly) larger than the array.
- :param int dec_size:
- Size of the decompression output array
- (mandatory for decompression)
- """
-
- def __init__(self, raw_size=None, dec_size=None,
- ctx=None, devicetype="all",
- platformid=None, deviceid=None,
- block_size=None, profile=False):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
+ See :class:`OpenclProcessing` for optional arguments description.
+
+ :param int raw_size:
+ Size of the raw stream for decompression.
+ It can be (slightly) larger than the array.
+ :param int dec_size:
+ Size of the decompression output array
+ (mandatory for decompression)
+ """
+
+ def __init__(
+ self,
+ raw_size=None,
+ dec_size=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ block_size=None,
+ profile=False,
+ ):
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ block_size=block_size,
+ profile=profile,
+ )
if self.block_size is None:
self.block_size = self.device.max_work_group_size
wg = self.block_size
@@ -97,7 +109,9 @@ class ByteOffset(OpenclProcessing):
BufferDescription("raw", self.padded_raw_size, numpy.int8, None),
BufferDescription("mask", self.padded_raw_size, numpy.int32, None),
BufferDescription("values", self.padded_raw_size, numpy.int32, None),
- BufferDescription("exceptions", self.padded_raw_size, numpy.int32, None)
+ BufferDescription(
+ "exceptions", self.padded_raw_size, numpy.int32, None
+ ),
]
if dec_size is None:
@@ -106,18 +120,17 @@ class ByteOffset(OpenclProcessing):
self.dec_size = numpy.int32(dec_size)
buffers += [
BufferDescription("data_float", self.dec_size, numpy.float32, None),
- BufferDescription("data_int", self.dec_size, numpy.int32, None)
+ BufferDescription("data_int", self.dec_size, numpy.int32, None),
]
self.allocate_buffers(buffers, use_array=True)
self.compile_kernels([os.path.join("codec", "byte_offset")])
self.kernels.__setattr__("scan", self._init_double_scan())
- self.kernels.__setattr__("compression_scan",
- self._init_compression_scan())
+ self.kernels.__setattr__("compression_scan", self._init_compression_scan())
def _init_double_scan(self):
- """"generates a double scan on indexes and values in one operation"""
+ """generates a double scan on indexes and values in one operation"""
arguments = "__global int *value", "__global int *index"
int2 = pyopencl.tools.get_or_register_dtype("int2")
input_expr = "index[i]>0 ? (int2)(0, 0) : (int2)(value[i], 1)"
@@ -126,21 +139,25 @@ class ByteOffset(OpenclProcessing):
output_statement = "value[i] = item.s0; index[i+1] = item.s1;"
if self.block_size > 256:
- knl = GenericScanKernel(self.ctx,
- dtype=int2,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
+ knl = GenericScanKernel(
+ self.ctx,
+ dtype=int2,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement,
+ )
else: # MacOS on CPU
- knl = GenericDebugScanKernel(self.ctx,
- dtype=int2,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
+ knl = GenericDebugScanKernel(
+ self.ctx,
+ dtype=int2,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement,
+ )
return knl
def decode(self, raw, as_float=False, out=None):
@@ -153,8 +170,9 @@ class ByteOffset(OpenclProcessing):
:return: The decompressed image as an pyopencl array.
:rtype: pyopencl.array
"""
- assert self.dec_size is not None, \
- "dec_size is a mandatory ByteOffset init argument for decompression"
+ assert (
+ self.dec_size is not None
+ ), "dec_size is a mandatory ByteOffset init argument for decompression"
events = []
with self.sem:
@@ -165,67 +183,96 @@ class ByteOffset(OpenclProcessing):
self.padded_raw_size = (self.raw_size + wg - 1) & ~(wg - 1)
logger.info("increase raw buffer size to %s", self.padded_raw_size)
buffers = {
- "raw": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int8),
- "mask": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- "exceptions": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- "values": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- }
+ "raw": pyopencl.array.empty(
+ self.queue, self.padded_raw_size, dtype=numpy.int8
+ ),
+ "mask": pyopencl.array.empty(
+ self.queue, self.padded_raw_size, dtype=numpy.int32
+ ),
+ "exceptions": pyopencl.array.empty(
+ self.queue, self.padded_raw_size, dtype=numpy.int32
+ ),
+ "values": pyopencl.array.empty(
+ self.queue, self.padded_raw_size, dtype=numpy.int32
+ ),
+ }
self.cl_mem.update(buffers)
else:
wg = self.block_size
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data,
- raw,
- is_blocking=False)
+ evt = pyopencl.enqueue_copy(
+ self.queue, self.cl_mem["raw"].data, raw, is_blocking=False
+ )
events.append(EventDescription("copy raw H -> D", evt))
- evt = self.kernels.fill_int_mem(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["mask"].data,
- numpy.int32(self.padded_raw_size),
- numpy.int32(0),
- numpy.int32(0))
+ evt = self.kernels.fill_int_mem(
+ self.queue,
+ (self.padded_raw_size,),
+ (wg,),
+ self.cl_mem["mask"].data,
+ numpy.int32(self.padded_raw_size),
+ numpy.int32(0),
+ numpy.int32(0),
+ )
events.append(EventDescription("memset mask", evt))
- evt = self.kernels.fill_int_mem(self.queue, (1,), (1,),
- self.cl_mem["counter"].data,
- numpy.int32(1),
- numpy.int32(0),
- numpy.int32(0))
+ evt = self.kernels.fill_int_mem(
+ self.queue,
+ (1,),
+ (1,),
+ self.cl_mem["counter"].data,
+ numpy.int32(1),
+ numpy.int32(0),
+ numpy.int32(0),
+ )
events.append(EventDescription("memset counter", evt))
- evt = self.kernels.mark_exceptions(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["raw"].data,
- len_raw,
- numpy.int32(self.raw_size),
- self.cl_mem["mask"].data,
- self.cl_mem["values"].data,
- self.cl_mem["counter"].data,
- self.cl_mem["exceptions"].data)
+ evt = self.kernels.mark_exceptions(
+ self.queue,
+ (self.padded_raw_size,),
+ (wg,),
+ self.cl_mem["raw"].data,
+ len_raw,
+ numpy.int32(self.raw_size),
+ self.cl_mem["mask"].data,
+ self.cl_mem["values"].data,
+ self.cl_mem["counter"].data,
+ self.cl_mem["exceptions"].data,
+ )
events.append(EventDescription("mark exceptions", evt))
nb_exceptions = numpy.empty(1, dtype=numpy.int32)
- evt = pyopencl.enqueue_copy(self.queue, nb_exceptions, self.cl_mem["counter"].data,
- is_blocking=False)
+ evt = pyopencl.enqueue_copy(
+ self.queue,
+ nb_exceptions,
+ self.cl_mem["counter"].data,
+ is_blocking=False,
+ )
events.append(EventDescription("copy counter D -> H", evt))
evt.wait()
nbexc = int(nb_exceptions[0])
if nbexc == 0:
logger.info("nbexc %i", nbexc)
else:
- evt = self.kernels.treat_exceptions(self.queue, (nbexc,), (1,),
- self.cl_mem["raw"].data,
- len_raw,
- self.cl_mem["mask"].data,
- self.cl_mem["exceptions"].data,
- self.cl_mem["values"].data
- )
+ evt = self.kernels.treat_exceptions(
+ self.queue,
+ (nbexc,),
+ (1,),
+ self.cl_mem["raw"].data,
+ len_raw,
+ self.cl_mem["mask"].data,
+ self.cl_mem["exceptions"].data,
+ self.cl_mem["values"].data,
+ )
events.append(EventDescription("treat_exceptions", evt))
- #self.cl_mem["copy_values"] = self.cl_mem["values"].copy()
- #self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy()
- evt = self.kernels.scan(self.cl_mem["values"],
- self.cl_mem["mask"],
- queue=self.queue,
- size=int(len_raw),
- wait_for=(evt,))
+ # self.cl_mem["copy_values"] = self.cl_mem["values"].copy()
+ # self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy()
+ evt = self.kernels.scan(
+ self.cl_mem["values"],
+ self.cl_mem["mask"],
+ queue=self.queue,
+ size=int(len_raw),
+ wait_for=(evt,),
+ )
events.append(EventDescription("double scan", evt))
- #evt.wait()
+ # evt.wait()
if out is not None:
if out.dtype == numpy.float32:
copy_results = self.kernels.copy_result_float
@@ -238,15 +285,18 @@ class ByteOffset(OpenclProcessing):
else:
out = self.cl_mem["data_int"]
copy_results = self.kernels.copy_result_int
- evt = copy_results(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["values"].data,
- self.cl_mem["mask"].data,
- len_raw,
- self.dec_size,
- out.data
- )
+ evt = copy_results(
+ self.queue,
+ (self.padded_raw_size,),
+ (wg,),
+ self.cl_mem["values"].data,
+ self.cl_mem["mask"].data,
+ len_raw,
+ self.dec_size,
+ out.data,
+ )
events.append(EventDescription("copy_results", evt))
- #evt.wait()
+ # evt.wait()
if self.profile:
self.events += events
return out
@@ -294,7 +344,9 @@ class ByteOffset(OpenclProcessing):
}
}
"""
- arguments = "__global const int *data, __global char *compressed, __global int *size"
+ arguments = (
+ "__global const int *data, __global char *compressed, __global int *size"
+ )
input_expr = "compressed_size((i == 0) ? data[0] : (data[i] - data[i - 1]))"
scan_expr = "a+b"
neutral = "0"
@@ -306,23 +358,27 @@ class ByteOffset(OpenclProcessing):
"""
if self.block_size >= 64:
- knl = GenericScanKernel(self.ctx,
- dtype=numpy.int32,
- preamble=preamble,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
+ knl = GenericScanKernel(
+ self.ctx,
+ dtype=numpy.int32,
+ preamble=preamble,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement,
+ )
else: # MacOS on CPU
- knl = GenericDebugScanKernel(self.ctx,
- dtype=numpy.int32,
- preamble=preamble,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
+ knl = GenericDebugScanKernel(
+ self.ctx,
+ dtype=numpy.int32,
+ preamble=preamble,
+ arguments=arguments,
+ input_expr=input_expr,
+ scan_expr=scan_expr,
+ neutral=neutral,
+ output_statement=output_statement,
+ )
return knl
def encode(self, data, out=None):
@@ -351,28 +407,39 @@ class ByteOffset(OpenclProcessing):
data = numpy.ascontiguousarray(data, dtype=numpy.int32).ravel()
# Make sure data array exists and is large enough
- if ("data_input" not in self.cl_mem or
- self.cl_mem["data_input"].size < data.size):
+ if (
+ "data_input" not in self.cl_mem
+ or self.cl_mem["data_input"].size < data.size
+ ):
logger.info("increase data input buffer size to %s", data.size)
- self.cl_mem.update({
- "data_input": pyopencl.array.empty(self.queue,
- data.size,
- dtype=numpy.int32)})
+ self.cl_mem.update(
+ {
+ "data_input": pyopencl.array.empty(
+ self.queue, data.size, dtype=numpy.int32
+ )
+ }
+ )
d_data = self.cl_mem["data_input"]
evt = pyopencl.enqueue_copy(
- self.queue, d_data.data, data, is_blocking=False)
+ self.queue, d_data.data, data, is_blocking=False
+ )
events.append(EventDescription("copy data H -> D", evt))
# Make sure compressed array exists and is large enough
compressed_size = d_data.size * 7
- if ("compressed" not in self.cl_mem or
- self.cl_mem["compressed"].size < compressed_size):
+ if (
+ "compressed" not in self.cl_mem
+ or self.cl_mem["compressed"].size < compressed_size
+ ):
logger.info("increase compressed buffer size to %s", compressed_size)
- self.cl_mem.update({
- "compressed": pyopencl.array.empty(self.queue,
- compressed_size,
- dtype=numpy.int8)})
+ self.cl_mem.update(
+ {
+ "compressed": pyopencl.array.empty(
+ self.queue, compressed_size, dtype=numpy.int8
+ )
+ }
+ )
d_compressed = self.cl_mem["compressed"]
d_size = self.cl_mem["counter"] # Shared with decompression
@@ -387,13 +454,15 @@ class ByteOffset(OpenclProcessing):
shape=(byte_count,),
dtype=numpy.int8,
allocator=functools.partial(
- d_compressed.base_data.get_sub_region,
- d_compressed.offset))
+ d_compressed.base_data.get_sub_region, d_compressed.offset
+ ),
+ )
elif out.size < byte_count:
raise ValueError(
"Provided output buffer is not large enough: "
- "requires %d bytes, got %d" % (byte_count, out.size))
+ "requires %d bytes, got %d" % (byte_count, out.size)
+ )
else: # out.size >= byte_count
# Create an array with a sub-region of out and this class queue
@@ -401,13 +470,15 @@ class ByteOffset(OpenclProcessing):
self.queue,
shape=(byte_count,),
dtype=numpy.int8,
- allocator=functools.partial(out.base_data.get_sub_region,
- out.offset))
+ allocator=functools.partial(
+ out.base_data.get_sub_region, out.offset
+ ),
+ )
- evt = pyopencl.enqueue_copy(self.queue, out.data, d_compressed.data,
- byte_count=byte_count)
- events.append(
- EventDescription("copy D -> D: internal -> out", evt))
+ evt = pyopencl.enqueue_copy(
+ self.queue, out.data, d_compressed.data, byte_count=byte_count
+ )
+ events.append(EventDescription("copy D -> D: internal -> out", evt))
if self.profile:
self.events += events
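
The byte_offset.py changes above are formatting-only; decode() and encode() keep their signatures. A usage sketch consistent with the tests that follow (fabio provides the reference CBF byte-offset stream; an OpenCL device is assumed):

import numpy
import fabio.compression
from silx.opencl.codec.byte_offset import ByteOffset

image = numpy.random.poisson(200, size=(271, 269)).astype(numpy.int32)
raw = fabio.compression.compByteOffset(image)  # reference CBF byte-offset stream

codec = ByteOffset(raw_size=len(raw), dec_size=image.size)
decoded = codec.decode(raw).get().reshape(image.shape)  # GPU decompression
assert numpy.array_equal(decoded, image)

compressed = codec.encode(image)  # GPU compression, int8 pyopencl array
assert compressed.get().tobytes() == raw  # same equality asserted by the tests below
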
diff --git a/src/silx/opencl/codec/setup.py b/src/silx/opencl/codec/setup.py
deleted file mode 100644
index 4a5c1e5..0000000
--- a/src/silx/opencl/codec/setup.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# coding: utf-8
-#
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-
-from __future__ import division
-
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__authors__ = ["J. Kieffer"]
-__date__ = "13/10/2017"
-
-from numpy.distutils.misc_util import Configuration
-
-
-def configuration(parent_package='', top_path=None):
- config = Configuration('codec', parent_package, top_path)
- config.add_subpackage('test')
- return config
-
-
-if __name__ == "__main__":
- from numpy.distutils.core import setup
- setup(configuration=configuration)
diff --git a/src/silx/opencl/codec/test/__init__.py b/src/silx/opencl/codec/test/__init__.py
index 325c2c7..45065f8 100644
--- a/src/silx/opencl/codec/test/__init__.py
+++ b/src/silx/opencl/codec/test/__init__.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
#
# Project: silx
# https://github.com/silx-kit/silx
diff --git a/src/silx/opencl/codec/test/test_bitshuffle_lz4.py b/src/silx/opencl/codec/test/test_bitshuffle_lz4.py
new file mode 100644
index 0000000..6c5891e
--- /dev/null
+++ b/src/silx/opencl/codec/test/test_bitshuffle_lz4.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+#
+# Project: Bitshuffle-LZ4 decompression in OpenCL
+# https://github.com/silx-kit/silx
+#
+# Copyright (C) 2022-2023 European Synchrotron Radiation Facility,
+# Grenoble, France
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+Test suite for bitshuffle-LZ4 decompression
+"""
+
+__authors__ = ["Jérôme Kieffer"]
+__contact__ = "jerome.kieffer@esrf.eu"
+__license__ = "MIT"
+__copyright__ = "2022 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "07/11/2022"
+
+import struct
+import numpy
+import pytest
+
+try:
+ import bitshuffle
+except ImportError:
+ bitshuffle = None
+from silx.opencl.common import ocl, pyopencl
+from silx.opencl.codec.bitshuffle_lz4 import BitshuffleLz4
+
+
+TESTCASES = ( # dtype, shape
+ ("uint64", (103, 503)),
+ ("int64", (101, 509)),
+ ("uint32", (229, 659)),
+ ("int32", (233, 653)),
+ ("uint16", (743, 647)),
+ ("int16", (751, 643)),
+ ("uint8", (157, 1373)),
+ ("int8", (163, 1367)),
+)
+
+
+@pytest.mark.skipif(
+ not ocl or not pyopencl or bitshuffle is None,
+ reason="PyOpenCl or bitshuffle is missing",
+)
+class TestBitshuffleLz4:
+ """Test pyopencl bishuffle+LZ4 decompression"""
+
+ @staticmethod
+ def _create_test_data(shape, lam=100, dtype="uint32"):
+ """Create test (image, compressed stream) pair.
+
+ :param shape: Shape of test image
+ :param lam: Expectation of interval argument for numpy.random.poisson
+ :return: (reference image array, compressed stream)
+ """
+ ref = numpy.random.poisson(lam, size=shape).astype(dtype)
+ raw = (
+ struct.pack(">Q", ref.nbytes)
+ + b"\x00" * 4
+ + bitshuffle.compress_lz4(ref).tobytes()
+ )
+ return ref, raw
+
+ @pytest.mark.parametrize("dtype,shape", TESTCASES)
+ def test_decompress(self, dtype, shape):
+ """
+ Tests the bitshuffle-LZ4 decompression on GPU with various configurations
+ """
+ ref, raw = self._create_test_data(shape=shape, dtype=dtype)
+ bs = BitshuffleLz4(len(raw), numpy.prod(shape), dtype=dtype)
+ res = bs.decompress(raw).get()
+ assert numpy.array_equal(res, ref.ravel()), "Checks decompression works"
+
+ @pytest.mark.parametrize("dtype,shape", TESTCASES)
+ def test_decompress_from_buffer(self, dtype, shape):
+ """Test reading compressed data from pyopencl Buffer"""
+ ref, raw = self._create_test_data(shape=shape, dtype=dtype)
+
+ bs = BitshuffleLz4(0, numpy.prod(shape), dtype=dtype)
+
+ buffer = pyopencl.Buffer(
+ bs.ctx,
+ flags=pyopencl.mem_flags.COPY_HOST_PTR | pyopencl.mem_flags.READ_ONLY,
+ hostbuf=raw,
+ )
+
+ res = bs.decompress(buffer).get()
+ assert numpy.array_equal(res, ref.ravel()), "Checks decompression works"
+
+ @pytest.mark.parametrize("dtype,shape", TESTCASES)
+ def test_decompress_from_array(self, dtype, shape):
+ """Test reading compressed data from pyopencl Array"""
+ ref, raw = self._create_test_data(shape=shape, dtype=dtype)
+
+ bs = BitshuffleLz4(0, numpy.prod(shape), dtype=dtype)
+
+ array = pyopencl.array.to_device(
+ bs.queue,
+ numpy.frombuffer(raw, dtype=numpy.uint8),
+ array_queue=bs.queue,
+ )
+
+ res = bs.decompress(array).get()
+ assert numpy.array_equal(res, ref.ravel()), "Checks decompression works"
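
For reference, the 12-byte chunk header assembled by _create_test_data above (and read back by BitshuffleLz4.decompress) follows the usual bitshuffle/HDF5 layout, sketched below; the zero block-size field selects the default 8192-byte blocks assumed by LZ4_BLOCK_SIZE:

import struct

# 8 big-endian bytes: uncompressed byte count; 4 bytes: LZ4 block size (0 = default).
header = struct.pack(">Q", 4096) + struct.pack(">I", 0)
nbytes, block_size = struct.unpack(">QI", header)
assert (nbytes, block_size) == (4096, 0)
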
diff --git a/src/silx/opencl/codec/test/test_byte_offset.py b/src/silx/opencl/codec/test/test_byte_offset.py
index 4b2d5a3..0e58076 100644
--- a/src/silx/opencl/codec/test/test_byte_offset.py
+++ b/src/silx/opencl/codec/test/test_byte_offset.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: Byte-offset decompression in OpenCL
# https://github.com/silx-kit/silx
@@ -31,8 +30,6 @@
Test suite for byte-offset decompression
"""
-from __future__ import division, print_function
-
__authors__ = ["Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
@@ -47,13 +44,12 @@ from silx.opencl.common import ocl, pyopencl
from silx.opencl.codec import byte_offset
import fabio
import unittest
+
logger = logging.getLogger(__name__)
-@unittest.skipUnless(ocl and pyopencl,
- "PyOpenCl is missing")
+@unittest.skipUnless(ocl and pyopencl, "PyOpenCl is missing")
class TestByteOffset(unittest.TestCase):
-
@staticmethod
def _create_test_data(shape, nexcept, lam=200):
"""Create test (image, compressed stream) pair.
@@ -87,7 +83,9 @@ class TestByteOffset(unittest.TestCase):
except (RuntimeError, pyopencl.RuntimeError) as err:
logger.warning(err)
if sys.platform == "darwin":
- raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
+ raise unittest.SkipTest(
+ "Byte-offset decompression is known to be buggy on MacOS-CPU"
+ )
else:
raise err
print(bo.block_size)
@@ -100,9 +98,11 @@ class TestByteOffset(unittest.TestCase):
delta_cy = abs(ref.ravel() - res_cy).max()
delta_cl = abs(ref.ravel() - res_cl.get()).max()
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
+ logger.debug(
+ "Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1),
+ )
bo.log_profile()
# print(ref)
# print(res_cl.get())
@@ -111,8 +111,8 @@ class TestByteOffset(unittest.TestCase):
def test_many_decompress(self, ntest=10):
"""
- tests the byte offset decompression on GPU, many images to ensure there
- is not leaking in memory
+ tests the byte offset decompression on GPU, many images to ensure there
+ is not leaking in memory
"""
shape = (991, 997)
size = numpy.prod(shape)
@@ -123,7 +123,9 @@ class TestByteOffset(unittest.TestCase):
except (RuntimeError, pyopencl.RuntimeError) as err:
logger.warning(err)
if sys.platform == "darwin":
- raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
+ raise unittest.SkipTest(
+ "Byte-offset decompression is known to be buggy on MacOS-CPU"
+ )
else:
raise err
t0 = time.time()
@@ -135,9 +137,11 @@ class TestByteOffset(unittest.TestCase):
delta_cl = abs(ref.ravel() - res_cl.get()).max()
self.assertEqual(delta_cy, 0, "Checks fabio works")
self.assertEqual(delta_cl, 0, "Checks opencl works")
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
+ logger.debug(
+ "Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1),
+ )
for i in range(ntest):
ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
@@ -152,9 +156,11 @@ class TestByteOffset(unittest.TestCase):
self.assertEqual(delta_cy, 0, "Checks fabio works #%i" % i)
self.assertEqual(delta_cl, 0, "Checks opencl works #%i" % i)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
+ logger.debug(
+ "Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1),
+ )
bo.log_profile(stats=True)
def test_encode(self):
@@ -174,8 +180,7 @@ class TestByteOffset(unittest.TestCase):
compressed_stream = compressed_array.get().tobytes()
self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: OpenCL: %.3fms.",
- 1000.0 * (t1 - t0))
+ logger.debug("Global execution time: OpenCL: %.3fms.", 1000.0 * (t1 - t0))
bo.log_profile()
def test_encode_to_array(self):
@@ -225,14 +230,15 @@ class TestByteOffset(unittest.TestCase):
self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
+ logger.debug(
+ "Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1),
+ )
bo.log_profile()
def test_encode_to_bytes_from_array(self):
- """Test byte offset compression to bytes from a pyopencl array.
- """
+ """Test byte offset compression to bytes from a pyopencl array."""
ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
try:
@@ -241,8 +247,7 @@ class TestByteOffset(unittest.TestCase):
logger.warning(err)
raise err
- d_ref = pyopencl.array.to_device(
- bo.queue, ref.astype(numpy.int32).ravel())
+ d_ref = pyopencl.array.to_device(bo.queue, ref.astype(numpy.int32).ravel())
t0 = time.time()
res_fabio = fabio.compression.compByteOffset(ref)
@@ -252,9 +257,11 @@ class TestByteOffset(unittest.TestCase):
self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
+ logger.debug(
+ "Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1),
+ )
bo.log_profile()
def test_many_encode(self, ntest=10):
@@ -278,9 +285,11 @@ class TestByteOffset(unittest.TestCase):
bo_durations.append(1000.0 * (t2 - t1))
self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
+ logger.debug(
+ "Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1),
+ )
for i in range(ntest):
ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
@@ -293,11 +302,15 @@ class TestByteOffset(unittest.TestCase):
bo_durations.append(1000.0 * (t2 - t1))
self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
-
- logger.debug("OpenCL execution time: Mean: %fms, Min: %fms, Max: %fms",
- numpy.mean(bo_durations),
- numpy.min(bo_durations),
- numpy.max(bo_durations))
+ logger.debug(
+ "Global execution time: fabio %.3fms, OpenCL: %.3fms.",
+ 1000.0 * (t1 - t0),
+ 1000.0 * (t2 - t1),
+ )
+
+ logger.debug(
+ "OpenCL execution time: Mean: %fms, Min: %fms, Max: %fms",
+ numpy.mean(bo_durations),
+ numpy.min(bo_durations),
+ numpy.max(bo_durations),
+ )
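
Several tests above call log_profile(); that relies on constructing the codec with profile=True so that OpenclProcessing keeps the OpenCL events recorded by decode()/encode(). A brief sketch of the pattern (hypothetical buffer sizes, OpenCL device assumed):

from silx.opencl.codec.byte_offset import ByteOffset

codec = ByteOffset(raw_size=1 << 20, dec_size=1 << 18, profile=True)
# ... run codec.decode(raw) / codec.encode(data) as in the tests above ...
codec.log_profile(stats=True)  # stats=True asks for aggregated per-kernel timings
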
diff --git a/src/silx/opencl/common.py b/src/silx/opencl/common.py
index 888b1da..30c9ef7 100644
--- a/src/silx/opencl/common.py
+++ b/src/silx/opencl/common.py
@@ -1,10 +1,9 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: S I L X project
# https://github.com/silx-kit/silx
#
-# Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -34,10 +33,17 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "29/09/2021"
+__date__ = "09/09/2023"
__status__ = "stable"
-__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
- "measure_workgroup_size", "kernel_workgroup_size"]
+__all__ = [
+ "ocl",
+ "pyopencl",
+ "mf",
+ "release_cl_buffers",
+ "allocate_cl_buffers",
+ "measure_workgroup_size",
+ "kernel_workgroup_size",
+]
import os
import logging
@@ -49,60 +55,69 @@ from .utils import get_opencl_code
logger = logging.getLogger(__name__)
if os.environ.get("SILX_OPENCL") in ["0", "False"]:
- logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
+ logger.info(
+ "Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0"
+ )
pyopencl = None
else:
try:
import pyopencl
except ImportError:
- logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl")
+ logger.warning(
+ "Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl"
+ )
pyopencl = None
else:
try:
pyopencl.get_platforms()
except pyopencl.LogicError:
- logger.warning("The module pyOpenCL has been imported but can't be used here")
+ logger.warning(
+ "The module pyOpenCL has been imported but can't be used here"
+ )
pyopencl = None
- else:
- import pyopencl.array as array
- mf = pyopencl.mem_flags
-if pyopencl is None:
+if pyopencl is not None:
+ import pyopencl.array as array
+ mf = pyopencl.mem_flags
+ from .atomic import check_atomic32, check_atomic64
+else:
# Define default mem flags
class mf(object):
WRITE_ONLY = 1
READ_ONLY = 1
READ_WRITE = 1
-else:
- mf = pyopencl.mem_flags
-FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
- "CPU": 4, # CPU, at least intel's have 4 operation per cycle
- "ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
+
+FLOP_PER_CORE = {
+ "GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
+ "CPU": 4, # CPU, at least intel's have 4 operation per cycle
+ "ACC": 8,
+} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
# Sources : https://en.wikipedia.org/wiki/CUDA
-NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
- (1, 1): 24, # Measured on G98 [Quadro NVS 295]
- (1, 2): 24, # Guessed !
- (1, 3): 24, # measured on a GT285 (GT200)
- (2, 0): 64, # Measured on a 580 (GF110)
- (2, 1): 96, # Measured on Quadro2000 GF106GL
- (3, 0): 384, # Guessed!
- (3, 5): 384, # Measured on K20
- (3, 7): 384, # K80: Guessed!
- (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU
- (5, 2): 256, # Titan-X
- (5, 3): 256, # TX1
- (6, 0): 128, # GP100
- (6, 1): 128, # GP104
- (6, 2): 128, # ?
- (7, 0): 128, # Volta # measured on Telsa V100
- (7, 2): 128, # Volta ?
- (7, 5): 128, # Turing # measured on RTX 6000
- (8, 0): 128, # Ampere # measured on Tesla A100
- (8, 6): 256, # Ampere # measured on RTX A5000
- }
+NVIDIA_FLOP_PER_CORE = {
+ (1, 0): 24, # Guessed !
+ (1, 1): 24, # Measured on G98 [Quadro NVS 295]
+ (1, 2): 24, # Guessed !
+ (1, 3): 24, # measured on a GT285 (GT200)
+ (2, 0): 64, # Measured on a 580 (GF110)
+ (2, 1): 96, # Measured on Quadro2000 GF106GL
+ (3, 0): 384, # Guessed!
+ (3, 5): 384, # Measured on K20
+ (3, 7): 384, # K80: Guessed!
+ (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU
+ (5, 2): 256, # Titan-X
+ (5, 3): 256, # TX1
+ (6, 0): 128, # GP100
+ (6, 1): 128, # GP104
+ (6, 2): 128, # ?
+ (7, 0): 128, # Volta # measured on Telsa V100
+ (7, 2): 128, # Volta ?
+ (7, 5): 128, # Turing # measured on RTX 6000
+ (8, 0): 128, # Ampere # measured on Tesla A100
+ (8, 6): 256, # Ampere # measured on RTX A5000
+}
AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
@@ -112,9 +127,24 @@ class Device(object):
Simple class that contains the structure of an OpenCL device
"""
- def __init__(self, name="None", dtype=None, version=None, driver_version=None,
- extensions="", memory=None, available=None,
- cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
+ def __init__(
+ self,
+ name="None",
+ dtype=None,
+ version=None,
+ driver_version=None,
+ extensions="",
+ memory=None,
+ available=None,
+ cores=None,
+ frequency=None,
+ flop_core=None,
+ idx=0,
+ workgroup=1,
+ atomic32=None,
+ atomic64=None,
+ platform=None,
+ ):
"""
Simple container with some important data for the OpenCL device description.
@@ -130,6 +160,7 @@ class Device(object):
:param flop_core: Flopating Point operation per core per cycle
:param idx: index of the device within the platform
:param workgroup: max workgroup size
+ :param platform: the platform to which this device is attached
"""
self.name = name.strip()
self.type = dtype
@@ -142,12 +173,15 @@ class Device(object):
self.frequency = frequency
self.id = idx
self.max_work_group_size = workgroup
+ self.atomic32 = atomic32
+ self.atomic64 = atomic64
if not flop_core:
flop_core = FLOP_PER_CORE.get(dtype, 1)
if cores and frequency:
self.flops = cores * frequency * flop_core
else:
self.flops = flop_core
+ self.platform = platform
def __repr__(self):
return "%s" % self.name
@@ -158,19 +192,20 @@ class Device(object):
:return: string
"""
- lst = ["Name\t\t:\t%s" % self.name,
- "Type\t\t:\t%s" % self.type,
- "Memory\t\t:\t%.3f MB" % (self.memory / 2.0 ** 20),
- "Cores\t\t:\t%s CU" % self.cores,
- "Frequency\t:\t%s MHz" % self.frequency,
- "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.),
- "Version\t\t:\t%s" % self.version,
- "Available\t:\t%s" % self.available]
+ lst = [
+ "Name\t\t:\t%s" % self.name,
+ "Type\t\t:\t%s" % self.type,
+ "Memory\t\t:\t%.3f MB" % (self.memory / 2.0**20),
+ "Cores\t\t:\t%s CU" % self.cores,
+ "Frequency\t:\t%s MHz" % self.frequency,
+ "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.0),
+ "Version\t\t:\t%s" % self.version,
+ "Available\t:\t%s" % self.available,
+ ]
return os.linesep.join(lst)
def set_unavailable(self):
- """Use this method to flag a faulty device
- """
+ """Use this method to flag a faulty device"""
self.available = False
@@ -179,7 +214,9 @@ class Platform(object):
Simple class that contains the structure of an OpenCL platform
"""
- def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
+ def __init__(
+ self, name="None", vendor="None", version=None, extensions=None, idx=0
+ ):
"""
Class containing all descriptions of a platform and all devices description within that platform.
@@ -205,6 +242,7 @@ class Platform(object):
:param device: Device instance
"""
+ device.platform = self
self.devices.append(device)
def get_device(self, key):
@@ -245,19 +283,25 @@ def _measure_workgroup_size(device_or_context, fast=False):
platformid = pyopencl.get_platforms().index(platform)
deviceid = platform.get_devices().index(device_or_context)
ocl.platforms[platformid].devices[deviceid].set_unavailable()
- raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error))
+ raise RuntimeError(
+ "Unable to create context on %s/%s: %s"
+ % (platform, device_or_context, error)
+ )
else:
device = device_or_context
elif isinstance(device_or_context, pyopencl.Context):
ctx = device_or_context
device = device_or_context.devices[0]
elif isinstance(device_or_context, (tuple, list)) and len(device_or_context) == 2:
- ctx = ocl.create_context(platformid=device_or_context[0],
- deviceid=device_or_context[1])
+ ctx = ocl.create_context(
+ platformid=device_or_context[0], deviceid=device_or_context[1]
+ )
device = ctx.devices[0]
else:
- raise RuntimeError("""given parameter device_or_context is not an
- instanciation of a device or a context""")
+ raise RuntimeError(
+ """given parameter device_or_context is not an
+ instantiation of a device or a context"""
+ )
shape = device.max_work_group_size
# get the context
@@ -272,7 +316,9 @@ def _measure_workgroup_size(device_or_context, fast=False):
program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
if fast:
- max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device)
+ max_valid_wg = program.addition.get_work_group_info(
+ pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device
+ )
else:
maxi = int(round(numpy.log2(shape)))
for i in range(maxi + 1):
@@ -280,11 +326,19 @@ def _measure_workgroup_size(device_or_context, fast=False):
wg = 1 << i
try:
evt = program.addition(
- queue, (shape,), (wg,),
- d_data.data, d_data_1.data, d_res.data, numpy.int32(shape))
+ queue,
+ (shape,),
+ (wg,),
+ d_data.data,
+ d_data_1.data,
+ d_res.data,
+ numpy.int32(shape),
+ )
evt.wait()
except Exception as error:
- logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape)
+ logger.info(
+ "%s on device %s for WG=%s/%s", error, device.name, wg, shape
+ )
program = queue = d_res = d_data_1 = d_data = None
break
else:
@@ -294,7 +348,9 @@ def _measure_workgroup_size(device_or_context, fast=False):
if wg > max_valid_wg:
max_valid_wg = wg
else:
- logger.warning("ArithmeticError on %s for WG=%s/%s", wg, device.name, shape)
+ logger.warning(
+ "ArithmeticError on %s for WG=%s/%s", wg, device.name, shape
+ )
return max_valid_wg
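In short, the helper above compiles the trivial "addition" kernel and probes power-of-two workgroup sizes until one fails (or, with fast=True, simply trusts the kernel's WORK_GROUP_SIZE query). A sketch of calling it directly, assuming PyOpenCL and at least one device are available; the (0, 0) indices are illustrative:

    from silx.opencl.common import ocl, _measure_workgroup_size

    if ocl is not None:  # ocl is None when no usable OpenCL platform/device was found
        # Accepts a pyopencl.Device, a pyopencl.Context or a (platformid, deviceid) pair.
        wg_fast = _measure_workgroup_size((0, 0), fast=True)   # query-based upper bound
        wg_real = _measure_workgroup_size((0, 0))              # brute-force validation
        print(wg_fast, wg_real)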
@@ -317,15 +373,25 @@ class OpenCL(object):
if pyopencl:
platform = device = pypl = devtype = extensions = pydev = None
for idx, platform in enumerate(pyopencl.get_platforms()):
- pypl = Platform(platform.name, platform.vendor, platform.version, platform.extensions, idx)
+ pypl = Platform(
+ platform.name,
+ platform.vendor,
+ platform.version,
+ platform.extensions,
+ idx,
+ )
for idd, device in enumerate(platform.get_devices()):
####################################################
# Nvidia does not report int64 atomics (we are using) ...
# this is a hack around as any nvidia GPU with double-precision supports int64 atomics
####################################################
extensions = device.extensions
- if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
- extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
+ if (pypl.vendor == "NVIDIA Corporation") and (
+ "cl_khr_fp64" in extensions
+ ):
+ extensions += (
+ " cl_khr_int64_base_atomics cl_khr_int64_extended_atomics"
+ )
try:
devtype = pyopencl.device_type.to_string(device.type).upper()
except ValueError:
@@ -340,14 +406,23 @@ class OpenCL(object):
devtype = "CPU"
else:
devtype = devtype[:3]
- if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
+ if _is_nvidia_gpu(device.vendor, devtype) and (
+ "compute_capability_major_nv" in dir(device)
+ ):
try:
- comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
+ comput_cap = (
+ device.compute_capability_major_nv,
+ device.compute_capability_minor_nv,
+ )
except pyopencl.LogicError:
flop_core = FLOP_PER_CORE["GPU"]
else:
- flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
- elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
+ flop_core = NVIDIA_FLOP_PER_CORE.get(
+ comput_cap, FLOP_PER_CORE["GPU"]
+ )
+ elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (
+ devtype == "GPU"
+ ):
flop_core = AMD_FLOP_PER_CORE
elif devtype == "CPU":
flop_core = FLOP_PER_CORE.get(devtype, 1)
@@ -355,14 +430,29 @@ class OpenCL(object):
flop_core = 1
workgroup = device.max_work_group_size
if (devtype == "CPU") and (pypl.vendor == "Apple"):
- logger.info("For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size.")
+ logger.info(
+ "For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size."
+ )
workgroup = _measure_workgroup_size(device, fast=True)
if (devtype == "GPU") and os.environ.get("GPU") == "False":
# Environment variable to disable GPU devices
continue
- pydev = Device(device.name, devtype, device.version, device.driver_version, extensions,
- device.global_mem_size, bool(device.available), device.max_compute_units,
- device.max_clock_frequency, flop_core, idd, workgroup)
+ pydev = Device(
+ device.name,
+ devtype,
+ device.version,
+ device.driver_version,
+ extensions,
+ device.global_mem_size,
+ bool(device.available),
+ device.max_compute_units,
+ device.max_clock_frequency,
+ flop_core,
+ idd,
+ workgroup,
+ check_atomic32(device)[0],
+ check_atomic64(device)[0],
+ )
pypl.add_device(pydev)
nb_devices += 1
platforms.append(pypl)
@@ -371,9 +461,11 @@ class OpenCL(object):
def __repr__(self):
out = ["OpenCL devices:"]
for platformid, platform in enumerate(self.platforms):
- deviceids = ["(%s,%s) %s" % (platformid, deviceid, dev.name)
- for deviceid, dev in enumerate(platform.devices)]
- out.append("[%s] %s: " % (platformid, platform.name) + ", ".join(deviceids))
+ deviceids = [
+ f"({platformid},{deviceid}) {dev.name}"
+ for deviceid, dev in enumerate(platform.devices)
+ ]
+ out.append(f"[{platformid}] {platform.name}: " + ", ".join(deviceids))
return os.linesep.join(out)
def get_platform(self, key):
@@ -395,7 +487,9 @@ class OpenCL(object):
out = self.platforms[platid]
return out
- def select_device(self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs):
+ def select_device(
+ self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs
+ ):
"""
Select a device based on few parameters (at the end, keep the one with most memory)
@@ -439,8 +533,15 @@ class OpenCL(object):
# Nothing found
return None
- def create_context(self, devicetype="ALL", useFp64=False, platformid=None,
- deviceid=None, cached=True, memory=None, extensions=None):
+ def create_context(
+ self,
+ devicetype="ALL",
+ platformid=None,
+ deviceid=None,
+ cached=True,
+ memory=None,
+ extensions=None,
+ ):
"""
Choose a device and initiate a context.
@@ -450,7 +551,6 @@ class OpenCL(object):
E.g.: If Nvidia driver is installed, GPU will succeed but CPU will fail.
The AMD SDK kit is required for CPU via OpenCL.
:param devicetype: string in ["cpu","gpu", "all", "acc"]
- :param useFp64: boolean specifying if double precision will be used: deprecated use extensions=["cl_khr_fp64"]
:param platformid: integer
:param deviceid: integer
:param cached: True if we want to cache the context
@@ -460,37 +560,60 @@ class OpenCL(object):
"""
if extensions is None:
extensions = []
- if useFp64:
- logger.warning("Deprecation: please select your device using the extension name!, i.e. extensions=['cl_khr_fp64']")
- extensions.append('cl_khr_fp64')
+ ctx = None
if (platformid is not None) and (deviceid is not None):
platformid = int(platformid)
deviceid = int(deviceid)
elif "PYOPENCL_CTX" in os.environ:
- pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")]
- pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0
- platformid, deviceid = pyopencl_ctx
+ ctx = pyopencl.create_some_context()
+ # try:
+ device = ctx.devices[0]
+ platforms = [
+ i for i, p in enumerate(ocl.platforms) if device.platform.name == p.name
+ ]
+ if platforms:
+ platformid = platforms[0]
+ devices = [
+ i
+ for i, d in enumerate(ocl.platforms[platformid].devices)
+ if device.name == d.name
+ ]
+ if devices:
+ deviceid = devices[0]
+ if cached:
+ self.context_cache[(platformid, deviceid)] = ctx
else:
ids = ocl.select_device(type=devicetype, extensions=extensions)
if ids:
platformid, deviceid = ids
- ctx = None
- if (platformid is not None) and (deviceid is not None):
+
+ if (ctx is None) and (platformid is not None) and (deviceid is not None):
if (platformid, deviceid) in self.context_cache:
ctx = self.context_cache[(platformid, deviceid)]
else:
try:
- ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
+ ctx = pyopencl.Context(
+ devices=[
+ pyopencl.get_platforms()[platformid].get_devices()[deviceid]
+ ]
+ )
except pyopencl._cl.LogicError as error:
self.platforms[platformid].devices[deviceid].set_unavailable()
- logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error)
+ logger.warning(
+ "Unable to create context on %s/%s: %s",
+ platformid,
+ deviceid,
+ error,
+ )
ctx = None
else:
if cached:
self.context_cache[(platformid, deviceid)] = ctx
if ctx is None:
- logger.warning("Last chance to get an OpenCL device ... probably not the one requested")
+ logger.warning(
+ "Last chance to get an OpenCL device ... probably not the one requested"
+ )
ctx = pyopencl.create_some_context(interactive=False)
return ctx
@@ -560,8 +683,9 @@ def allocate_cl_buffers(buffers, device=None, context=None):
for _, _, dtype, size in buffers:
ualloc += numpy.dtype(dtype).itemsize * size
memory = device.memory
- logger.info("%.3fMB are needed on device which has %.3fMB",
- ualloc / 1.0e6, memory / 1.0e6)
+ logger.info(
+ "%.3fMB are needed on device which has %.3fMB", ualloc / 1.0e6, memory / 1.0e6
+ )
if ualloc >= memory:
memError = "Fatal error in allocate_buffers."
memError += "Not enough device memory for buffers"
@@ -571,8 +695,9 @@ def allocate_cl_buffers(buffers, device=None, context=None):
# do the allocation
try:
for name, flag, dtype, size in buffers:
- mem[name] = pyopencl.Buffer(context, flag,
- numpy.dtype(dtype).itemsize * size)
+ mem[name] = pyopencl.Buffer(
+ context, flag, numpy.dtype(dtype).itemsize * size
+ )
except pyopencl.MemoryError as error:
release_cl_buffers(mem)
raise MemoryError(error)
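allocate_cl_buffers() expects (name, flags, dtype, nb_elements) tuples and hands back the allocated pyopencl.Buffer objects keyed by name; a sketch, passing an explicit silx Device so the memory check above has something to compare against:

    import numpy
    import pyopencl
    from silx.opencl.common import ocl, allocate_cl_buffers, release_cl_buffers

    if ocl is not None:
        platformid, deviceid = ocl.select_device(dtype="ALL")
        device = ocl.platforms[platformid].devices[deviceid]   # silx Device (has .memory)
        ctx = ocl.create_context(platformid=platformid, deviceid=deviceid)
        mf = pyopencl.mem_flags
        buffers = [
            ("data", mf.READ_ONLY, numpy.float32, 1024),
            ("result", mf.WRITE_ONLY, numpy.float32, 1024),
        ]
        cl_mem = allocate_cl_buffers(buffers, device=device, context=ctx)
        # ... use cl_mem["data"], cl_mem["result"] ...
        release_cl_buffers(cl_mem)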
@@ -589,16 +714,15 @@ def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
do not support 1D images, so 1D images are handled as 2D with one row
:param support_1D: force the image to be 1D if the shape has only one dim
"""
- if len(shape) == 1 and not(support_1D):
+ if len(shape) == 1 and not (support_1D):
shape = (1,) + shape
return pyopencl.Image(
ctx,
pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
+ pyopencl.channel_order.INTENSITY, pyopencl.channel_type.FLOAT
),
- hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
+ hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32),
)
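allocate_texture() wraps the data in a read-only INTENSITY/FLOAT pyopencl.Image (1D shapes are promoted to one-row 2D images unless support_1D is set). A sketch of filling such a texture, mirroring the origin/region convention of the transfer_to_texture helper that the processing.py part of this patch reworks:

    import numpy
    import pyopencl
    from silx.opencl.common import ocl, allocate_texture, check_textures_availability

    if ocl is not None:
        ctx = ocl.create_context()
        if check_textures_availability(ctx):
            queue = pyopencl.CommandQueue(ctx)
            frame = numpy.random.random((256, 512)).astype(numpy.float32)
            tex = allocate_texture(ctx, frame.shape)
            # Image copies take (col, row) ordering, hence the reversed shape.
            pyopencl.enqueue_copy(queue, tex, frame,
                                  origin=(0, 0), region=frame.shape[::-1])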
@@ -620,7 +744,7 @@ def check_textures_availability(ctx):
# There is no way to detect this until a kernel is compiled
try:
cc = ctx.devices[0].compute_capability_major_nv
- textures_available &= (cc >= 3)
+ textures_available &= cc >= 3
except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU
pass
#
@@ -660,7 +784,7 @@ def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
:param kernel: kernel or name of the kernel
:param what: what is the query about ?
:return: int or 3-int for the workgroup size.
-
+
Possible information available are:
* 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
* 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1!
@@ -668,15 +792,17 @@ def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
* 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
* 'PRIVATE_MEM_SIZE' Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel
* 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
-
+
Further information on:
https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
-
+
"""
assert isinstance(program, pyopencl.Program)
if not isinstance(kernel, pyopencl.Kernel):
kernel_name = kernel
- assert kernel in (k.function_name for k in program.all_kernels()), "the kernel exists"
+ assert kernel in (
+ k.function_name for k in program.all_kernels()
+ ), "the kernel exists"
kernel = program.__getattr__(kernel_name)
device = program.devices[0]
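A sketch of querying kernel metadata with the helper documented above, reusing the same "addition" kernel as _measure_workgroup_size (get_opencl_code comes from silx.opencl.utils, as in that helper):

    import pyopencl
    from silx.opencl.common import ocl, query_kernel_info
    from silx.opencl.utils import get_opencl_code

    if ocl is not None:
        ctx = ocl.create_context()
        program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
        max_wg = query_kernel_info(program, "addition", "WORK_GROUP_SIZE")
        multiple = query_kernel_info(program, "addition", "PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
        print(max_wg, multiple)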
diff --git a/src/silx/opencl/conftest.py b/src/silx/opencl/conftest.py
index 1fdc516..f6cf5de 100644
--- a/src/silx/opencl/conftest.py
+++ b/src/silx/opencl/conftest.py
@@ -1,5 +1,6 @@
import pytest
+
@pytest.mark.usefixtures("use_opencl")
def setup_module(module):
pass
diff --git a/src/silx/opencl/convolution.py b/src/silx/opencl/convolution.py
index 15ef931..99ecd02 100644
--- a/src/silx/opencl/convolution.py
+++ b/src/silx/opencl/convolution.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
+# Copyright (c) 2019-2023 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,27 +24,35 @@
# ###########################################################################*/
"""Module for convolution on CPU/GPU."""
-from __future__ import absolute_import, print_function, with_statement, division
-
__authors__ = ["P. Paleo"]
__license__ = "MIT"
__date__ = "01/08/2019"
import numpy as np
-from copy import copy # python2
from .common import pyopencl as cl
import pyopencl.array as parray
from .processing import OpenclProcessing, EventDescription
from .utils import ConvolutionInfos
+
class Convolution(OpenclProcessing):
"""
A class for performing convolution on CPU/GPU with OpenCL.
"""
- def __init__(self, shape, kernel, axes=None, mode=None, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False, extra_options=None):
+ def __init__(
+ self,
+ shape,
+ kernel,
+ axes=None,
+ mode=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ extra_options=None,
+ ):
"""Constructor of OpenCL Convolution.
:param shape: shape of the array.
@@ -73,9 +80,14 @@ class Convolution(OpenclProcessing):
"allocate_tmp_array": True,
"dont_use_textures": False,
"""
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
self._configure_extra_options(extra_options)
self._determine_use_case(shape, kernel, axes)
@@ -91,7 +103,7 @@ class Convolution(OpenclProcessing):
}
extra_opts = extra_options or {}
self.extra_options.update(extra_opts)
- self.use_textures = not(self.extra_options["dont_use_textures"])
+ self.use_textures = not (self.extra_options["dont_use_textures"])
self.use_textures &= self.check_textures_availability()
def _get_dimensions(self, shape, kernel):
@@ -136,8 +148,7 @@ class Convolution(OpenclProcessing):
if axes in convol_infos.allowed_axes[uc_name]:
self.use_case_name = uc_name
self.use_case_desc = uc_params["name"]
- #~ self.use_case_kernels = uc_params["kernels"].copy()
- self.use_case_kernels = copy(uc_params["kernels"]) # TODO use the above line once we get rid of python2
+ self.use_case_kernels = uc_params["kernels"].copy()
if self.use_case_name is None:
raise ValueError(
"Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s"
@@ -146,8 +157,7 @@ class Convolution(OpenclProcessing):
# TODO implement this use case
if self.use_case_name == "batched_separable_2D_1D_3D":
raise NotImplementedError(
- "The use case %s is not implemented"
- % self.use_case_name
+ "The use case %s is not implemented" % self.use_case_name
)
#
self.axes = axes
@@ -171,7 +181,7 @@ class Convolution(OpenclProcessing):
"allocate_tmp_array": "data_tmp",
}
# Nonseparable transforms do not need tmp array
- if not(self.separable):
+ if not (self.separable):
self.extra_options["allocate_tmp_array"] = False
# Allocate arrays
for option_name, array_name in option_array_names.items():
@@ -185,7 +195,7 @@ class Convolution(OpenclProcessing):
if isinstance(self.kernel, np.ndarray):
self.d_kernel = parray.to_device(self.queue, self.kernel)
else:
- if not(isinstance(self.kernel, parray.Array)):
+ if not (isinstance(self.kernel, parray.Array)):
raise ValueError("kernel must be either numpy array or pyopencl array")
self.d_kernel = self.kernel
self._old_input_ref = None
@@ -210,7 +220,7 @@ class Convolution(OpenclProcessing):
% (self.mode, str(mp.keys()))
)
# TODO
- if not(self.use_textures) and self.mode.lower() == "constant":
+ if not (self.use_textures) and self.mode.lower() == "constant":
raise NotImplementedError(
"mode='constant' is not implemented without textures yet"
)
@@ -231,28 +241,30 @@ class Convolution(OpenclProcessing):
compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)]
if self.use_textures:
kernel_files = ["convolution_textures.cl"]
- compile_options.extend([
- str("-DIMAGE_DIMS=%d" % self.data_ndim),
- str("-DFILTER_DIMS=%d" % self.kernel_ndim),
- ])
+ compile_options.extend(
+ [
+ str("-DIMAGE_DIMS=%d" % self.data_ndim),
+ str("-DFILTER_DIMS=%d" % self.kernel_ndim),
+ ]
+ )
d_kernel_ref = self.d_kernel_tex
else:
kernel_files = ["convolution.cl"]
d_kernel_ref = self.d_kernel.data
- self.compile_kernels(
- kernel_files=kernel_files,
- compile_options=compile_options
- )
+ self.compile_kernels(kernel_files=kernel_files, compile_options=compile_options)
self.ndrange = self.shape[::-1]
self.wg = None
kernel_args = [
self.queue,
- self.ndrange, self.wg,
+ self.ndrange,
+ self.wg,
None,
None,
d_kernel_ref,
np.int32(self.kernel.shape[0]),
- self.Nx, self.Ny, self.Nz
+ self.Nx,
+ self.Ny,
+ self.Nz,
]
if self.kernel_ndim == 2:
kernel_args.insert(6, np.int32(self.kernel.shape[1]))
@@ -266,10 +278,7 @@ class Convolution(OpenclProcessing):
if self.separable:
if self.data_tmp is not None:
self.swap_pattern = {
- 2: [
- ("data_in", "data_tmp"),
- ("data_tmp", "data_out")
- ],
+ 2: [("data_in", "data_tmp"), ("data_tmp", "data_out")],
3: [
("data_in", "data_out"),
("data_out", "data_tmp"),
@@ -325,14 +334,14 @@ class Convolution(OpenclProcessing):
else:
raise ValueError("Please provide either arr= or shape=")
if ndim < dim_min or ndim > dim_max:
- raise ValueError("%s dimensions should be between %d and %d"
- % (name, dim_min, dim_max)
+ raise ValueError(
+ "%s dimensions should be between %d and %d" % (name, dim_min, dim_max)
)
return ndim
def _check_array(self, arr):
# TODO allow cl.Buffer
- if not(isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)):
+ if not (isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)):
raise TypeError("Expected either pyopencl.array.Array or numpy.ndarray")
# TODO composition with ImageProcessing/cast
if arr.dtype != np.float32:
@@ -354,14 +363,12 @@ class Convolution(OpenclProcessing):
self.data_in = array
data_in_ref = self.data_in
if output is not None:
- if not(isinstance(output, np.ndarray)):
+ if not (isinstance(output, np.ndarray)):
self._old_output_ref = self.data_out
self.data_out = output
# Update OpenCL kernel arguments with new array references
self.kernel_args = self._configure_kernel_args(
- self.kernel_args,
- data_in_ref,
- self.data_out
+ self.kernel_args, data_in_ref, self.data_out
)
def _separable_convolution(self):
@@ -375,9 +382,7 @@ class Convolution(OpenclProcessing):
# Batched: one kernel call in total
opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[axis])
opencl_kernel_args = self._configure_kernel_args(
- self.kernel_args,
- input_ref,
- output_ref
+ self.kernel_args, input_ref, output_ref
)
ev = opencl_kernel(*opencl_kernel_args)
if self.profile:
@@ -398,9 +403,7 @@ class Convolution(OpenclProcessing):
self.data_out = self._old_output_ref
self._old_output_ref = None
self.kernel_args = self._configure_kernel_args(
- self.kernel_args,
- self.data_in,
- self.data_out
+ self.kernel_args, self.data_in, self.data_out
)
def _get_output(self, output):
@@ -436,7 +439,4 @@ class Convolution(OpenclProcessing):
res = self._get_output(output)
return res
-
__call__ = convolve
-
-
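As a usage sketch for the class above (values are illustrative; with a 2D image, a 1D kernel and axes=None, the separable use case convolves along both axes):

    import numpy as np
    from silx.opencl.convolution import Convolution

    image = np.random.random((512, 512)).astype(np.float32)
    kernel = np.array([1, 4, 6, 4, 1], dtype=np.float32)
    kernel /= kernel.sum()                      # small binomial (gaussian-like) filter

    conv = Convolution(image.shape, kernel)     # context/device picked automatically
    blurred = conv(image)                       # __call__ is an alias of convolve()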
diff --git a/src/silx/opencl/image.py b/src/silx/opencl/image.py
index 65e2d5e..ec30e66 100644
--- a/src/silx/opencl/image.py
+++ b/src/silx/opencl/image.py
@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
#
# Project: silx
# https://github.com/silx-kit/silx
#
-# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -28,8 +27,6 @@
"""A general purpose library for manipulating 2D images in 1 or 3 colors
"""
-from __future__ import absolute_import, print_function, with_statement, division
-
__author__ = "Jerome Kieffer"
__license__ = "MIT"
@@ -40,7 +37,6 @@ __contact__ = "jerome.kieffer@esrf.fr"
import os
import logging
import numpy
-from collections import OrderedDict
from math import floor, ceil, sqrt, log
from .common import pyopencl, kernel_workgroup_size
@@ -52,20 +48,30 @@ logger = logging.getLogger(__name__)
class ImageProcessing(OpenclProcessing):
-
kernel_files = ["cast", "map", "max_min", "histogram"]
- converter = {numpy.dtype(numpy.uint8): "u8_to_float",
- numpy.dtype(numpy.int8): "s8_to_float",
- numpy.dtype(numpy.uint16): "u16_to_float",
- numpy.dtype(numpy.int16): "s16_to_float",
- numpy.dtype(numpy.uint32): "u32_to_float",
- numpy.dtype(numpy.int32): "s32_to_float",
- }
-
- def __init__(self, shape=None, ncolors=1, template=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
+ converter = {
+ numpy.dtype(numpy.uint8): "u8_to_float",
+ numpy.dtype(numpy.int8): "s8_to_float",
+ numpy.dtype(numpy.uint16): "u16_to_float",
+ numpy.dtype(numpy.int16): "s16_to_float",
+ numpy.dtype(numpy.uint32): "u32_to_float",
+ numpy.dtype(numpy.int32): "s32_to_float",
+ }
+
+ def __init__(
+ self,
+ shape=None,
+ ncolors=1,
+ template=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ block_size=None,
+ memory=None,
+ profile=False,
+ ):
"""Constructor of the ImageProcessing class
:param ctx: actual working context, left to None for automatic
@@ -79,9 +85,16 @@ class ImageProcessing(OpenclProcessing):
:param profile: switch on profiling to be able to profile at the kernel
level, store profiling elements (makes code slightly slower)
"""
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, memory=memory, profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ block_size=block_size,
+ memory=memory,
+ profile=profile,
+ )
if template is not None:
shape = template.shape
if len(shape) > 2:
@@ -94,36 +107,51 @@ class ImageProcessing(OpenclProcessing):
self.ncolors = ncolors
self.shape = shape
assert shape is not None
- self.buffer_shape = self.shape if self.ncolors == 1 else self.shape + (self.ncolors,)
+ self.buffer_shape = (
+ self.shape if self.ncolors == 1 else self.shape + (self.ncolors,)
+ )
kernel_files = [os.path.join("image", i) for i in self.kernel_files]
- self.compile_kernels(kernel_files,
- compile_options="-DNB_COLOR=%i" % self.ncolors)
+ self.compile_kernels(
+ kernel_files, compile_options="-DNB_COLOR=%i" % self.ncolors
+ )
if self.ncolors == 1:
img_shape = self.shape
else:
img_shape = self.shape + (self.ncolors,)
- buffers = [BufferDescription("image0_d", img_shape, numpy.float32, None),
- BufferDescription("image1_d", img_shape, numpy.float32, None),
- BufferDescription("image2_d", img_shape, numpy.float32, None),
- BufferDescription("max_min_d", 2, numpy.float32, None),
- BufferDescription("cnt_d", 1, numpy.int32, None), ]
+ buffers = [
+ BufferDescription("image0_d", img_shape, numpy.float32, None),
+ BufferDescription("image1_d", img_shape, numpy.float32, None),
+ BufferDescription("image2_d", img_shape, numpy.float32, None),
+ BufferDescription("max_min_d", 2, numpy.float32, None),
+ BufferDescription("cnt_d", 1, numpy.int32, None),
+ ]
# Temporary buffer for max-min reduction
- self.wg_red = kernel_workgroup_size(self.program, self.kernels.max_min_reduction_stage1)
+ self.wg_red = kernel_workgroup_size(
+ self.program, self.kernels.max_min_reduction_stage1
+ )
if self.wg_red > 1:
- self.wg_red = min(self.wg_red,
- numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2)))))
- tmp = BufferDescription("tmp_max_min_d", 2 * self.wg_red, numpy.float32, None)
+ self.wg_red = min(
+ self.wg_red,
+ numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2)))),
+ )
+ tmp = BufferDescription(
+ "tmp_max_min_d", 2 * self.wg_red, numpy.float32, None
+ )
buffers.append(tmp)
self.allocate_buffers(buffers, use_array=True)
self.cl_mem["cnt_d"].fill(0)
def __repr__(self):
- return "ImageProcessing for shape=%s, %i colors initalized on %s" % \
- (self.shape, self.ncolors, self.ctx.devices[0].name)
-
- def _get_in_out_buffers(self, img=None, copy=True, out=None,
- out_dtype=None, out_size=None):
+ return "ImageProcessing for shape=%s, %i colors initalized on %s" % (
+ self.shape,
+ self.ncolors,
+ self.ctx.devices[0].name,
+ )
+
+ def _get_in_out_buffers(
+ self, img=None, copy=True, out=None, out_dtype=None, out_size=None
+ ):
"""Internal method used to select the proper buffers before processing.
:param img: expects a numpy array or a pyopencl.array of dim 2 or 3
@@ -132,7 +160,7 @@ class ImageProcessing(OpenclProcessing):
:param out_dtype: enforce the type of the output buffer (optional)
:param out_size: enforce the size of the output buffer (optional)
:return: input_buffer, output_buffer
-
+
Nota: this is not locked.
"""
events = []
@@ -151,7 +179,9 @@ class ImageProcessing(OpenclProcessing):
if out_dtype != numpy.float32 and out_size:
name = "%s_%s_d" % (numpy.dtype(out_dtype), out_size)
if name not in self.cl_mem:
- output_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (out_size,), out_dtype)
+ output_array = self.cl_mem[name] = pyopencl.array.empty(
+ self.queue, (out_size,), out_dtype
+ )
else:
output_array = self.cl_mem[name]
else:
@@ -161,7 +191,9 @@ class ImageProcessing(OpenclProcessing):
input_array = self.cl_mem["image1_d"]
if isinstance(img, pyopencl.array.Array):
if copy:
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, img.data)
+ evt = pyopencl.enqueue_copy(
+ self.queue, self.cl_mem["image1_d"].data, img.data
+ )
input_array = self.cl_mem["image1_d"]
events.append(EventDescription("copy D->D", evt))
else:
@@ -172,11 +204,19 @@ class ImageProcessing(OpenclProcessing):
# assume this is numpy
if img.dtype.itemsize > 4:
logger.warning("Casting to float32 on CPU")
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img, numpy.float32))
+ evt = pyopencl.enqueue_copy(
+ self.queue,
+ self.cl_mem["image1_d"].data,
+ numpy.ascontiguousarray(img, numpy.float32),
+ )
input_array = self.cl_mem["image1_d"]
events.append(EventDescription("cast+copy H->D", evt))
else:
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img))
+ evt = pyopencl.enqueue_copy(
+ self.queue,
+ self.cl_mem["image1_d"].data,
+ numpy.ascontiguousarray(img),
+ )
input_array = self.cl_mem["image1_d"]
events.append(EventDescription("copy H->D", evt))
if self.profile:
@@ -184,8 +224,8 @@ class ImageProcessing(OpenclProcessing):
return input_array, output_array
def to_float(self, img, copy=True, out=None):
- """ Takes any array and convert it to a float array for ease of processing.
-
+ """Takes any array and convert it to a float array for ease of processing.
+
:param img: expects a numpy array or a pyopencl.array of dim 2 or 3
:param copy: set to False to directly re-use a pyopencl array
:param out: provide an output buffer to store the result
@@ -197,16 +237,23 @@ class ImageProcessing(OpenclProcessing):
input_array, output_array = self._get_in_out_buffers(img, copy, out)
if (img.dtype.itemsize > 4) or (img.dtype == numpy.float32):
# copy device -> device, already there as float32
- ev = pyopencl.enqueue_copy(self.queue, output_array.data, input_array.data)
+ ev = pyopencl.enqueue_copy(
+ self.queue, output_array.data, input_array.data
+ )
events.append(EventDescription("copy D->D", ev))
else:
# Cast to float:
name = self.converter[img.dtype]
kernel = self.kernels.get_kernel(name)
- ev = kernel(self.queue, (self.shape[1], self.shape[0]), None,
- input_array.data, output_array.data,
- numpy.int32(self.shape[1]), numpy.int32(self.shape[0])
- )
+ ev = kernel(
+ self.queue,
+ (self.shape[1], self.shape[0]),
+ None,
+ input_array.data,
+ output_array.data,
+ numpy.int32(self.shape[1]),
+ numpy.int32(self.shape[0]),
+ )
events.append(EventDescription("cast %s" % name, ev))
if self.profile:
@@ -221,14 +268,14 @@ class ImageProcessing(OpenclProcessing):
def normalize(self, img, mini=0.0, maxi=1.0, copy=True, out=None):
"""Scale the intensity of the image so that the minimum is 0 and the
maximum is 1.0 (or any value suggested).
-
+
:param img: numpy array or pyopencl array of dim 2 or 3 and of type float
:param mini: Expected minimum value
:param maxi: expected maximum value
:param copy: set to False to use directly the input buffer
:param out: provides an output buffer. prevents a copy D->H
-
- This uses a min/max reduction in two stages plus a map operation
+
+ This uses a min/max reduction in two stages plus a map operation
"""
assert img.shape == self.buffer_shape
events = []
@@ -238,34 +285,55 @@ class ImageProcessing(OpenclProcessing):
if self.wg_red == 1:
# Probably on MacOS CPU WG==1 --> serial code.
kernel = self.kernels.get_kernel("max_min_serial")
- evt = kernel(self.queue, (1,), (1,),
- input_array.data,
- size,
- self.cl_mem["max_min_d"].data)
+ evt = kernel(
+ self.queue,
+ (1,),
+ (1,),
+ input_array.data,
+ size,
+ self.cl_mem["max_min_d"].data,
+ )
ed = EventDescription("max_min_serial", evt)
events.append(ed)
else:
stage1 = self.kernels.max_min_reduction_stage1
stage2 = self.kernels.max_min_reduction_stage2
local_mem = pyopencl.LocalMemory(int(self.wg_red * 8))
- k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
- input_array.data,
- self.cl_mem["tmp_max_min_d"].data,
- size,
- local_mem)
- k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
- self.cl_mem["tmp_max_min_d"].data,
- self.cl_mem["max_min_d"].data,
- local_mem)
-
- events += [EventDescription("max_min_stage1", k1),
- EventDescription("max_min_stage2", k2)]
-
- evt = self.kernels.normalize_image(self.queue, (self.shape[1], self.shape[0]), None,
- input_array.data, output_array.data,
- numpy.int32(self.shape[1]), numpy.int32(self.shape[0]),
- self.cl_mem["max_min_d"].data,
- numpy.float32(mini), numpy.float32(maxi))
+ k1 = stage1(
+ self.queue,
+ (int(self.wg_red**2),),
+ (int(self.wg_red),),
+ input_array.data,
+ self.cl_mem["tmp_max_min_d"].data,
+ size,
+ local_mem,
+ )
+ k2 = stage2(
+ self.queue,
+ (int(self.wg_red),),
+ (int(self.wg_red),),
+ self.cl_mem["tmp_max_min_d"].data,
+ self.cl_mem["max_min_d"].data,
+ local_mem,
+ )
+
+ events += [
+ EventDescription("max_min_stage1", k1),
+ EventDescription("max_min_stage2", k2),
+ ]
+
+ evt = self.kernels.normalize_image(
+ self.queue,
+ (self.shape[1], self.shape[0]),
+ None,
+ input_array.data,
+ output_array.data,
+ numpy.int32(self.shape[1]),
+ numpy.int32(self.shape[0]),
+ self.cl_mem["max_min_d"].data,
+ numpy.float32(mini),
+ numpy.float32(maxi),
+ )
events.append(EventDescription("normalize", evt))
if self.profile:
self.events += events
@@ -277,32 +345,32 @@ class ImageProcessing(OpenclProcessing):
output_array.finish()
return output_array
- def histogram(self, img=None, nbins=255, range=None,
- log_scale=False, copy=True, out=None):
+ def histogram(
+ self, img=None, nbins=255, range=None, log_scale=False, copy=True, out=None
+ ):
"""Compute the histogram of a set of data.
-
+
:param img: input image. If None, use the one already on the device
:param nbins: number of bins
- :param range: the lower and upper range of the bins. If not provided,
- range is simply ``(a.min(), a.max())``. Values outside the
- range are ignored. The first element of the range must be
+ :param range: the lower and upper range of the bins. If not provided,
+ range is simply ``(a.min(), a.max())``. Values outside the
+ range are ignored. The first element of the range must be
less than or equal to the second.
- :param log_scale: perform the binning in lograrithmic scale.
+ :param log_scale: perform the binning in logarithmic scale.
Open to extension
:param copy: unset to directly use the input buffer without copy
- :param out: use a provided array for offering the result
+ :param out: use a provided array to store the result
:return: histogram (size=nbins), edges (size=nbins+1)
- API similar to numpy
+ API similar to numpy
"""
assert img.shape == self.buffer_shape
input_array = self.to_float(img, copy=copy, out=self.cl_mem["image0_d"])
events = []
with self.sem:
- input_array, output_array = self._get_in_out_buffers(input_array, copy=False,
- out=out,
- out_dtype=numpy.int32,
- out_size=nbins)
+ input_array, output_array = self._get_in_out_buffers(
+ input_array, copy=False, out=out, out_dtype=numpy.int32, out_size=nbins
+ )
if range is None:
# measure actually the bounds
@@ -311,27 +379,43 @@ class ImageProcessing(OpenclProcessing):
# Probably on MacOS CPU WG==1 --> serial code.
kernel = self.kernels.get_kernel("max_min_serial")
- evt = kernel(self.queue, (1,), (1,),
- input_array.data,
- size,
- self.cl_mem["max_min_d"].data)
+ evt = kernel(
+ self.queue,
+ (1,),
+ (1,),
+ input_array.data,
+ size,
+ self.cl_mem["max_min_d"].data,
+ )
events.append(EventDescription("max_min_serial", evt))
else:
stage1 = self.kernels.max_min_reduction_stage1
stage2 = self.kernels.max_min_reduction_stage2
- local_mem = pyopencl.LocalMemory(int(self.wg_red * 2 * numpy.dtype("float32").itemsize))
- k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
- input_array.data,
- self.cl_mem["tmp_max_min_d"].data,
- size,
- local_mem)
- k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
- self.cl_mem["tmp_max_min_d"].data,
- self.cl_mem["max_min_d"].data,
- local_mem)
-
- events += [EventDescription("max_min_stage1", k1),
- EventDescription("max_min_stage2", k2)]
+ local_mem = pyopencl.LocalMemory(
+ int(self.wg_red * 2 * numpy.dtype("float32").itemsize)
+ )
+ k1 = stage1(
+ self.queue,
+ (int(self.wg_red**2),),
+ (int(self.wg_red),),
+ input_array.data,
+ self.cl_mem["tmp_max_min_d"].data,
+ size,
+ local_mem,
+ )
+ k2 = stage2(
+ self.queue,
+ (int(self.wg_red),),
+ (int(self.wg_red),),
+ self.cl_mem["tmp_max_min_d"].data,
+ self.cl_mem["max_min_d"].data,
+ local_mem,
+ )
+
+ events += [
+ EventDescription("max_min_stage1", k1),
+ EventDescription("max_min_stage2", k2),
+ ]
maxi, mini = self.cl_mem["max_min_d"].get()
else:
mini = numpy.float32(min(range))
@@ -341,13 +425,17 @@ class ImageProcessing(OpenclProcessing):
tmp_size = nb_engines * nbins
name = "tmp_int32_%s_d" % (tmp_size)
if name not in self.cl_mem:
- tmp_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (tmp_size,), numpy.int32)
+ tmp_array = self.cl_mem[name] = pyopencl.array.empty(
+ self.queue, (tmp_size,), numpy.int32
+ )
else:
tmp_array = self.cl_mem[name]
edge_name = "tmp_float32_%s_d" % (nbins + 1)
if edge_name not in self.cl_mem:
- edges_array = self.cl_mem[edge_name] = pyopencl.array.empty(self.queue, (nbins + 1,), numpy.float32)
+ edges_array = self.cl_mem[edge_name] = pyopencl.array.empty(
+ self.queue, (nbins + 1,), numpy.float32
+ )
else:
edges_array = self.cl_mem[edge_name]
@@ -359,21 +447,27 @@ class ImageProcessing(OpenclProcessing):
else:
map_operation = numpy.int32(0)
kernel = self.kernels.get_kernel("histogram")
- wg = min(device.max_work_group_size,
- 1 << (int(ceil(log(nbins, 2)))),
- self.kernels.max_workgroup_size(kernel))
- evt = kernel(self.queue, (wg * nb_engines,), (wg,),
- input_array.data,
- numpy.int32(input_array.size),
- mini,
- maxi,
- map_operation,
- output_array.data,
- edges_array.data,
- numpy.int32(nbins),
- tmp_array.data,
- self.cl_mem["cnt_d"].data,
- shared)
+ wg = min(
+ device.max_work_group_size,
+ 1 << (int(ceil(log(nbins, 2)))),
+ self.kernels.max_workgroup_size(kernel),
+ )
+ evt = kernel(
+ self.queue,
+ (wg * nb_engines,),
+ (wg,),
+ input_array.data,
+ numpy.int32(input_array.size),
+ mini,
+ maxi,
+ map_operation,
+ output_array.data,
+ edges_array.data,
+ numpy.int32(nbins),
+ tmp_array.data,
+ self.cl_mem["cnt_d"].data,
+ shared,
+ )
events.append(EventDescription("histogram", evt))
if self.profile:
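A sketch of the ImageProcessing API reworked above (histogram returns (histogram, edges) as documented; an out= buffer can be supplied to avoid the device-to-host copy):

    import numpy
    from silx.opencl.image import ImageProcessing

    raw = (numpy.random.random((1024, 1024)) * 65535).astype(numpy.uint16)
    ip = ImageProcessing(template=raw)          # shape and number of colors from the template
    as_float = ip.to_float(raw)                 # cast to float32 on the device
    normalized = ip.normalize(raw.astype(numpy.float32), mini=0.0, maxi=1.0)
    hist, edges = ip.histogram(raw, nbins=256)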
diff --git a/src/silx/opencl/linalg.py b/src/silx/opencl/linalg.py
index a64122a..573ebce 100644
--- a/src/silx/opencl/linalg.py
+++ b/src/silx/opencl/linalg.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016 European Synchrotron Radiation Facility
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Module for basic linear algebra in OpenCL"""
-from __future__ import absolute_import, print_function, with_statement, division
-
__authors__ = ["P. Paleo"]
__license__ = "MIT"
__date__ = "01/08/2019"
@@ -37,14 +34,23 @@ from .common import pyopencl
from .processing import EventDescription, OpenclProcessing
import pyopencl.array as parray
+
cl = pyopencl
class LinAlg(OpenclProcessing):
-
kernel_files = ["linalg.cl"]
- def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False):
+ def __init__(
+ self,
+ shape,
+ do_checks=False,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ ):
"""
Create a "Linear Algebra" plan for a given image shape.
@@ -59,32 +65,34 @@ class LinAlg(OpenclProcessing):
store profiling elements (makes code slightly slower)
"""
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
self.d_gradient = parray.empty(self.queue, shape, np.complex64)
self.d_gradient.fill(np.complex64(0.0))
self.d_image = parray.empty(self.queue, shape, np.float32)
self.d_image.fill(np.float32(0.0))
- self.add_to_cl_mem({
- "d_gradient": self.d_gradient,
- "d_image": self.d_image
- })
+ self.add_to_cl_mem({"d_gradient": self.d_gradient, "d_image": self.d_image})
self.wg2D = None
self.shape = shape
- self.ndrange2D = (
- int(self.shape[1]),
- int(self.shape[0])
- )
+ self.ndrange2D = (int(self.shape[1]), int(self.shape[0]))
self.do_checks = bool(do_checks)
OpenclProcessing.compile_kernels(self, self.kernel_files)
@staticmethod
def check_array(array, dtype, shape, arg_name):
if array.shape != shape or array.dtype != dtype:
- raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype)))
+ raise ValueError(
+ "%s should be a %s array of type %s"
+ % (arg_name, str(shape), str(dtype))
+ )
def get_data_references(self, src, dst, default_src_ref, default_dst_ref):
"""
@@ -100,7 +108,9 @@ class LinAlg(OpenclProcessing):
elif isinstance(dst, cl.Buffer):
dst_ref = dst
else:
- raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer")
+ raise ValueError(
+ "dst should be either pyopencl.array.Array or pyopencl.Buffer"
+ )
else:
dst_ref = default_dst_ref
@@ -130,21 +140,15 @@ class LinAlg(OpenclProcessing):
self.check_array(image, np.float32, self.shape, "image")
if dst is not None:
self.check_array(dst, np.complex64, self.shape, "dst")
- img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data)
+ img_ref, grad_ref = self.get_data_references(
+ image, dst, self.d_image.data, self.d_gradient.data
+ )
# Prepare the kernel call
- kernel_args = [
- img_ref,
- grad_ref,
- n_x,
- n_y
- ]
+ kernel_args = [img_ref, grad_ref, n_x, n_y]
# Call the gradient kernel
evt = self.kernels.kern_gradient2D(
- self.queue,
- self.ndrange2D,
- self.wg2D,
- *kernel_args
+ self.queue, self.ndrange2D, self.wg2D, *kernel_args
)
self.events.append(EventDescription("gradient2D", evt))
# TODO: should the wait be done in any case ?
@@ -187,21 +191,15 @@ class LinAlg(OpenclProcessing):
self.check_array(gradient, np.complex64, self.shape, "gradient")
if dst is not None:
self.check_array(dst, np.float32, self.shape, "dst")
- grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data)
+ grad_ref, img_ref = self.get_data_references(
+ gradient, dst, self.d_gradient.data, self.d_image.data
+ )
# Prepare the kernel call
- kernel_args = [
- grad_ref,
- img_ref,
- n_x,
- n_y
- ]
+ kernel_args = [grad_ref, img_ref, n_x, n_y]
# Call the gradient kernel
evt = self.kernels.kern_divergence2D(
- self.queue,
- self.ndrange2D,
- self.wg2D,
- *kernel_args
+ self.queue, self.ndrange2D, self.wg2D, *kernel_args
)
self.events.append(EventDescription("divergence2D", evt))
# TODO: should the wait be done in any case ?
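A sketch of the reworked LinAlg plan; the gradient()/divergence() method names are assumed from the kern_gradient2D/kern_divergence2D kernels and the type checks above (image: float32, gradient: complex64):

    import numpy as np
    from silx.opencl.linalg import LinAlg

    img = np.random.random((256, 256)).astype(np.float32)
    la = LinAlg(img.shape)
    grad = la.gradient(img)        # forward differences, stored as a complex64 array
    div = la.divergence(grad)      # adjoint operator, back to a float32 array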
diff --git a/src/silx/opencl/medfilt.py b/src/silx/opencl/medfilt.py
index d4e425b..a18c5a4 100644
--- a/src/silx/opencl/medfilt.py
+++ b/src/silx/opencl/medfilt.py
@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
#
# Project: Azimuthal integration
# https://github.com/silx-kit/pyFAI
#
-# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -31,8 +30,6 @@ The target is to mimic the signature of scipy.signal.medfilt and scipy.medfilt2
The first implementation targets 2D implementation where this operation is costly (~10s/2kx2k image)
"""
-from __future__ import absolute_import, print_function, with_statement, division
-
__author__ = "Jerome Kieffer"
__license__ = "MIT"
@@ -42,7 +39,6 @@ __contact__ = "jerome.kieffer@esrf.fr"
import logging
import numpy
-from collections import OrderedDict
from .common import pyopencl, kernel_workgroup_size
from .processing import EventDescription, OpenclProcessing, BufferDescription
@@ -56,23 +52,33 @@ logger = logging.getLogger(__name__)
class MedianFilter2D(OpenclProcessing):
"""A class for doing median filtering using OpenCL"""
+
buffers = [
- BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY),
- BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
- BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
- ]
+ BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY),
+ BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
+ BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
+ ]
kernel_files = ["preprocess.cl", "bitonic.cl", "medfilt.cl"]
- mapping = {numpy.int8: "s8_to_float",
- numpy.uint8: "u8_to_float",
- numpy.int16: "s16_to_float",
- numpy.uint16: "u16_to_float",
- numpy.uint32: "u32_to_float",
- numpy.int32: "s32_to_float"}
-
- def __init__(self, shape, kernel_size=(3, 3),
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, profile=False
- ):
+ mapping = {
+ numpy.int8: "s8_to_float",
+ numpy.uint8: "u8_to_float",
+ numpy.int16: "s16_to_float",
+ numpy.uint16: "u16_to_float",
+ numpy.uint32: "u32_to_float",
+ numpy.int32: "s32_to_float",
+ }
+
+ def __init__(
+ self,
+ shape,
+ kernel_size=(3, 3),
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ block_size=None,
+ profile=False,
+ ):
"""Constructor of the OpenCL 2D median filtering class
:param shape: shape of the images to treat
@@ -86,34 +92,56 @@ class MedianFilter2D(OpenclProcessing):
:param profile: switch on profiling to be able to profile at the kernel level,
store profiling elements (makes code slightly slower)
"""
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ block_size=block_size,
+ profile=profile,
+ )
self.shape = shape
self.size = self.shape[0] * self.shape[1]
self.kernel_size = self.calc_kernel_size(kernel_size)
self.workgroup_size = (self.calc_wg(self.kernel_size), 1) # 3D kernel
- self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
- for i in self.__class__.buffers]
+ self.buffers = [
+ BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
+ for i in self.__class__.buffers
+ ]
self.allocate_buffers()
self.local_mem = self._get_local_mem(self.workgroup_size[0])
- OpenclProcessing.compile_kernels(self, self.kernel_files, "-D NIMAGE=%i" % self.size)
+ OpenclProcessing.compile_kernels(
+ self, self.kernel_files, "-D NIMAGE=%i" % self.size
+ )
self.set_kernel_arguments()
def set_kernel_arguments(self):
- """Parametrize all kernel arguments
- """
+ """Parametrize all kernel arguments"""
for val in self.mapping.values():
- self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("image_raw", "image")))
- self.cl_kernel_args["medfilt2d"] = OrderedDict((("image", self.cl_mem["image"]),
- ("result", self.cl_mem["result"]),
- ("local", self.local_mem),
- ("khs1", numpy.int32(self.kernel_size[0] // 2)), # Kernel half-size along dim1 (lines)
- ("khs2", numpy.int32(self.kernel_size[1] // 2)), # Kernel half-size along dim2 (columns)
- ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines)
- ("width", numpy.int32(self.shape[1]))))
-# ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns))
+ self.cl_kernel_args[val] = dict(
+ ((i, self.cl_mem[i]) for i in ("image_raw", "image"))
+ )
+ self.cl_kernel_args["medfilt2d"] = dict(
+ (
+ ("image", self.cl_mem["image"]),
+ ("result", self.cl_mem["result"]),
+ ("local", self.local_mem),
+ (
+ "khs1",
+ numpy.int32(self.kernel_size[0] // 2),
+ ), # Kernel half-size along dim1 (lines)
+ (
+ "khs2",
+ numpy.int32(self.kernel_size[1] // 2),
+ ), # Kernel half-size along dim2 (columns)
+ ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines)
+ ("width", numpy.int32(self.shape[1])),
+ )
+ )
+
+ # ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns))
def _get_local_mem(self, wg):
return pyopencl.LocalMemory(wg * 32) # 4byte per float, 8 element per thread
@@ -128,13 +156,26 @@ class MedianFilter2D(OpenclProcessing):
dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
events = []
if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
- copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type))
+ copy_image = pyopencl.enqueue_copy(
+ self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type)
+ )
events.append(EventDescription("copy H->D %s" % dest, copy_image))
else:
- copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data))
+ copy_image = pyopencl.enqueue_copy(
+ self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data)
+ )
kernel = getattr(self.program, self.mapping[data.dtype.type])
- cast_to_float = kernel(self.queue, (self.size,), None, self.cl_mem["image_raw"], self.cl_mem[dest])
- events += [EventDescription("copy H->D %s" % dest, copy_image), EventDescription("cast to float", cast_to_float)]
+ cast_to_float = kernel(
+ self.queue,
+ (self.size,),
+ None,
+ self.cl_mem["image_raw"],
+ self.cl_mem[dest],
+ )
+ events += [
+ EventDescription("copy H->D %s" % dest, copy_image),
+ EventDescription("cast to float", cast_to_float),
+ ]
if self.profile:
self.events += events
@@ -183,7 +224,9 @@ class MedianFilter2D(OpenclProcessing):
amws = kernel_workgroup_size(self.program, "medfilt2d")
logger.warning("max actual workgroup size: %s, expected: %s", amws, wg)
if wg > amws:
- raise RuntimeError("Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws))
+ raise RuntimeError(
+ "Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws)
+ )
localmem = self._get_local_mem(wg)
@@ -200,11 +243,11 @@ class MedianFilter2D(OpenclProcessing):
kwargs["khs2"] = kernel_half_size[1]
kwargs["height"] = numpy.int32(image.shape[0])
kwargs["width"] = numpy.int32(image.shape[1])
-# for k, v in kwargs.items():
-# print("%s: %s (%s)" % (k, v, type(v)))
- mf2d = self.kernels.medfilt2d(self.queue,
- (wg, image.shape[1]),
- (wg, 1), *list(kwargs.values()))
+ # for k, v in kwargs.items():
+ # print("%s: %s (%s)" % (k, v, type(v)))
+ mf2d = self.kernels.medfilt2d(
+ self.queue, (wg, image.shape[1]), (wg, 1), *list(kwargs.values())
+ )
events.append(EventDescription("median filter 2d", mf2d))
result = numpy.empty(image.shape, numpy.float32)
@@ -214,12 +257,12 @@ class MedianFilter2D(OpenclProcessing):
if self.profile:
self.events += events
return result
+
__call__ = medfilt2d
@staticmethod
def calc_kernel_size(kernel_size):
- """format the kernel size to be a 2-length numpy array of int32
- """
+ """format the kernel size to be a 2-length numpy array of int32"""
kernel_size = numpy.asarray(kernel_size, dtype=numpy.int32)
if kernel_size.shape == ():
kernel_size = numpy.repeat(kernel_size.item(), 2).astype(numpy.int32)
@@ -252,7 +295,7 @@ class _MedFilt2d(object):
* The filling mode in scipy.signal.medfilt2d is zero-padding
* This implementation is equivalent to:
- scipy.ndimage.filters.median_filter(ary, kernel_size, mode="nearest")
+ scipy.ndimage.median_filter(ary, kernel_size, mode="nearest")
"""
image = numpy.atleast_2d(ary)
@@ -266,4 +309,5 @@ class _MedFilt2d(object):
cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx)
return cls.median_filter.medfilt2d(image, kernel_size=kernel_size)
+
medfilt2d = _MedFilt2d.medfilt2d
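Usage sketch for the scipy-like entry point above (the kernel size is illustrative); the class can also be instantiated once and reused for a stack of same-shaped frames:

    import numpy
    from silx.opencl.medfilt import medfilt2d, MedianFilter2D

    frame = numpy.random.random((2048, 2048)).astype(numpy.float32)

    # One-shot helper, mimicking scipy.signal.medfilt2d (but with "nearest" padding):
    filtered = medfilt2d(frame, 5)

    # Reusable plan, cheaper when filtering many frames of the same shape:
    mf = MedianFilter2D(frame.shape, kernel_size=(5, 5))
    filtered = mf.medfilt2d(frame, kernel_size=(5, 5))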
diff --git a/src/silx/opencl/processing.py b/src/silx/opencl/processing.py
index 8b81f7f..6db21d0 100644
--- a/src/silx/opencl/processing.py
+++ b/src/silx/opencl/processing.py
@@ -1,10 +1,9 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: S I L X project
# https://github.com/silx-kit/silx
#
-# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -38,22 +37,32 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "02/03/2021"
+__date__ = "09/11/2022"
__status__ = "stable"
import sys
import os
import logging
import gc
-from collections import namedtuple, OrderedDict
+from collections import namedtuple
import numpy
import threading
-from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
+from .common import (
+ ocl,
+ pyopencl,
+ release_cl_buffers,
+ query_kernel_info,
+ allocate_texture,
+ check_textures_availability,
+)
from .utils import concatenate_cl_kernel
import platform
BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
-EventDescription = namedtuple("EventDescription", ["name", "event"])
+EventDescription = namedtuple(
+ "EventDescription", ["name", "event"]
+) # Deprecated, please use ProfileDescription
+ProfileDescription = namedtuple("ProfileDescription", ["name", "start", "stop"])
logger = logging.getLogger(__name__)
@@ -72,8 +81,9 @@ class KernelContainer(object):
def get_kernels(self):
"return the dictionary with all kernels"
- return dict(item for item in self.__dict__.items()
- if not item[0].startswith("_"))
+ return dict(
+ item for item in self.__dict__.items() if not item[0].startswith("_")
+ )
def get_kernel(self, name):
"get a kernel from its name"
@@ -96,7 +106,9 @@ class KernelContainer(object):
else:
kernel = self.get_kernel(kernel_name)
- return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
+ return query_kernel_info(
+ self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE"
+ )
class OpenclProcessing(object):
@@ -108,14 +120,24 @@ class OpenclProcessing(object):
* Functions to compile kernels, cache them and clean them
* helper functions to clone the object
"""
+
# Example of how to create an output buffer of 10 floats
- buffers = [BufferDescription("output", 10, numpy.float32, None),
- ]
+ buffers = [
+ BufferDescription("output", 10, numpy.float32, None),
+ ]
# list of kernel source files to be concatenated before compilation of the program
kernel_files = []
- def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
+ def __init__(
+ self,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ block_size=None,
+ memory=None,
+ profile=False,
+ ):
"""Constructor of the abstract OpenCL processing class
:param ctx: actual working context, left to None for automatic
@@ -140,9 +162,12 @@ class OpenclProcessing(object):
if ctx:
self.ctx = ctx
else:
- self.ctx = ocl.create_context(devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- memory=memory)
+ self.ctx = ocl.create_context(
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ memory=memory,
+ )
device_name = self.ctx.devices[0].name.strip()
platform_name = self.ctx.devices[0].platform.name.strip()
platform = ocl.get_platform(platform_name)
@@ -158,8 +183,7 @@ class OpenclProcessing(object):
return check_textures_availability(self.ctx)
def __del__(self):
- """Destructor: release all buffers and programs
- """
+ """Destructor: release all buffers and programs"""
try:
self.reset_log()
self.free_kernels()
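For context, OpenclProcessing is meant to be subclassed: the child class declares kernel_files/buffers, then relies on allocate_buffers(), compile_kernels() and the cl_mem/kernels containers. A toy, hypothetical subclass (no .cl source, plain pyopencl.array arithmetic) just to show the wiring:

    import numpy
    from silx.opencl.processing import OpenclProcessing, BufferDescription

    class Doubler(OpenclProcessing):
        "Hypothetical example: doubles a 1D float32 array on the device."
        kernel_files = []            # .cl sources would be concatenated by compile_kernels()

        def __init__(self, size, **kwargs):
            OpenclProcessing.__init__(self, **kwargs)
            self.allocate_buffers(
                [BufferDescription("data", size, numpy.float32, None)], use_array=True
            )

        def process(self, arr):
            self.cl_mem["data"].set(arr)           # pyopencl.array thanks to use_array=True
            return (2 * self.cl_mem["data"]).get()

    proc = Doubler(1024, profile=True)
    out = proc.process(numpy.arange(1024, dtype=numpy.float32))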
@@ -201,19 +225,27 @@ class OpenclProcessing(object):
ualloc = 0
for buf in buffers:
ualloc += numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
- logger.info("%.3fMB are needed on device: %s, which has %.3fMB",
- ualloc / 1.0e6, self.device, self.device.memory / 1.0e6)
+ logger.info(
+ "%.3fMB are needed on device: %s, which has %.3fMB",
+ ualloc / 1.0e6,
+ self.device,
+ self.device.memory / 1.0e6,
+ )
if ualloc >= self.device.memory:
- raise MemoryError("Fatal error in allocate_buffers. Not enough "
- " device memory for buffers (%lu requested, %lu available)"
- % (ualloc, self.device.memory))
+ raise MemoryError(
+ "Fatal error in allocate_buffers. Not enough "
+ " device memory for buffers (%lu requested, %lu available)"
+ % (ualloc, self.device.memory)
+ )
# do the allocation
try:
if use_array:
for buf in buffers:
- mem[buf.name] = pyopencl.array.empty(self.queue, buf.size, buf.dtype)
+ mem[buf.name] = pyopencl.array.empty(
+ self.queue, buf.size, buf.dtype
+ )
else:
for buf in buffers:
size = numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
@@ -241,8 +273,7 @@ class OpenclProcessing(object):
return self.kernels.max_workgroup_size(kernel_name)
def free_buffers(self):
- """free all device.memory allocated on the device
- """
+ """free all device.memory allocated on the device"""
with self.sem:
for key, buf in list(self.cl_mem.items()):
if buf is not None:
@@ -272,20 +303,22 @@ class OpenclProcessing(object):
compile_options = compile_options or self.get_compiler_options()
logger.info("Compiling file %s with options %s", kernel_files, compile_options)
try:
- self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options)
+ self.program = pyopencl.Program(self.ctx, kernel_src).build(
+ options=compile_options
+ )
except (pyopencl.MemoryError, pyopencl.LogicError) as error:
raise MemoryError(error)
else:
self.kernels = KernelContainer(self.program)
def free_kernels(self):
- """Free all kernels
- """
+ """Free all kernels"""
for kernel in self.cl_kernel_args:
self.cl_kernel_args[kernel] = []
self.kernels = None
self.program = None
+ # Methods about Profiling
def set_profiling(self, value=True):
"""Switch On/Off the profiling flag of the command queue to allow debugging
@@ -300,85 +333,121 @@ class OpenclProcessing(object):
if self.queue is not None:
self.queue.finish()
if self.profile:
- self.queue = pyopencl.CommandQueue(self.ctx,
- properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+ self.queue = pyopencl.CommandQueue(
+ self.ctx,
+ properties=pyopencl.command_queue_properties.PROFILING_ENABLE,
+ )
else:
self.queue = pyopencl.CommandQueue(self.ctx)
+ # Update all memory-objects with the new queue:
+ for obj, cl_obj in list(self.cl_mem.items()):
+ if isinstance(cl_obj, pyopencl.array.Array):
+ self.cl_mem[obj] = cl_obj.with_queue(self.queue)
def profile_add(self, event, desc):
"""
Add an OpenCL event to the events lists, if profiling is enabled.
- :param event: silx.opencl.processing.EventDescription.
+        :param event: pyopencl.NannyEvent.
:param desc: event description
"""
if self.profile:
- self.events.append(EventDescription(desc, event))
-
- def allocate_texture(self, shape, hostbuf=None, support_1D=False):
- return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
+ try:
+ profile = event.profile
+ self.events.append(ProfileDescription(desc, profile.start, profile.end))
+ except Exception:
+ # Probably the driver does not support profiling
+ pass
- def transfer_to_texture(self, arr, tex_ref):
+ def profile_multi(self, event_lists):
"""
- Transfer an array to a texture.
+        Extract profiling info from several OpenCL events, if profiling is enabled.
- :param arr: Input array. Can be a numpy array or a pyopencl array.
- :param tex_ref: texture reference (pyopencl._cl.Image).
+        :param event_lists: list of ("desc", pyopencl.NannyEvent).
"""
- copy_args = [self.queue, tex_ref, arr]
- shp = arr.shape
- ndim = arr.ndim
- if ndim == 1:
- # pyopencl and OpenCL < 1.2 do not support image1d_t
- # force 2D with one row in this case
- # ~ ndim = 2
- shp = (1,) + shp
- copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
- if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
- # D->D copy
- copy_args[2] = arr.data
- copy_kwargs["offset"] = 0
- ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
- self.profile_add(ev, "Transfer to texture")
+ if self.profile:
+ for event_desc in event_lists:
+ if isinstance(event_desc, ProfileDescription):
+ self.events.append(event_desc)
+ else:
+ if (
+ isinstance(event_desc, EventDescription)
+ or "__len__" in dir(event_desc)
+ and len(event_desc) == 2
+ ):
+ desc, event = event_desc
+ else:
+ desc = "?"
+ event = event_desc
+ try:
+ profile = event.profile
+ start = profile.start
+ end = profile.end
+ except Exception:
+ # probably an unfinished job ... use old-style.
+ self.events.append(event_desc)
+ else:
+ self.events.append(ProfileDescription(desc, start, end))
def log_profile(self, stats=False):
"""If we are in profiling mode, prints out all timing for every single OpenCL call
-
+
:param stats: if True, prints the statistics on each kernel instead of all execution timings
:return: list of lines to print
"""
total_time = 0.0
out = [""]
if stats:
- stats = OrderedDict()
- out.append(f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}")
- out.append(f"{'Kernel name':>50} (count): min median max mean std")
+ stats = {}
+ out.append(
+ f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}"
+ )
+ out.append(
+ f"{'Kernel name':>50} (count): min median max mean std"
+ )
else:
stats = None
out.append(f"Profiling info for OpenCL: {self.__class__.__name__}")
if self.profile:
for e in self.events:
- if "__len__" in dir(e) and len(e) >= 2:
+ if isinstance(e, ProfileDescription):
+ name = e[0]
+ t0 = e[1]
+ t1 = e[2]
+ elif (
+ isinstance(e, EventDescription)
+ or "__len__" in dir(e)
+ and len(e) == 2
+ ):
name = e[0]
pr = e[1].profile
t0 = pr.start
t1 = pr.end
- et = 1e-6 * (t1 - t0)
- total_time += et
- if stats is None:
- out.append(f"{name:>50} : {et:.3f}ms")
+ else:
+ name = "?"
+ t0 = e.profile.start
+ t1 = e.profile.end
+
+ et = 1e-6 * (t1 - t0)
+ total_time += et
+ if stats is None:
+ out.append(f"{name:>50} : {et:.3f}ms")
+ else:
+ if name in stats:
+ stats[name].append(et)
else:
- if name in stats:
- stats[name].append(et)
- else:
- stats[name] = [et]
+ stats[name] = [et]
if stats is not None:
for k, v in stats.items():
n = numpy.array(v)
- out.append(f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}")
+ out.append(
+ f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}"
+ )
out.append("_" * 80)
- out.append(f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms")
+ out.append(
+ f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms"
+ )
logger.info(os.linesep.join(out))
return out
@@ -390,13 +459,42 @@ class OpenclProcessing(object):
with self.sem:
self.events = []
+ # Methods about textures
+ def allocate_texture(self, shape, hostbuf=None, support_1D=False):
+ return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
+
+ def transfer_to_texture(self, arr, tex_ref):
+ """
+ Transfer an array to a texture.
+
+ :param arr: Input array. Can be a numpy array or a pyopencl array.
+ :param tex_ref: texture reference (pyopencl._cl.Image).
+ """
+ copy_args = [self.queue, tex_ref, arr]
+ shp = arr.shape
+ ndim = arr.ndim
+ if ndim == 1:
+ # pyopencl and OpenCL < 1.2 do not support image1d_t
+ # force 2D with one row in this case
+ # ~ ndim = 2
+ shp = (1,) + shp
+ copy_kwargs = {"origin": (0,) * ndim, "region": shp[::-1]}
+ if not (isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
+ # D->D copy
+ copy_args[2] = arr.data
+ copy_kwargs["offset"] = 0
+ ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
+ self.profile_add(ev, "Transfer to texture")
+
@property
def x87_volatile_option(self):
        # this is needed when running 32-bit OpenCL with PoCL
if self._X87_VOLATILE is None:
- if (platform.machine() in ("i386", "i686", "x86_64", "AMD64") and
- (tuple.__itemsize__ == 4) and
- self.ctx.devices[0].platform.name == 'Portable Computing Language'):
+ if (
+ platform.machine() in ("i386", "i686", "x86_64", "AMD64")
+ and (tuple.__itemsize__ == 4)
+ and self.ctx.devices[0].platform.name == "Portable Computing Language"
+ ):
self._X87_VOLATILE = "-DX87_VOLATILE=volatile"
else:
self._X87_VOLATILE = ""
@@ -413,6 +511,7 @@ class OpenclProcessing(object):
option_list.append(self.x87_volatile_option)
return " ".join(i for i in option_list if i)
+
# This should be implemented by concrete class
# def __copy__(self):
# """Shallow copy of the object
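For illustration, a minimal usage sketch (not part of the diff) of the profiling helpers reworked above: profile_add() records a finished event and log_profile(stats=True) prints per-kernel statistics. It assumes pyopencl with at least one OpenCL device; the CopyBench class and buffer size are invented for the example.

    import numpy
    import pyopencl
    import pyopencl.array as parray
    from silx.opencl.processing import OpenclProcessing

    class CopyBench(OpenclProcessing):
        """Toy subclass timing a host->device->host round trip."""

        def run(self, data):
            data = numpy.ascontiguousarray(data, dtype=numpy.float32)
            d_buf = parray.empty(self.queue, data.shape, numpy.float32)
            ev1 = pyopencl.enqueue_copy(self.queue, d_buf.data, data)
            res = numpy.empty_like(data)
            ev2 = pyopencl.enqueue_copy(self.queue, res, d_buf.data)
            self.queue.finish()                 # make sure profiling data is available
            self.profile_add(ev1, "copy H->D")
            self.profile_add(ev2, "copy D->H")
            return res

    bench = CopyBench(profile=True)             # profiling-enabled command queue
    bench.run(numpy.ones(1 << 20))
    print("\n".join(bench.log_profile(stats=True)))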
diff --git a/src/silx/opencl/projection.py b/src/silx/opencl/projection.py
index c02faf6..cf4b625 100644
--- a/src/silx/opencl/projection.py
+++ b/src/silx/opencl/projection.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016-2020 European Synchrotron Radiation Facility
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Module for tomographic projector on the GPU"""
-from __future__ import absolute_import, print_function, with_statement, division
-
__authors__ = ["A. Mirone, P. Paleo"]
__license__ = "MIT"
__date__ = "01/08/2019"
@@ -51,14 +48,23 @@ class Projection(OpenclProcessing):
A class for performing a tomographic projection (Radon Transform) using
OpenCL
"""
+
kernel_files = ["proj.cl", "array_utils.cl"]
    logger.warning("Forward Projector is untested and unsupported for now")
- def __init__(self, slice_shape, angles, axis_position=None,
- detector_width=None, normalize=False, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
+ def __init__(
+ self,
+ slice_shape,
+ angles,
+ axis_position=None,
+ detector_width=None,
+ normalize=False,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ ):
"""Constructor of the OpenCL projector.
:param slice_shape: shape of the slice: (num_rows, num_columns).
@@ -87,9 +93,14 @@ class Projection(OpenclProcessing):
# if sys.platform.startswith('darwin'): # assuming no discrete GPU
# raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
self.shape = slice_shape
self.axis_pos = axis_position
self.angles = angles
@@ -98,24 +109,27 @@ class Projection(OpenclProcessing):
# Default values
if self.axis_pos is None:
- self.axis_pos = (self.shape[1] - 1) / 2.
+ self.axis_pos = (self.shape[1] - 1) / 2.0
if self.dwidth is None:
self.dwidth = self.shape[1]
- if not(np.iterable(self.angles)):
+ if not (np.iterable(self.angles)):
if self.angles is None:
self.nprojs = self.shape[0]
else:
self.nprojs = self.angles
- self.angles = np.linspace(start=0,
- stop=np.pi,
- num=self.nprojs,
- endpoint=False).astype(dtype=np.float32)
+ self.angles = np.linspace(
+ start=0, stop=np.pi, num=self.nprojs, endpoint=False
+ ).astype(dtype=np.float32)
else:
self.nprojs = len(self.angles)
- self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos) # TODO: custom
- self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos) # TODO: custom
+ self.offset_x = -np.float32(
+ (self.shape[1] - 1) / 2.0 - self.axis_pos
+ ) # TODO: custom
+ self.offset_y = -np.float32(
+ (self.shape[0] - 1) / 2.0 - self.axis_pos
+ ) # TODO: custom
# Reset axis_pos once offset are computed
- self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.)
+ self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.0)
# Workgroup, ndrange and shared size
self.dimgrid_x = _idivup(self.dwidth, 16)
@@ -126,118 +140,122 @@ class Projection(OpenclProcessing):
self.wg = (16, 16)
self.ndrange = (
int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1
- int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1
+ int(self.dimgrid_y) * self.wg[1], # int(): pyopencl <= 2015.1
)
self._use_textures = self.check_textures_availability()
# Allocate memory
self.buffers = [
- BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE),
+ BufferDescription(
+ "_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE
+ ),
BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY),
BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ BufferDescription(
+ "d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY
+ ),
+ BufferDescription(
+ "d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY
+ ),
]
d_axis_corrections = parray.empty(self.queue, self.nprojs, np.float32)
d_axis_corrections.fill(np.float32(0.0))
- self.add_to_cl_mem(
- {
- "d_axis_corrections": d_axis_corrections
- }
+ self.add_to_cl_mem({"d_axis_corrections": d_axis_corrections})
+ self._tmp_extended_img = np.zeros(
+ (self.shape[0] + 2, self.shape[1] + 2), dtype=np.float32
)
- self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2),
- dtype=np.float32)
- if not(self._use_textures):
+ if not (self._use_textures):
self.allocate_slice()
else:
self.allocate_textures()
self.allocate_buffers()
- self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
- dtype=np.float32)
- if not(self._use_textures):
- self.cl_mem["d_slice"].fill(0.)
+ self._ex_sino = np.zeros((self._dimrecy, self._dimrecx), dtype=np.float32)
+ if not (self._use_textures):
+ self.cl_mem["d_slice"].fill(0.0)
# enqueue_fill_buffer has issues if opencl 1.2 is not present
# ~ pyopencl.enqueue_fill_buffer(
- # ~ self.queue,
- # ~ self.cl_mem["d_slice"],
- # ~ np.float32(0),
- # ~ 0,
- # ~ self._tmp_extended_img.size * _sizeof(np.float32)
+ # ~ self.queue,
+ # ~ self.cl_mem["d_slice"],
+ # ~ np.float32(0),
+ # ~ 0,
+ # ~ self._tmp_extended_img.size * _sizeof(np.float32)
# ~ )
# Precomputations
self.compute_angles()
self.proj_precomputations()
- self.cl_mem["d_axis_corrections"].fill(0.)
+ self.cl_mem["d_axis_corrections"].fill(0.0)
# enqueue_fill_buffer has issues if opencl 1.2 is not present
# ~ pyopencl.enqueue_fill_buffer(
- # ~ self.queue,
- # ~ self.cl_mem["d_axis_corrections"],
- # ~ np.float32(0),
- # ~ 0,
- # ~ self.nprojs*_sizeof(np.float32)
- # ~ )
+ # ~ self.queue,
+ # ~ self.cl_mem["d_axis_corrections"],
+ # ~ np.float32(0),
+ # ~ 0,
+ # ~ self.nprojs*_sizeof(np.float32)
+ # ~ )
# Shorthands
self._d_sino = self.cl_mem["_d_sino"]
compile_options = None
- if not(self._use_textures):
+ if not (self._use_textures):
compile_options = "-DDONT_USE_TEXTURES"
OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=compile_options
+ self, self.kernel_files, compile_options=compile_options
)
# check that workgroup can actually be (16, 16)
- self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu")
+ self.compiletime_workgroup_size = self.kernels.max_workgroup_size(
+ "forward_kernel_cpu"
+ )
def compute_angles(self):
angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs
- angles2[:self.nprojs] = np.copy(self.angles)
- angles2[self.nprojs:] = angles2[self.nprojs - 1]
+ angles2[: self.nprojs] = np.copy(self.angles)
+ angles2[self.nprojs :] = angles2[self.nprojs - 1]
self.angles2 = angles2
pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
def allocate_slice(self):
- ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
+ ary = parray.empty(
+ self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32
+ )
ary.fill(0)
self.add_to_cl_mem({"d_slice": ary})
def allocate_textures(self):
self.d_image_tex = pyopencl.Image(
- self.ctx,
- mf.READ_ONLY | mf.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
- )
+ self.ctx,
+ mf.READ_ONLY | mf.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY, pyopencl.channel_type.FLOAT
+ ),
+ hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
+ )
def transfer_to_texture(self, image):
image2 = image
- if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
+ if not (image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
image2 = np.ascontiguousarray(image)
- if not(self._use_textures):
+ if not (self._use_textures):
# TODO: create NoneEvent
return self.transfer_to_slice(image2)
# ~ return pyopencl.enqueue_copy(
- # ~ self.queue,
- # ~ self.cl_mem["d_slice"].data,
- # ~ image2,
- # ~ origin=(1, 1),
- # ~ region=image.shape[::-1]
- # ~ )
+ # ~ self.queue,
+ # ~ self.cl_mem["d_slice"].data,
+ # ~ image2,
+ # ~ origin=(1, 1),
+ # ~ region=image.shape[::-1]
+ # ~ )
else:
return pyopencl.enqueue_copy(
- self.queue,
- self.d_image_tex,
- image2,
- origin=(1, 1),
- region=image.shape[::-1]
- )
+ self.queue,
+ self.d_image_tex,
+ image2,
+ origin=(1, 1),
+ region=image.shape[::-1],
+ )
def transfer_device_to_texture(self, d_image):
- if not(self._use_textures):
+ if not (self._use_textures):
# TODO this copy should not be necessary
return self.cpy2d_to_slice(d_image)
else:
@@ -247,7 +265,10 @@ class Projection(OpenclProcessing):
d_image,
offset=0,
origin=(1, 1),
- region=(int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl <= 2015.2
+ region=(
+ int(self.shape[1]),
+ int(self.shape[0]),
+ ), # self.shape[::-1] # pyopencl <= 2015.2
)
def transfer_to_slice(self, image):
@@ -326,7 +347,7 @@ class Projection(OpenclProcessing):
np.int32(self._dimrecx),
np.int32((0, 0)),
np.int32((0, 0)),
- sino_shape_ocl
+ sino_shape_ocl,
)
return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
@@ -334,7 +355,10 @@ class Projection(OpenclProcessing):
"""
copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2)
"""
- ndrange = (int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl < 2015.2
+ ndrange = (
+ int(self.shape[1]),
+ int(self.shape[0]),
+ ) # self.shape[::-1] # pyopencl < 2015.2
wg = None
slice_shape_ocl = np.int32(ndrange)
kernel_args = (
@@ -344,7 +368,7 @@ class Projection(OpenclProcessing):
np.int32(self.shape[1]),
np.int32((1, 1)),
np.int32((0, 0)),
- slice_shape_ocl
+ slice_shape_ocl,
)
return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
@@ -367,7 +391,7 @@ class Projection(OpenclProcessing):
self.transfer_to_slice(image)
slice_ref = self.cl_mem["d_slice"].data
else:
- if not(self._use_textures):
+ if not (self._use_textures):
slice_ref = self.cl_mem["d_slice"].data
else:
slice_ref = self.d_image_tex
@@ -389,23 +413,17 @@ class Projection(OpenclProcessing):
self.offset_x,
self.offset_y,
np.int32(1), # josephnoclip, 1 by default
- np.int32(self.normalize)
+ np.int32(self.normalize),
)
# Call the kernel
- if not(self._use_textures):
+ if not (self._use_textures):
event_pj = self.kernels.forward_kernel_cpu(
- self.queue,
- self.ndrange,
- self.wg,
- *kernel_args
+ self.queue, self.ndrange, self.wg, *kernel_args
)
else:
event_pj = self.kernels.forward_kernel(
- self.queue,
- self.ndrange,
- self.wg,
- *kernel_args
+ self.queue, self.ndrange, self.wg, *kernel_args
)
events.append(EventDescription("projection", event_pj))
if dst is None:
@@ -413,7 +431,7 @@ class Projection(OpenclProcessing):
ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino)
events.append(EventDescription("copy D->H result", ev))
ev.wait()
- res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth])
+ res = np.copy(self._ex_sino[: self.nprojs, : self.dwidth])
else:
ev = self.cpy2d_to_sino(dst)
events.append(EventDescription("copy D->D result", ev))
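As a quick usage sketch (not part of the change, and to be taken with the warning above that the forward projector is unsupported): the class is driven directly from numpy arrays; the image size and number of angles below are arbitrary.

    import numpy
    from silx.opencl.projection import Projection

    image = numpy.random.rand(256, 256).astype(numpy.float32)
    # passing an integer as angles means that many projections over [0, pi)
    proj = Projection(image.shape, angles=180)
    sino = proj.projection(image)      # expected sinogram shape: (180, 256)
    print(sino.shape, sino.dtype)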
diff --git a/src/silx/opencl/reconstruction.py b/src/silx/opencl/reconstruction.py
index 2c84aee..c80a0ef 100644
--- a/src/silx/opencl/reconstruction.py
+++ b/src/silx/opencl/reconstruction.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016 European Synchrotron Radiation Facility
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Module for tomographic reconstruction algorithms"""
-from __future__ import absolute_import, print_function, with_statement, division
-
__authors__ = ["P. Paleo"]
__license__ = "MIT"
__date__ = "01/08/2019"
@@ -42,6 +39,7 @@ from .linalg import LinAlg
import pyopencl.array as parray
from pyopencl.elementwise import ElementwiseKernel
+
logger = logging.getLogger(__name__)
cl = pyopencl
@@ -68,13 +66,26 @@ class ReconstructionAlgorithm(OpenclProcessing):
store profiling elements (makes code slightly slower)
"""
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
+ def __init__(
+ self,
+ sino_shape,
+ slice_shape=None,
+ axis_position=None,
+ angles=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ ):
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
# Create a backprojector
self.backprojector = Backprojection(
@@ -83,7 +94,7 @@ class ReconstructionAlgorithm(OpenclProcessing):
axis_position=axis_position,
angles=angles,
ctx=self.ctx,
- profile=profile
+ profile=profile,
)
# Create a projector
self.projector = Projection(
@@ -93,7 +104,7 @@ class ReconstructionAlgorithm(OpenclProcessing):
detector_width=self.backprojector.num_bins,
normalize=False,
ctx=self.ctx,
- profile=profile
+ profile=profile,
)
self.sino_shape = sino_shape
self.is_cpu = self.backprojector.is_cpu
@@ -102,32 +113,34 @@ class ReconstructionAlgorithm(OpenclProcessing):
self.d_data.fill(0.0)
self.d_sino = parray.empty_like(self.d_data)
self.d_sino.fill(0.0)
- self.d_x = parray.empty(self.queue,
- self.backprojector.slice_shape,
- dtype=np.float32)
+ self.d_x = parray.empty(
+ self.queue, self.backprojector.slice_shape, dtype=np.float32
+ )
self.d_x.fill(0.0)
self.d_x_old = parray.empty_like(self.d_x)
self.d_x_old.fill(0.0)
- self.add_to_cl_mem({
- "d_data": self.d_data,
- "d_sino": self.d_sino,
- "d_x": self.d_x,
- "d_x_old": self.d_x_old,
- })
+ self.add_to_cl_mem(
+ {
+ "d_data": self.d_data,
+ "d_sino": self.d_sino,
+ "d_x": self.d_x,
+ "d_x_old": self.d_x_old,
+ }
+ )
def proj(self, d_slice, d_sino):
"""
Project d_slice to d_sino
"""
- self.projector.transfer_device_to_texture(d_slice.data) #.wait()
+ self.projector.transfer_device_to_texture(d_slice.data) # .wait()
self.projector.projection(dst=d_sino)
def backproj(self, d_sino, d_slice):
"""
Backproject d_sino to d_slice
"""
- self.backprojector.transfer_device_to_texture(d_sino.data) #.wait()
+ self.backprojector.transfer_device_to_texture(d_sino.data) # .wait()
self.backprojector.backprojection(dst=d_slice)
@@ -156,15 +169,30 @@ class SIRT(ReconstructionAlgorithm):
implementation.
"""
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
-
- ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
- axis_position=axis_position, angles=angles,
- ctx=ctx, devicetype=devicetype, platformid=platformid,
- deviceid=deviceid, profile=profile)
+ def __init__(
+ self,
+ sino_shape,
+ slice_shape=None,
+ axis_position=None,
+ angles=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ ):
+ ReconstructionAlgorithm.__init__(
+ self,
+ sino_shape,
+ slice_shape=slice_shape,
+ axis_position=axis_position,
+ angles=angles,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
self.compute_preconditioners()
def compute_preconditioners(self):
@@ -181,26 +209,31 @@ class SIRT(ReconstructionAlgorithm):
# r_{i,i} = 1/(sum_j a_{i,j})
slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
- R = 1./self.projector.projection(slice_ones) # could be all done on GPU, but I want extra checks
- R[np.logical_not(np.isfinite(R))] = 1. # In the case where the rotation axis is excentred
+ R = 1.0 / self.projector.projection(
+ slice_ones
+ ) # could be all done on GPU, but I want extra checks
+ R[
+ np.logical_not(np.isfinite(R))
+        ] = 1.0  # in case the rotation axis is off-centered
self.d_R = parray.to_device(self.queue, R)
# c_{j,j} = 1/(sum_i a_{i,j})
sino_ones = np.ones(self.sino_shape, dtype=np.float32)
- C = 1./self.backprojector.backprojection(sino_ones)
- C[np.logical_not(np.isfinite(C))] = 1. # In the case where the rotation axis is excentred
+ C = 1.0 / self.backprojector.backprojection(sino_ones)
+ C[
+ np.logical_not(np.isfinite(C))
+        ] = 1.0  # in case the rotation axis is off-centered
self.d_C = parray.to_device(self.queue, C)
- self.add_to_cl_mem({
- "d_R": self.d_R,
- "d_C": self.d_C
- })
+ self.add_to_cl_mem({"d_R": self.d_R, "d_C": self.d_C})
# TODO: compute and possibly return the residual
def run(self, data, n_it):
"""
Run n_it iterations of the SIRT algorithm.
"""
- cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
+ cl.enqueue_copy(
+ self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32))
+ )
d_x_old = self.d_x_old
d_x = self.d_x
@@ -257,26 +290,44 @@ class TV(ReconstructionAlgorithm):
the AMD opencl implementation.
"""
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
- axis_position=axis_position, angles=angles,
- ctx=ctx, devicetype=devicetype, platformid=platformid,
- deviceid=deviceid, profile=profile)
+ def __init__(
+ self,
+ sino_shape,
+ slice_shape=None,
+ axis_position=None,
+ angles=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ ):
+ ReconstructionAlgorithm.__init__(
+ self,
+ sino_shape,
+ slice_shape=slice_shape,
+ axis_position=axis_position,
+ angles=angles,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
self.compute_preconditioners()
# Create a LinAlg instance
self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx)
# Positivity constraint
- self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a", "a[i] = max(a[i], 0.0f);")
+ self.elwise_clamp = ElementwiseKernel(
+ self.ctx, "float *a", "a[i] = max(a[i], 0.0f);"
+ )
# Projection onto the L-infinity ball of radius Lambda
self.elwise_proj_linf = ElementwiseKernel(
self.ctx,
"float2* a, float Lambda",
"a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);",
- "elwise_proj_linf"
+ "elwise_proj_linf",
)
# Additional arrays
self.linalg.gradient(self.d_x)
@@ -287,11 +338,13 @@ class TV(ReconstructionAlgorithm):
self.d_p.fill(0)
self.d_q.fill(0)
self.d_tmp.fill(0)
- self.add_to_cl_mem({
- "d_p": self.d_p,
- "d_q": self.d_q,
- "d_tmp": self.d_tmp,
- })
+ self.add_to_cl_mem(
+ {
+ "d_p": self.d_p,
+ "d_q": self.d_q,
+ "d_tmp": self.d_tmp,
+ }
+ )
self.theta = 1.0
@@ -311,30 +364,36 @@ class TV(ReconstructionAlgorithm):
# Compute the diagonal preconditioner "Sigma"
slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
- Sigma_k = 1./self.projector.projection(slice_ones)
- Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1.
+ Sigma_k = 1.0 / self.projector.projection(slice_ones)
+ Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1.0
self.d_Sigma_k = parray.to_device(self.queue, Sigma_k)
self.d_Sigma_kp1 = self.d_Sigma_k + 1 # TODO: memory vs computation
- self.Sigma_grad = 1/2.0 # For discrete gradient, sum|D_i,j| = 2 along lines or cols
+ self.Sigma_grad = (
+ 1 / 2.0
+ ) # For discrete gradient, sum|D_i,j| = 2 along lines or cols
# Compute the diagonal preconditioner "Tau"
sino_ones = np.ones(self.sino_shape, dtype=np.float32)
C = self.backprojector.backprojection(sino_ones)
- Tau = 1./(C + 2.)
+ Tau = 1.0 / (C + 2.0)
self.d_Tau = parray.to_device(self.queue, Tau)
- self.add_to_cl_mem({
- "d_Sigma_k": self.d_Sigma_k,
- "d_Sigma_kp1": self.d_Sigma_kp1,
- "d_Tau": self.d_Tau
- })
+ self.add_to_cl_mem(
+ {
+ "d_Sigma_k": self.d_Sigma_k,
+ "d_Sigma_kp1": self.d_Sigma_kp1,
+ "d_Tau": self.d_Tau,
+ }
+ )
def run(self, data, n_it, Lambda, pos_constraint=False):
"""
Run n_it iterations of the TV-regularized reconstruction,
with the regularization parameter Lambda.
"""
- cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
+ cl.enqueue_copy(
+ self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32))
+ )
d_x = self.d_x
d_x_old = self.d_x_old
@@ -351,7 +410,7 @@ class TV(ReconstructionAlgorithm):
for k in range(0, n_it):
# Update primal variables
d_x_old[:] = d_x[:]
- #~ x = x + Tau*div(p) - Tau*Kadj(q)
+ # ~ x = x + Tau*div(p) - Tau*Kadj(q)
self.backproj(d_q, d_tmp)
self.linalg.divergence(d_p)
# TODO: this in less than three ops (one kernel ?)
@@ -363,20 +422,20 @@ class TV(ReconstructionAlgorithm):
self.elwise_clamp(d_x)
# Update dual variables
- #~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda)
+ # ~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda)
d_tmp[:] = d_x[:]
# FIXME: mul_add is out of place, put an equivalent thing in linalg...
- #~ d_tmp.mul_add(1 + theta, d_x_old, -theta)
- d_tmp *= 1+self.theta
- d_tmp -= self.theta*d_x_old
+ # ~ d_tmp.mul_add(1 + theta, d_x_old, -theta)
+ d_tmp *= 1 + self.theta
+ d_tmp -= self.theta * d_x_old
self.linalg.gradient(d_tmp)
# TODO: out of place mul_add
- #~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad)
+ # ~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad)
self.linalg.cl_mem["d_gradient"] *= self.Sigma_grad
d_p += self.linalg.cl_mem["d_gradient"]
self.elwise_proj_linf(d_p, Lambda)
- #~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k)
+ # ~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k)
self.proj(d_tmp, d_sino)
# TODO: this in less instructions
d_sino -= self.d_data
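A hedged sketch of driving the reformatted SIRT class (the sinogram content and iteration count are placeholders; reading the result back through d_x is one option, grounded in the attribute created by ReconstructionAlgorithm above):

    import numpy
    from silx.opencl.reconstruction import SIRT

    sino = numpy.random.rand(180, 256).astype(numpy.float32)   # (n_angles, n_bins)
    sirt = SIRT(sino.shape)
    sirt.run(sino, n_it=50)              # 50 SIRT iterations on the device
    reconstruction = sirt.d_x.get()      # current estimate as a numpy array
    print(reconstruction.shape)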
diff --git a/src/silx/opencl/setup.py b/src/silx/opencl/setup.py
deleted file mode 100644
index 10fb1be..0000000
--- a/src/silx/opencl/setup.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# coding: utf-8
-#
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-
-from __future__ import division
-
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__authors__ = ["J. Kieffer"]
-__date__ = "16/10/2017"
-
-import os.path
-from numpy.distutils.misc_util import Configuration
-
-
-def configuration(parent_package='', top_path=None):
- config = Configuration('opencl', parent_package, top_path)
- path = os.path.dirname(os.path.abspath(__file__))
- if os.path.exists(os.path.join(path, 'sift')):
- config.add_subpackage('sift')
- config.add_subpackage('codec')
- config.add_subpackage('test')
- return config
-
-
-if __name__ == "__main__":
- from numpy.distutils.core import setup
- setup(configuration=configuration)
diff --git a/src/silx/opencl/sinofilter.py b/src/silx/opencl/sinofilter.py
index d608744..fc447de 100644
--- a/src/silx/opencl/sinofilter.py
+++ b/src/silx/opencl/sinofilter.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2016-2019 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2023 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Module for sinogram filtering on CPU/GPU."""
-from __future__ import absolute_import, print_function, with_statement, division
-
__authors__ = ["P. Paleo"]
__license__ = "MIT"
__date__ = "07/06/2019"
@@ -41,8 +38,6 @@ from .processing import OpenclProcessing
from ..math.fft.clfft import CLFFT, __have_clfft__
from ..math.fft.npfft import NPFFT
from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter
-from ..utils.deprecation import deprecated
-
class SinoFilter(OpenclProcessing):
@@ -53,12 +48,21 @@ class SinoFilter(OpenclProcessing):
- In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x)
- In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x)
"""
+
kernel_files = ["array_utils.cl"]
powers = generate_powers()
- def __init__(self, sino_shape, filter_name=None, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False, extra_options=None):
+ def __init__(
+ self,
+ sino_shape,
+ filter_name=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ profile=False,
+ extra_options=None,
+ ):
"""Constructor of OpenCL FFT-Convolve.
:param sino_shape: shape of the sinogram.
@@ -75,9 +79,14 @@ class SinoFilter(OpenclProcessing):
:param dict extra_options: Advanced extra options.
Current options are: cutoff, use_numpy_fft
"""
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ profile=profile,
+ )
self._init_extra_options(extra_options)
self._calculate_shapes(sino_shape)
@@ -95,8 +104,9 @@ class SinoFilter(OpenclProcessing):
if self.ndim == 2:
n_angles, dwidth = sino_shape
else:
- raise ValueError("Invalid sinogram number of dimensions: "
- "expected 2 dimensions")
+ raise ValueError(
+ "Invalid sinogram number of dimensions: " "expected 2 dimensions"
+ )
self.sino_shape = sino_shape
self.n_angles = n_angles
self.dwidth = dwidth
@@ -113,14 +123,14 @@ class SinoFilter(OpenclProcessing):
Current options are: cutoff,
"""
self.extra_options = {
- "cutoff": 1.,
+ "cutoff": 1.0,
"use_numpy_fft": False,
}
if extra_options is not None:
self.extra_options.update(extra_options)
def _init_fft(self):
- if __have_clfft__ and not(self.extra_options["use_numpy_fft"]):
+ if __have_clfft__ and not (self.extra_options["use_numpy_fft"]):
self.fft_backend = "opencl"
self.fft = CLFFT(
self.sino_padded_shape,
@@ -130,17 +140,22 @@ class SinoFilter(OpenclProcessing):
)
else:
self.fft_backend = "numpy"
- print("The gpyfft module was not found. The Fourier transforms "
- "will be done on CPU. For more performances, it is advised "
- "to install gpyfft.""")
+ print(
+ "The gpyfft module was not found. The Fourier transforms "
+                "will be done on CPU. For better performance, it is advised "
+                "to install gpyfft."
+ )
self.fft = NPFFT(
template=np.zeros(self.sino_padded_shape, "f"),
axes=(-1,),
)
def _allocate_memory(self):
- self.d_filter_f = parray.zeros(self.queue, (self.sino_f_shape[-1],), np.complex64)
- self.is_cpu = (self.device.type == "CPU")
+ self.d_filter_f = parray.zeros(
+ self.queue, (self.sino_f_shape[-1],), np.complex64
+ )
+ self.is_cpu = self.device.type == "CPU"
# These are already allocated by FFT() if using the opencl backend
if self.fft_backend == "opencl":
self.d_sino_padded = self.fft.data_in
@@ -163,7 +178,9 @@ class SinoFilter(OpenclProcessing):
self.dwidth_padded,
self.filter_name,
cutoff=self.extra_options["cutoff"],
- )[:self.dwidth_padded // 2 + 1] # R2C
+ )[
+ : self.dwidth_padded // 2 + 1
+ ] # R2C
self.set_filter(filter_f, normalize=True)
def set_filter(self, h_filt, normalize=True):
@@ -184,7 +201,7 @@ class SinoFilter(OpenclProcessing):
"""
% (self.sino_f_shape[-1], h_filt.size)
)
- if not(np.iscomplexobj(h_filt)):
+ if not (np.iscomplexobj(h_filt)):
print("Warning: expected a complex Fourier filter")
self.filter_f = h_filt
if normalize:
@@ -195,24 +212,27 @@ class SinoFilter(OpenclProcessing):
def _init_kernels(self):
OpenclProcessing.compile_kernels(self, self.kernel_files)
h, w = self.d_sino_f.shape
- self.mult_kern_args = (self.queue, (int(w), (int(h))), None,
- self.d_sino_f.data,
- self.d_filter_f.data,
- np.int32(w),
- np.int32(h))
+ self.mult_kern_args = (
+ self.queue,
+ (int(w), (int(h))),
+ None,
+ self.d_sino_f.data,
+ self.d_filter_f.data,
+ np.int32(w),
+ np.int32(h),
+ )
def check_array(self, arr):
if arr.dtype != np.float32:
raise ValueError("Expected data type = numpy.float32")
if arr.shape != self.sino_shape:
- raise ValueError("Expected sinogram shape %s, got %s" %
- (self.sino_shape, arr.shape))
- if not(isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)):
- raise ValueError("Expected either numpy.ndarray or "
- "pyopencl.array.Array")
-
- def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0),
- src_offset=(0, 0)):
+ raise ValueError(
+ "Expected sinogram shape %s, got %s" % (self.sino_shape, arr.shape)
+ )
+ if not (isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)):
+ raise ValueError("Expected either numpy.ndarray or " "pyopencl.array.Array")
+
+ def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0), src_offset=(0, 0)):
"""
:param dst:
@@ -222,18 +242,23 @@ class SinoFilter(OpenclProcessing):
:param src_offset:
"""
shape = tuple(int(i) for i in transfer_shape[::-1])
- ev = self.kernels.cpy2d(self.queue, shape, None,
- dst.data,
- src.data,
- np.int32(dst.shape[1]),
- np.int32(src.shape[1]),
- np.int32(dst_offset),
- np.int32(src_offset),
- np.int32(transfer_shape[::-1]))
+ ev = self.kernels.cpy2d(
+ self.queue,
+ shape,
+ None,
+ dst.data,
+ src.data,
+ np.int32(dst.shape[1]),
+ np.int32(src.shape[1]),
+ np.int32(dst_offset),
+ np.int32(src_offset),
+ np.int32(transfer_shape[::-1]),
+ )
ev.wait()
- def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0),
- src_offset=(0, 0)):
+ def copy2d_host(
+ self, dst, src, transfer_shape, dst_offset=(0, 0), src_offset=(0, 0)
+ ):
"""
:param dst:
@@ -245,7 +270,9 @@ class SinoFilter(OpenclProcessing):
s = transfer_shape
do = dst_offset
so = src_offset
- dst[do[0]:do[0] + s[0], do[1]:do[1] + s[1]] = src[so[0]:so[0] + s[0], so[1]:so[1] + s[1]]
+ dst[do[0] : do[0] + s[0], do[1] : do[1] + s[1]] = src[
+ so[0] : so[0] + s[0], so[1] : so[1] + s[1]
+ ]
def _prepare_input_sino(self, sino):
"""
@@ -272,7 +299,7 @@ class SinoFilter(OpenclProcessing):
self.d_sino_padded.finish() # should not be required here
else:
# Numpy backend: FFT/mult/IFFT are done on host.
- if not(isinstance(sino, np.ndarray)):
+ if not (isinstance(sino, np.ndarray)):
# Numpy backend + pyopencl input: need to copy D->H
self.tmp_sino_host[:] = sino[:]
h_sino_ref = self.tmp_sino_host
@@ -296,9 +323,11 @@ class SinoFilter(OpenclProcessing):
# As pyopencl does not support rectangular copies, we first have
# to call a kernel doing rectangular copy D->D, then do a copy
# D->H.
- self.copy2d(dst=self.tmp_sino_device,
- src=self.d_sino_padded,
- transfer_shape=self.sino_shape)
+ self.copy2d(
+ dst=self.tmp_sino_device,
+ src=self.d_sino_padded,
+ transfer_shape=self.sino_shape,
+ )
if self.is_cpu:
self.tmp_sino_device.finish() # should not be required here
res[:] = self.tmp_sino_device.get()[:]
@@ -309,11 +338,13 @@ class SinoFilter(OpenclProcessing):
if self.is_cpu:
res.finish() # should not be required here
else:
- if not(isinstance(res, np.ndarray)):
+ if not (isinstance(res, np.ndarray)):
# Numpy backend + pyopencl output: rect copy H->H + copy H->D
- self.copy2d_host(dst=self.tmp_sino_host,
- src=self.d_sino_padded,
- transfer_shape=self.sino_shape)
+ self.copy2d_host(
+ dst=self.tmp_sino_host,
+ src=self.d_sino_padded,
+ transfer_shape=self.sino_shape,
+ )
res[:] = self.tmp_sino_host[:]
else:
# Numpy backend + numpy output: rect copy H->H
@@ -334,9 +365,7 @@ class SinoFilter(OpenclProcessing):
def _multiply_fourier(self):
if self.fft_backend == "opencl":
# Everything is on device. Call the multiplication kernel.
- ev = self.kernels.inplace_complex_mul_2Dby1D(
- *self.mult_kern_args
- )
+ ev = self.kernels.inplace_complex_mul_2Dby1D(*self.mult_kern_args)
ev.wait()
if self.is_cpu:
self.d_sino_f.finish() # should not be required here
@@ -379,57 +408,3 @@ class SinoFilter(OpenclProcessing):
# ~ return output
__call__ = filter_sino
-
-
-
-
-# -------------------
-# - Compatibility -
-# -------------------
-
-
-def nextpow2(N):
- p = 1
- while p < N:
- p *= 2
- return p
-
-
-@deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
-def fourier_filter(sino, filter_=None, fft_size=None):
- """Simple np based implementation of fourier space filter.
- This function is deprecated, please use silx.opencl.sinofilter.SinoFilter.
-
- :param sino: of shape shape = (num_projs, num_bins)
- :param filter: filter function to apply in fourier space
- :fft_size: size on which perform the fft. May be larger than the sino array
- :return: filtered sinogram
- """
- assert sino.ndim == 2
- num_projs, num_bins = sino.shape
- if fft_size is None:
- fft_size = nextpow2(num_bins * 2 - 1)
- else:
- assert fft_size >= num_bins
- if fft_size == num_bins:
- sino_zeropadded = sino.astype(np.float32)
- else:
- sino_zeropadded = np.zeros((num_projs, fft_size),
- dtype=np.complex64)
- sino_zeropadded[:, :num_bins] = sino.astype(np.float32)
-
- if filter_ is None:
- h = np.zeros(fft_size, dtype=np.float32)
- L2 = fft_size // 2 + 1
- h[0] = 1 / 4.
- j = np.linspace(1, L2, L2 // 2, False)
- h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2)
- h[L2:] = np.copy(h[1:L2 - 1][::-1])
- filter_ = np.fft.fft(h).astype(np.complex64)
-
- # Linear convolution
- sino_f = np.fft.fft(sino, fft_size)
- sino_f = sino_f * filter_
- sino_filtered = np.fft.ifft(sino_f)[:, :num_bins].real
-
- return np.ascontiguousarray(sino_filtered.real, dtype=np.float32)
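For context, a small usage sketch of SinoFilter, which supersedes the fourier_filter() helper deleted above (the filter name and shapes are illustrative):

    import numpy
    from silx.opencl.sinofilter import SinoFilter

    sino = numpy.random.rand(180, 256).astype(numpy.float32)   # (n_angles, d_x)
    sf = SinoFilter(sino.shape, filter_name="hann")
    filtered = sf.filter_sino(sino)      # same as sf(sino): __call__ = filter_sino
    print(filtered.shape)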
diff --git a/src/silx/opencl/sparse.py b/src/silx/opencl/sparse.py
index 514589a..9baa3a0 100644
--- a/src/silx/opencl/sparse.py
+++ b/src/silx/opencl/sparse.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2019 European Synchrotron Radiation Facility
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Module for data sparsification on CPU/GPU."""
-from __future__ import absolute_import, print_function, with_statement, division
-
__authors__ = ["P. Paleo"]
__license__ = "MIT"
__date__ = "07/06/2019"
@@ -38,11 +35,13 @@ from pyopencl.scan import GenericScanKernel
from pyopencl.tools import dtype_to_ctype
from .common import pyopencl as cl
from .processing import OpenclProcessing, EventDescription, BufferDescription
+
mf = cl.mem_flags
CSRData = namedtuple("CSRData", ["data", "indices", "indptr"])
+
def tuple_to_csrdata(arrs):
"""
Converts a 3-tuple to a CSRData namedtuple.
@@ -52,13 +51,23 @@ def tuple_to_csrdata(arrs):
return CSRData(data=arrs[0], indices=arrs[1], indptr=arrs[2])
-
class CSR(OpenclProcessing):
kernel_files = ["sparse.cl"]
- def __init__(self, shape, dtype="f", max_nnz=None, idx_dtype=numpy.int32,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
+ def __init__(
+ self,
+ shape,
+ dtype="f",
+ max_nnz=None,
+ idx_dtype=numpy.int32,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ block_size=None,
+ memory=None,
+ profile=False,
+ ):
"""
Compute Compressed Sparse Row format of an image (2D matrix).
It is designed to be compatible with scipy.sparse.csr_matrix.
@@ -80,10 +89,16 @@ class CSR(OpenclProcessing):
for information on the other parameters.
"""
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, memory=memory,
- profile=profile)
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ block_size=block_size,
+ memory=memory,
+ profile=profile,
+ )
self._set_parameters(shape, dtype, max_nnz, idx_dtype)
self._allocate_memory()
self._setup_kernels()
@@ -96,23 +111,23 @@ class CSR(OpenclProcessing):
self.shape = shape
self.size = numpy.prod(shape)
self._set_idx_dtype(idx_dtype)
- assert len(shape) == 2 #
+ assert len(shape) == 2 #
if max_nnz is None:
- self.max_nnz = numpy.prod(shape) # worst case
+ self.max_nnz = numpy.prod(shape) # worst case
else:
self.max_nnz = int(max_nnz)
self._set_dtype(dtype)
-
def _set_idx_dtype(self, idx_dtype):
idx_dtype = numpy.dtype(idx_dtype)
if idx_dtype.kind not in ["i", "u"]:
raise ValueError("Not an integer type: %s" % idx_dtype)
# scan value type must have size divisible by 4 bytes
if idx_dtype.itemsize % 4 != 0:
- raise ValueError("Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes")
- self.indice_dtype = idx_dtype #
-
+ raise ValueError(
+ "Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes"
+ )
+ self.indice_dtype = idx_dtype #
def _set_dtype(self, dtype):
self.dtype = numpy.dtype(dtype)
@@ -122,42 +137,44 @@ class CSR(OpenclProcessing):
self._c_zero_str = "0.0f"
elif self.dtype == numpy.dtype(numpy.float64):
self._c_zero_str = "0.0"
- else: # assuming integer
+ else: # assuming integer
self._c_zero_str = "0"
self.c_dtype = dtype_to_ctype(self.dtype)
self.idx_c_dtype = dtype_to_ctype(self.indice_dtype)
-
def _allocate_memory(self):
- self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ?
+ self.is_cpu = self.device.type == "CPU" # move to OpenclProcessing ?
self.buffers = [
BufferDescription("array", (self.size,), self.dtype, mf.READ_ONLY),
BufferDescription("data", (self.max_nnz,), self.dtype, mf.READ_WRITE),
- BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE),
- BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE),
+ BufferDescription(
+ "indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE
+ ),
+ BufferDescription(
+ "indptr", (self.shape[0] + 1,), self.indice_dtype, mf.READ_WRITE
+ ),
]
self.allocate_buffers(use_array=True)
for arr_name in ["array", "data", "indices", "indptr"]:
setattr(self, arr_name, self.cl_mem[arr_name])
- self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty()
+ self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty()
self._old_array = self.array
self._old_data = self.data
self._old_indices = self.indices
self._old_indptr = self.indptr
-
def _setup_kernels(self):
self._setup_compaction_kernel()
self._setup_decompaction_kernel()
-
def _setup_compaction_kernel(self):
kernel_signature = str(
"__global %s *data, \
__global %s *data_compacted, \
__global %s *indices, \
__global %s* indptr \
- """ % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype)
+ "
+ "" % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype)
)
if self.dtype.kind == "f":
map_nonzero_expr = "(fabs(data[i]) > %s) ? 1 : 0" % self._c_zero_str
@@ -167,10 +184,12 @@ class CSR(OpenclProcessing):
raise ValueError("Unknown data type")
self.scan_kernel = GenericScanKernel(
- self.ctx, self.indice_dtype,
+ self.ctx,
+ self.indice_dtype,
arguments=kernel_signature,
input_expr=map_nonzero_expr,
- scan_expr="a+b", neutral="0",
+ scan_expr="a+b",
+ neutral="0",
output_statement="""
// item is the running sum of input_expr(i), i.e the cumsum of "nonzero"
if (prev_item != item) {
@@ -186,7 +205,6 @@ class CSR(OpenclProcessing):
preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)",
)
-
def _setup_decompaction_kernel(self):
OpenclProcessing.compile_kernels(
self,
@@ -195,18 +213,17 @@ class CSR(OpenclProcessing):
"-DIMAGE_WIDTH=%d" % self.shape[1],
"-DDTYPE=%s" % self.c_dtype,
"-DIDX_DTYPE=%s" % self.idx_c_dtype,
- ]
+ ],
)
device = self.ctx.devices[0]
wg_x = min(
device.max_work_group_size,
32,
- self.kernels.max_workgroup_size("densify_csr")
+ self.kernels.max_workgroup_size("densify_csr"),
)
self._decomp_wg = (wg_x, 1)
self._decomp_grid = (self._decomp_wg[0], self.shape[0])
-
# --------------------------------------------------------------------------
# -------------------------- Array utils -----------------------------------
# --------------------------------------------------------------------------
@@ -222,7 +239,6 @@ class CSR(OpenclProcessing):
assert arr.size == self.size
assert arr.dtype == self.dtype
-
# TODO handle pyopencl Buffer
def check_sparse_arrays(self, csr_data):
"""
@@ -237,12 +253,11 @@ class CSR(OpenclProcessing):
assert arr.ndim == 1
assert csr_data.data.size <= self.max_nnz
assert csr_data.indices.size <= self.max_nnz
- assert csr_data.indptr.size == self.shape[0]+1
+ assert csr_data.indptr.size == self.shape[0] + 1
assert csr_data.data.dtype == self.dtype
assert csr_data.indices.dtype == self.indice_dtype
assert csr_data.indptr.dtype == self.indice_dtype
-
def set_array(self, arr):
"""
Set the provided array as the current context 2D matrix.
@@ -262,23 +277,25 @@ class CSR(OpenclProcessing):
else:
raise ValueError("Expected pyopencl array or numpy array")
-
def set_sparse_arrays(self, csr_data):
if csr_data is None:
return
self.check_sparse_arrays(csr_data)
- for name, arr in {"data": csr_data.data, "indices": csr_data.indices, "indptr": csr_data.indptr}.items():
+ for name, arr in {
+ "data": csr_data.data,
+ "indices": csr_data.indices,
+ "indptr": csr_data.indptr,
+ }.items():
# The current array is a device array. Don't copy, use it directly
if isinstance(arr, parray.Array):
setattr(self, "_old_" + name, getattr(self, name))
setattr(self, name, arr)
# The current array is a numpy.ndarray: copy H2D
elif isinstance(arr, numpy.ndarray):
- getattr(self, name)[:arr.size] = arr[:]
+ getattr(self, name)[: arr.size] = arr[:]
else:
raise ValueError("Unsupported array type: %s" % type(arr))
-
def _recover_arrays_references(self):
"""
Recover the previous arrays references, and return the references of the
@@ -293,7 +310,6 @@ class CSR(OpenclProcessing):
setattr(self, name, getattr(self, "_old_" + name))
return array, (data, indices, indptr)
-
def get_sparse_arrays(self, output):
"""
        Get the sparse CSR arrays (data, indices, indptr) of the current context.
@@ -314,7 +330,6 @@ class CSR(OpenclProcessing):
res = output
return res
-
def get_array(self, output):
if output is None:
res = self.array.get().reshape(self.shape)
@@ -344,7 +359,7 @@ class CSR(OpenclProcessing):
self.indices,
self.indptr,
)
- #~ evt.wait()
+ # ~ evt.wait()
self.profile_add(evt, "sparsification kernel")
res = self.get_sparse_arrays(output)
self._recover_arrays_references()
@@ -355,9 +370,7 @@ class CSR(OpenclProcessing):
# --------------------------------------------------------------------------
def densify(self, data, indices, indptr, output=None):
- self.set_sparse_arrays(
- CSRData(data=data, indices=indices, indptr=indptr)
- )
+ self.set_sparse_arrays(CSRData(data=data, indices=indices, indptr=indptr))
self.set_array(output)
evt = self.kernels.densify_csr(
self.queue,
@@ -369,9 +382,8 @@ class CSR(OpenclProcessing):
self.array.data,
numpy.int32(self.shape[0]),
)
- #~ evt.wait()
+ # ~ evt.wait()
self.profile_add(evt, "desparsification kernel")
res = self.get_array(output)
self._recover_arrays_references()
return res
-
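A short usage sketch of the CSR class (assuming sparsify() as the public sparsification entry point returning the (data, indices, indptr) triplet; the toy image is made up, and the returned arrays are sized max_nnz):

    import numpy
    from silx.opencl.sparse import CSR

    image = numpy.zeros((64, 64), dtype=numpy.float32)
    image[10, 20] = 3.0
    image[40, 5] = 7.0

    csr = CSR(image.shape, dtype="f")
    data, indices, indptr = csr.sparsify(image)   # CSR triplet on the device
    dense = csr.densify(data, indices, indptr)    # back to a dense 2D numpy array
    assert numpy.allclose(dense, image)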
diff --git a/src/silx/opencl/statistics.py b/src/silx/opencl/statistics.py
index a96ee33..26d23e6 100644
--- a/src/silx/opencl/statistics.py
+++ b/src/silx/opencl/statistics.py
@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
#
# Project: SILX
# https://github.com/silx-kit/silx
#
-# Copyright (C) 2012-2019 European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -37,7 +36,7 @@ __contact__ = "jerome.kieffer@esrf.fr"
import logging
import numpy
-from collections import OrderedDict, namedtuple
+from collections import namedtuple
from math import sqrt
from .common import pyopencl
@@ -47,6 +46,7 @@ from .utils import concatenate_cl_kernel
if pyopencl:
mf = pyopencl.mem_flags
from pyopencl.reduction import ReductionKernel
+
try:
from pyopencl import cltypes
except ImportError:
@@ -59,8 +59,9 @@ else:
raise ImportError("pyopencl is not installed")
logger = logging.getLogger(__name__)
-StatResults = namedtuple("StatResults", ["min", "max", "cnt", "sum", "mean",
- "var", "std"])
+StatResults = namedtuple(
+ "StatResults", ["min", "max", "cnt", "sum", "mean", "var", "std"]
+)
zero8 = "(float8)(FLT_MAX, -FLT_MAX, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)"
# min max cnt cnt_e sum sum_e var var_e
@@ -82,33 +83,52 @@ class Statistics(OpenclProcessing):
Switch on profiling to be able to profile at the kernel level,
store profiling elements (makes code slightly slower)
"""
+
buffers = [
BufferDescription("raw", 1, numpy.float32, mf.READ_ONLY),
BufferDescription("converted", 1, numpy.float32, mf.READ_WRITE),
]
kernel_files = ["preprocess.cl"]
- mapping = {numpy.int8: "s8_to_float",
- numpy.uint8: "u8_to_float",
- numpy.int16: "s16_to_float",
- numpy.uint16: "u16_to_float",
- numpy.uint32: "u32_to_float",
- numpy.int32: "s32_to_float"}
-
- def __init__(self, size=None, dtype=None, template=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, profile=False
- ):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
+ mapping = {
+ numpy.int8: "s8_to_float",
+ numpy.uint8: "u8_to_float",
+ numpy.int16: "s16_to_float",
+ numpy.uint16: "u16_to_float",
+ numpy.uint32: "u32_to_float",
+ numpy.int32: "s32_to_float",
+ }
+
+ def __init__(
+ self,
+ size=None,
+ dtype=None,
+ template=None,
+ ctx=None,
+ devicetype="all",
+ platformid=None,
+ deviceid=None,
+ block_size=None,
+ profile=False,
+ ):
+ OpenclProcessing.__init__(
+ self,
+ ctx=ctx,
+ devicetype=devicetype,
+ platformid=platformid,
+ deviceid=deviceid,
+ block_size=block_size,
+ profile=profile,
+ )
self.size = size
self.dtype = dtype
if template is not None:
self.size = template.size
self.dtype = template.dtype
- self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
- for i in self.__class__.buffers]
+ self.buffers = [
+ BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
+ for i in self.__class__.buffers
+ ]
self.allocate_buffers(use_array=True)
self.compile_kernels()
@@ -117,43 +137,54 @@ class Statistics(OpenclProcessing):
def set_kernel_arguments(self):
"""Parametrize all kernel arguments"""
for val in self.mapping.values():
- self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("raw", "converted")))
+ self.cl_kernel_args[val] = dict(
+ ((i, self.cl_mem[i]) for i in ("raw", "converted"))
+ )
def compile_kernels(self):
"""Compile the kernel"""
- OpenclProcessing.compile_kernels(self,
- self.kernel_files,
- "-D NIMAGE=%i" % self.size)
+ OpenclProcessing.compile_kernels(
+ self, self.kernel_files, "-D NIMAGE=%i" % self.size
+ )
compiler_options = self.get_compiler_options(x87_volatile=True)
src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl"))
- self.reduction_comp = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
- self.reduction_simple = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics_simple(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
+ self.reduction_comp = ReductionKernel(
+ self.ctx,
+ dtype_out=float8,
+ neutral=zero8,
+ map_expr="map_statistics(data, i)",
+ reduce_expr="reduce_statistics(a,b)",
+ arguments="__global float *data",
+ preamble=src,
+ options=compiler_options,
+ )
+ self.reduction_simple = ReductionKernel(
+ self.ctx,
+ dtype_out=float8,
+ neutral=zero8,
+ map_expr="map_statistics(data, i)",
+ reduce_expr="reduce_statistics_simple(a,b)",
+ arguments="__global float *data",
+ preamble=src,
+ options=compiler_options,
+ )
if "cl_khr_fp64" in self.device.extensions:
- self.reduction_double = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics_double(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
+ self.reduction_double = ReductionKernel(
+ self.ctx,
+ dtype_out=float8,
+ neutral=zero8,
+ map_expr="map_statistics(data, i)",
+ reduce_expr="reduce_statistics_double(a,b)",
+ arguments="__global float *data",
+ preamble=src,
+ options=compiler_options,
+ )
else:
- logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device)
+ logger.info(
+                "Device %s does not support double-precision arithmetic, falling back to compensated arithmetic",
+ self.device,
+ )
self.reduction_double = self.reduction_comp
def send_buffer(self, data, dest):
@@ -168,23 +199,27 @@ class Statistics(OpenclProcessing):
dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
events = []
if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
- copy_image = pyopencl.enqueue_copy(self.queue,
- self.cl_mem[dest].data,
- numpy.ascontiguousarray(data, dest_type))
+ copy_image = pyopencl.enqueue_copy(
+ self.queue,
+ self.cl_mem[dest].data,
+ numpy.ascontiguousarray(data, dest_type),
+ )
events.append(EventDescription("copy H->D %s" % dest, copy_image))
else:
- copy_image = pyopencl.enqueue_copy(self.queue,
- self.cl_mem["raw"].data,
- numpy.ascontiguousarray(data))
+ copy_image = pyopencl.enqueue_copy(
+ self.queue, self.cl_mem["raw"].data, numpy.ascontiguousarray(data)
+ )
kernel = getattr(self.program, self.mapping[data.dtype.type])
- cast_to_float = kernel(self.queue,
- (self.size,),
- None,
- self.cl_mem["raw"].data,
- self.cl_mem[dest].data)
+ cast_to_float = kernel(
+ self.queue,
+ (self.size,),
+ None,
+ self.cl_mem["raw"].data,
+ self.cl_mem[dest].data,
+ )
events += [
EventDescription("copy H->D raw", copy_image),
- EventDescription(f"cast to float {dest}", cast_to_float)
+ EventDescription(f"cast to float {dest}", cast_to_float),
]
if self.profile:
self.events += events
@@ -194,7 +229,7 @@ class Statistics(OpenclProcessing):
"""Actually calculate the statics on the data
:param numpy.ndarray data: numpy array with the image
- :param comp: use Kahan compensated arithmetics for the calculation
+        :param comp: use Kahan compensated arithmetic for the calculation
:return: Statistics named tuple
:rtype: StatResults
"""
@@ -217,9 +252,11 @@ class Statistics(OpenclProcessing):
reduction = self.reduction_double
else:
reduction = self.reduction_comp
- res_d, evt = reduction(self.cl_mem["converted"][:self.size],
- queue=self.queue,
- return_event=True)
+ res_d, evt = reduction(
+ self.cl_mem["converted"][: self.size],
+ queue=self.queue,
+ return_event=True,
+ )
events.append(EventDescription(f"statistical reduction {comp}", evt))
if self.profile:
self.events += events
@@ -230,13 +267,7 @@ class Statistics(OpenclProcessing):
sum_ = 1.0 * res_h["s4"] + res_h["s5"]
m2 = 1.0 * res_h["s6"] + res_h["s7"]
var = m2 / (count - 1.0)
- res = StatResults(min_,
- max_,
- count,
- sum_,
- sum_ / count,
- var,
- sqrt(var))
+ res = StatResults(min_, max_, count, sum_, sum_ / count, var, sqrt(var))
return res
__call__ = process
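
For orientation, a minimal usage sketch of the Statistics class whose hunks end here; process is aliased to __call__ just above and returns the StatResults tuple built from the reduction. The template= constructor argument mirrors the silx test-suite and should be treated as an assumption; a working pyopencl installation and at least one OpenCL device are required:

    import numpy
    from silx.opencl.statistics import Statistics

    data = numpy.random.randint(0, 65000, size=1 << 20).astype(numpy.uint16)
    stats = Statistics(template=data)  # assumption: size and dtype are taken from the template array
    res = stats(data)                  # __call__ = process; returns StatResults(min, max, cnt, sum, mean, var, std)
    assert res.cnt == data.size
    print(res.mean, res.std)
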
diff --git a/src/silx/opencl/test/__init__.py b/src/silx/opencl/test/__init__.py
index 92cda4a..b1ecf1b 100644
--- a/src/silx/opencl/test/__init__.py
+++ b/src/silx/opencl/test/__init__.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
#
# Project: silx
# https://github.com/silx-kit/silx
diff --git a/src/silx/opencl/test/test_addition.py b/src/silx/opencl/test/test_addition.py
index 3b668bf..98beab4 100644
--- a/src/silx/opencl/test/test_addition.py
+++ b/src/silx/opencl/test/test_addition.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: Sift implementation in Python + OpenCL
# https://github.com/silx-kit/silx
@@ -41,16 +40,17 @@ import pytest
import unittest
from ..common import ocl, _measure_workgroup_size, query_kernel_info
+
if ocl:
import pyopencl
import pyopencl.array
from ..utils import get_opencl_code
+
logger = logging.getLogger(__name__)
@unittest.skipUnless(ocl, "PyOpenCl is missing")
class TestAddition(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
super(TestAddition, cls).setUpClass()
@@ -59,8 +59,9 @@ class TestAddition(unittest.TestCase):
if logger.getEffectiveLevel() <= logging.INFO:
cls.PROFILE = True
cls.queue = pyopencl.CommandQueue(
- cls.ctx,
- properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+ cls.ctx,
+ properties=pyopencl.command_queue_properties.PROFILING_ENABLE,
+ )
else:
cls.PROFILE = False
cls.queue = pyopencl.CommandQueue(cls.ctx)
@@ -69,7 +70,10 @@ class TestAddition(unittest.TestCase):
@classmethod
def tearDownClass(cls):
super(TestAddition, cls).tearDownClass()
- print("Maximum valid workgroup size %s on device %s" % (cls.max_valid_wg, cls.ctx.devices[0]))
+ print(
+ "Maximum valid workgroup size %s on device %s"
+ % (cls.max_valid_wg, cls.ctx.devices[0])
+ )
cls.ctx = None
cls.queue = None
@@ -96,11 +100,20 @@ class TestAddition(unittest.TestCase):
d_array_result = pyopencl.array.empty_like(self.d_array_img)
wg = 1 << i
try:
- evt = self.program.addition(self.queue, (self.shape,), (wg,),
- self.d_array_img.data, self.d_array_5.data, d_array_result.data, numpy.int32(self.shape))
+ evt = self.program.addition(
+ self.queue,
+ (self.shape,),
+ (wg,),
+ self.d_array_img.data,
+ self.d_array_5.data,
+ d_array_result.data,
+ numpy.int32(self.shape),
+ )
evt.wait()
except Exception as error:
- max_valid_wg = self.program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0])
+ max_valid_wg = self.program.addition.get_work_group_info(
+ pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0]
+ )
msg = "Error %s on WG=%s: %s" % (error, wg, max_valid_wg)
self.assertLess(max_valid_wg, wg, msg)
break
@@ -118,23 +131,39 @@ class TestAddition(unittest.TestCase):
for platform in ocl.platforms:
for did, device in enumerate(platform.devices):
meas = _measure_workgroup_size((platform.id, device.id))
- self.assertEqual(meas, device.max_work_group_size,
- "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size))
+ self.assertEqual(
+ meas,
+ device.max_work_group_size,
+ "Workgroup size for %s/%s: %s == %s"
+ % (platform, device, meas, device.max_work_group_size),
+ )
def test_query(self):
"""
        Tests that all devices are working properly ... lengthy and error-prone
"""
- for what in ("COMPILE_WORK_GROUP_SIZE",
- "LOCAL_MEM_SIZE",
- "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
- "PRIVATE_MEM_SIZE",
- "WORK_GROUP_SIZE"):
- logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what))
-
- # Not all ICD work properly ....
- #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
-
- min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
- max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE")
+ for what in (
+ "COMPILE_WORK_GROUP_SIZE",
+ "LOCAL_MEM_SIZE",
+ "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
+ "PRIVATE_MEM_SIZE",
+ "WORK_GROUP_SIZE",
+ ):
+ logger.info(
+ "%s: %s",
+ what,
+ query_kernel_info(program=self.program, kernel="addition", what=what),
+ )
+
+        # Not all ICDs work properly ....
+ # self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
+
+ min_wg = query_kernel_info(
+ program=self.program,
+ kernel="addition",
+ what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
+ )
+ max_wg = query_kernel_info(
+ program=self.program, kernel="addition", what="WORK_GROUP_SIZE"
+ )
self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg")
diff --git a/src/silx/opencl/test/test_array_utils.py b/src/silx/opencl/test/test_array_utils.py
index 325a6c3..98f8bf3 100644
--- a/src/silx/opencl/test/test_array_utils.py
+++ b/src/silx/opencl/test/test_array_utils.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2022 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,42 +24,32 @@
# ###########################################################################*/
"""Test of the OpenCL array_utils"""
-from __future__ import division, print_function
-
__authors__ = ["Pierre paleo"]
__license__ = "MIT"
__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "14/06/2017"
-import time
import logging
import numpy as np
import unittest
+
try:
import mako
except ImportError:
mako = None
from ..common import ocl
+
if ocl:
import pyopencl as cl
import pyopencl.array as parray
- from .. import linalg
from ..utils import get_opencl_code
-from silx.test.utils import utilstest
logger = logging.getLogger(__name__)
-try:
- from scipy.ndimage.filters import laplace
- _has_scipy = True
-except ImportError:
- _has_scipy = False
-
@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
class TestCpy2d(unittest.TestCase):
-
def setUp(self):
if ocl is None:
return
@@ -68,8 +57,8 @@ class TestCpy2d(unittest.TestCase):
if logger.getEffectiveLevel() <= logging.INFO:
self.PROFILE = True
self.queue = cl.CommandQueue(
- self.ctx,
- properties=cl.command_queue_properties.PROFILING_ENABLE)
+ self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE
+ )
else:
self.PROFILE = False
self.queue = cl.CommandQueue(self.ctx)
@@ -96,8 +85,12 @@ class TestCpy2d(unittest.TestCase):
self.offset1 = (offset1_y, offset1_x)
self.offset2 = (offset2_y, offset2_x)
# Compute the size of the rectangle to transfer
- size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1)
- size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1)
+ size_y = np.random.randint(
+ 2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1
+ )
+ size_x = np.random.randint(
+ 2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1
+ )
self.transfer_shape = (size_y, size_x)
def tearDown(self):
@@ -113,7 +106,9 @@ class TestCpy2d(unittest.TestCase):
def compare(self, result, reference):
errmax = np.max(np.abs(result - reference))
logger.info("Max error = %e" % (errmax))
- self.assertTrue(errmax == 0, str("Max error is too high"))#. PRNG state was %s" % str(self.prng_state)))
+ self.assertTrue(
+ errmax == 0, str("Max error is too high")
+ ) # . PRNG state was %s" % str(self.prng_state)))
@unittest.skipUnless(ocl and mako, "pyopencl is missing")
def test_cpy2d(self):
@@ -124,18 +119,26 @@ class TestCpy2d(unittest.TestCase):
o1 = self.offset1
o2 = self.offset2
T = self.transfer_shape
- logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s,
+ logger.info(
+ """Testing D->D rectangular copy with (N1_y, N1_x) = %s,
(N2_y, N2_x) = %s:
- array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" %
- (
- str(self.shape1), str(self.shape2),
- o2[0], o2[0] + T[0],
- o2[1], o2[1] + T[1],
- o1[0], o1[0] + T[0],
- o1[1], o1[1] + T[1]
- )
- )
- self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]]
+ array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]"""
+ % (
+ str(self.shape1),
+ str(self.shape2),
+ o2[0],
+ o2[0] + T[0],
+ o2[1],
+ o2[1] + T[1],
+ o1[0],
+ o1[0] + T[0],
+ o1[1],
+ o1[1] + T[1],
+ )
+ )
+ self.array2[o2[0] : o2[0] + T[0], o2[1] : o2[1] + T[1]] = self.array1[
+ o1[0] : o1[0] + T[0], o1[1] : o1[1] + T[1]
+ ]
kernel_args = (
self.d_array2.data,
self.d_array1.data,
@@ -143,7 +146,7 @@ class TestCpy2d(unittest.TestCase):
np.int32(self.shape1[1]),
np.int32(self.offset2[::-1]),
np.int32(self.offset1[::-1]),
- np.int32(self.transfer_shape[::-1])
+ np.int32(self.transfer_shape[::-1]),
)
wg = None
ndrange = self.transfer_shape[::-1]
diff --git a/src/silx/opencl/test/test_backprojection.py b/src/silx/opencl/test/test_backprojection.py
index 96d56fa..b08c972 100644
--- a/src/silx/opencl/test/test_backprojection.py
+++ b/src/silx/opencl/test/test_backprojection.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016 European Synchrotron Radiation Facility
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Test of the filtered backprojection module"""
-from __future__ import division, print_function
-
__authors__ = ["Pierre paleo"]
__license__ = "MIT"
__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
@@ -38,11 +35,13 @@ import logging
import numpy as np
import unittest
from math import pi
+
try:
import mako
except ImportError:
mako = None
from ..common import ocl
+
if ocl:
from .. import backprojection
from ...image.tomography import compute_fourier_filter
@@ -59,7 +58,7 @@ def generate_coords(img_shp, center=None):
l_r, l_c = float(img_shp[0]), float(img_shp[1])
R, C = np.mgrid[:l_r, :l_c]
if center is None:
- center0, center1 = l_r / 2., l_c / 2.
+ center0, center1 = l_r / 2.0, l_c / 2.0
else:
center0, center1 = center
R = R + 0.5 - center0
@@ -75,7 +74,7 @@ def clip_circle(img, center=None, radius=None):
M = R * R + C * C
res = np.zeros_like(img)
if radius is None:
- radius = img.shape[0] / 2. - 1
+ radius = img.shape[0] / 2.0 - 1
mask = M < radius * radius
res[mask] = img[mask]
return res
@@ -83,20 +82,21 @@ def clip_circle(img, center=None, radius=None):
@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
class TestFBP(unittest.TestCase):
-
def setUp(self):
if ocl is None:
return
self.getfiles()
self.fbp = backprojection.Backprojection(self.sino.shape, profile=True)
if self.fbp.compiletime_workgroup_size < 16 * 16:
- self.skipTest("Current implementation of OpenCL backprojection is "
- "not supported on this platform yet")
+ self.skipTest(
+ "Current implementation of OpenCL backprojection is "
+ "not supported on this platform yet"
+ )
# Astra does not use the same backprojector implementation.
# Therefore, we cannot expect results to be the "same" (up to float32
# numerical error)
self.tol = 5e-2
- if not(self.fbp._use_textures) or self.fbp.device.type == "CPU":
+ if not (self.fbp._use_textures) or self.fbp.device.type == "CPU":
# Precision is less when using CPU
# (either CPU textures or "manual" linear interpolation)
self.tol *= 2
@@ -133,8 +133,12 @@ class TestFBP(unittest.TestCase):
ref_clipped = clip_circle(self.reference_rec)
delta = abs(res_clipped - ref_clipped)
bad = delta > 1
- logger.debug("Absolute difference: %s with %s outlier pixels out of %s"
- "", delta.max(), bad.sum(), np.prod(bad.shape))
+ logger.debug(
+ "Absolute difference: %s with %s outlier pixels out of %s" "",
+ delta.max(),
+ bad.sum(),
+ np.prod(bad.shape),
+ )
return delta.max()
@unittest.skipUnless(ocl and mako, "pyopencl is missing")
@@ -160,17 +164,14 @@ class TestFBP(unittest.TestCase):
for i in range(10):
res = self.fbp.filtered_backprojection(self.sino)
errmax = np.max(np.abs(res - res0))
- self.assertTrue(errmax < 1.e-6, "Max error is too high")
+ self.assertTrue(errmax < 1.0e-6, "Max error is too high")
@unittest.skipUnless(ocl and mako, "pyopencl is missing")
def test_fbp_filters(self):
"""
Test the different available filters of silx FBP.
"""
- avail_filters = [
- "ramlak", "shepp-logan", "cosine", "hamming",
- "hann"
- ]
+ avail_filters = ["ramlak", "shepp-logan", "cosine", "hamming", "hann"]
# Create a Dirac delta function at a single angle view.
        # As the filters are radially invariant:
# - backprojection yields an image where each line is a Dirac.
@@ -179,7 +180,7 @@ class TestFBP(unittest.TestCase):
# test will also ensure that backprojection behaves well.
dirac = np.zeros_like(self.sino)
na, dw = dirac.shape
- dirac[0, dw//2] = na / pi * 2
+ dirac[0, dw // 2] = na / pi * 2
for filter_name in avail_filters:
B = backprojection.Backprojection(dirac.shape, filter_name=filter_name)
@@ -187,17 +188,15 @@ class TestFBP(unittest.TestCase):
# Check that radial invariance is kept
std0 = np.max(np.abs(np.std(r, axis=0)))
self.assertTrue(
- std0 < 5.e-6,
- "Something wrong with FBP(filter=%s)" % filter_name
+ std0 < 5.0e-6, "Something wrong with FBP(filter=%s)" % filter_name
)
# Check that the filter is retrieved
- r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2. # filter factor
+ r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2.0 # filter factor
ref_filter_f = compute_fourier_filter(dw, filter_name)
errmax = np.max(np.abs(r_f - ref_filter_f))
logger.info("FBP filter %s: max error=%e" % (filter_name, errmax))
self.assertTrue(
- errmax < 1.e-3,
- "Something wrong with FBP(filter=%s)" % filter_name
+ errmax < 1.0e-3, "Something wrong with FBP(filter=%s)" % filter_name
)
@unittest.skipUnless(ocl and mako, "pyopencl is missing")
@@ -205,13 +204,14 @@ class TestFBP(unittest.TestCase):
# Generate a 513-sinogram.
# The padded width will be nextpow(513*2).
# silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail.
- sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge')
- B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1)
+ sino = np.pad(self.sino, ((0, 0), (1, 0)), mode="edge")
+ B = backprojection.Backprojection(
+ sino.shape, axis_position=self.fbp.axis_pos + 1
+ )
res = B(sino)
# Compare with self.reference_rec. Tolerance is high as backprojector
# is not fully shift-invariant.
errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec)))
self.assertLess(
- errmax, 1.e-1,
- "Something wrong with FBP on odd-sized sinogram"
+ errmax, 1.0e-1, "Something wrong with FBP on odd-sized sinogram"
)
diff --git a/src/silx/opencl/test/test_convolution.py b/src/silx/opencl/test/test_convolution.py
index 6a2759d..86716f4 100644
--- a/src/silx/opencl/test/test_convolution.py
+++ b/src/silx/opencl/test/test_convolution.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
+# Copyright (c) 2019-2022 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -28,8 +27,6 @@
Test of the Convolution class.
"""
-from __future__ import division, print_function
-
__authors__ = ["Pierre Paleo"]
__contact__ = "pierre.paleo@esrf.fr"
__license__ = "MIT"
@@ -44,7 +41,11 @@ from silx.image.utils import gaussian_kernel
try:
from scipy.ndimage import convolve, convolve1d
- from scipy.misc import ascent
+
+ try:
+ from scipy.misc import ascent
+ except:
+ from scipy.datasets import ascent
scipy_convolve = convolve
scipy_convolve1d = convolve1d
@@ -61,7 +62,6 @@ logger = logging.getLogger(__name__)
class ConvolutionData:
-
def __init__(self, param):
self.param = param
self.mode = param["boundary_handling"]
@@ -207,7 +207,7 @@ def convolution_data_params():
)
params = []
for boundary_handling, use_texture, input_dev, output_dev in param_vals:
- param={
+ param = {
"boundary_handling": boundary_handling,
"input_on_device": input_dev,
"output_on_device": output_dev,
@@ -239,26 +239,31 @@ def convolution_data(request):
def test_1D(convolution_data):
convolution_data.template_test("test_1D")
+
@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
def test_separable_2D(convolution_data):
convolution_data.template_test("test_separable_2D")
+
@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
def test_separable_3D(convolution_data):
convolution_data.template_test("test_separable_3D")
+
@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
def test_nonseparable_2D(convolution_data):
convolution_data.template_test("test_nonseparable_2D")
+
@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
def test_nonseparable_3D(convolution_data):
convolution_data.template_test("test_nonseparable_3D")
+
@pytest.mark.skipif(ocl is None, reason="OpenCL is missing")
@pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing")
def test_batched_2D(convolution_data):
diff --git a/src/silx/opencl/test/test_doubleword.py b/src/silx/opencl/test/test_doubleword.py
index a33cf5a..493d8c8 100644
--- a/src/silx/opencl/test/test_doubleword.py
+++ b/src/silx/opencl/test/test_doubleword.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
#
# Project: The silx project
# https://github.com/silx-kit/silx
@@ -47,6 +46,7 @@ except ImportError as error:
pyopencl = None
from .. import ocl
+
if ocl is not None:
from ..utils import read_cl_file
from .. import pyopencl
@@ -66,14 +66,21 @@ class TestDoubleWord(unittest.TestCase):
@classmethod
def setUpClass(cls):
if pyopencl is None or ocl is None:
- raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+ raise unittest.SkipTest(
+ "OpenCL module (pyopencl) is not present or no device available"
+ )
cls.ctx = ocl.create_context(devicetype="GPU")
- cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+ cls.queue = pyopencl.CommandQueue(
+ cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE
+ )
        # this is running 32-bit OpenCL with POCL
- if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
- cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+ if (
+ platform.machine() in ("i386", "i686", "x86_64")
+ and (tuple.__itemsize__ == 4)
+ and cls.ctx.devices[0].platform.name == "Portable Computing Language"
+ ):
cls.args = "-DX87_VOLATILE=volatile"
else:
cls.args = ""
@@ -95,38 +102,68 @@ class TestDoubleWord(unittest.TestCase):
cls.doubleword = None
def test_fast_sum2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
+ preamble=self.doubleword,
+ )
a_g = pyopencl.array.to_device(self.queue, self.ah)
b_g = pyopencl.array.to_device(self.queue, self.bl)
res_l = pyopencl.array.empty_like(a_g)
res_h = pyopencl.array.empty_like(a_g)
test_kernel(a_g, b_g, res_h, res_l)
self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+ self.assertGreater(
+ abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(),
+ 0,
+ "Exact mismatches",
+ )
+ self.assertEqual(
+ abs(
+ self.ah.astype(numpy.float64)
+ + self.bl
+ - (res_h.get().astype(numpy.float64) + res_l.get())
+ ).max(),
+ 0,
+ "Exact matches",
+ )
def test_sum2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
a_g = pyopencl.array.to_device(self.queue, self.ah)
b_g = pyopencl.array.to_device(self.queue, self.bh)
res_l = pyopencl.array.empty_like(a_g)
res_h = pyopencl.array.empty_like(a_g)
test_kernel(a_g, b_g, res_h, res_l)
self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
+ self.assertGreater(
+ abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(),
+ 0,
+ "Exact mismatches",
+ )
+ self.assertEqual(
+ abs(
+ self.ah.astype(numpy.float64)
+ + self.bh
+ - (res_h.get().astype(numpy.float64) + res_l.get())
+ ).max(),
+ 0,
+ "Exact matches",
+ )
def test_prod2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *a, float *b, float *res_h, float *res_l",
+ "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
a_g = pyopencl.array.to_device(self.queue, self.ah)
b_g = pyopencl.array.to_device(self.queue, self.bh)
res_l = pyopencl.array.empty_like(a_g)
@@ -135,14 +172,22 @@ class TestDoubleWord(unittest.TestCase):
res_m = res_h.get()
res = res_h.get().astype(numpy.float64) + res_l.get()
self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches")
+ self.assertGreater(
+ abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(),
+ 0,
+ "Exact mismatches",
+ )
+ self.assertEqual(
+ abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches"
+ )
def test_dw_plus_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
ah_g = pyopencl.array.to_device(self.queue, self.ah)
al_g = pyopencl.array.to_device(self.queue, self.al)
b_g = pyopencl.array.to_device(self.queue, self.bh)
@@ -152,14 +197,22 @@ class TestDoubleWord(unittest.TestCase):
res_m = res_h.get()
res = res_h.get().astype(numpy.float64) + res_l.get()
self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+ self.assertGreater(
+ abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches"
+ )
+ self.assertLess(
+ abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(),
+ 2 * EPS32**2,
+ "Exact matches",
+ )
def test_dw_plus_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
ah_g = pyopencl.array.to_device(self.queue, self.ah)
al_g = pyopencl.array.to_device(self.queue, self.al)
bh_g = pyopencl.array.to_device(self.queue, self.bh)
@@ -170,14 +223,20 @@ class TestDoubleWord(unittest.TestCase):
res_m = res_h.get()
res = res_h.get().astype(numpy.float64) + res_l.get()
self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches")
+ self.assertGreater(
+ abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches"
+ )
+ self.assertLess(
+ abs(self.a + self.b - res).max(), 3 * EPS32**2, "Exact matches"
+ )
def test_dw_times_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
ah_g = pyopencl.array.to_device(self.queue, self.ah)
al_g = pyopencl.array.to_device(self.queue, self.al)
b_g = pyopencl.array.to_device(self.queue, self.bh)
@@ -187,14 +246,20 @@ class TestDoubleWord(unittest.TestCase):
res_m = res_h.get()
res = res_h.get().astype(numpy.float64) + res_l.get()
self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
+ self.assertGreater(
+ abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches"
+ )
+ self.assertLess(
+ abs(self.a * self.bh - res).max(), 2 * EPS32**2, "Exact matches"
+ )
def test_dw_times_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
ah_g = pyopencl.array.to_device(self.queue, self.ah)
al_g = pyopencl.array.to_device(self.queue, self.al)
bh_g = pyopencl.array.to_device(self.queue, self.bh)
@@ -205,14 +270,20 @@ class TestDoubleWord(unittest.TestCase):
res_m = res_h.get()
res = res_h.get().astype(numpy.float64) + res_l.get()
self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches")
+ self.assertGreater(
+ abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches"
+ )
+ self.assertLess(
+ abs(self.a * self.b - res).max(), 5 * EPS32**2, "Exact matches"
+ )
def test_dw_div_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *ah, float *al, float *b, float *res_h, float *res_l",
+ "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
ah_g = pyopencl.array.to_device(self.queue, self.ah)
al_g = pyopencl.array.to_device(self.queue, self.al)
b_g = pyopencl.array.to_device(self.queue, self.bh)
@@ -222,14 +293,20 @@ class TestDoubleWord(unittest.TestCase):
res_m = res_h.get()
res = res_h.get().astype(numpy.float64) + res_l.get()
self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches")
+ self.assertGreater(
+ abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches"
+ )
+ self.assertLess(
+ abs(self.a / self.bh - res).max(), 3 * EPS32**2, "Exact matches"
+ )
def test_dw_div_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
+ test_kernel = ElementwiseKernel(
+ self.ctx,
+ "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
+ "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
+ preamble=self.doubleword,
+ )
ah_g = pyopencl.array.to_device(self.queue, self.ah)
al_g = pyopencl.array.to_device(self.queue, self.al)
bh_g = pyopencl.array.to_device(self.queue, self.bh)
@@ -240,5 +317,9 @@ class TestDoubleWord(unittest.TestCase):
res_m = res_h.get()
res = res_h.get().astype(numpy.float64) + res_l.get()
self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches")
+ self.assertGreater(
+ abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches"
+ )
+ self.assertLess(
+ abs(self.a / self.b - res).max(), 6 * EPS32**2, "Exact matches"
+ )
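
The double-word tests above all follow the same pattern: a kernel returns a high and a low float32 part whose float64 sum must match the exact result. As a host-side illustration (plain numpy, no OpenCL needed), here is a sketch of the classic TwoSum error-free transformation that the fp_plus_fp kernel builds on; this is illustrative rather than the library code itself:

    import numpy as np

    def two_sum(a, b):
        # Knuth's TwoSum: returns (high, low) with high + low == a + b exactly
        # (round-to-nearest, no overflow).
        s = np.float32(a + b)          # rounded "major" part
        bv = np.float32(s - a)         # what was effectively added to a
        err = np.float32(a - (s - bv)) + np.float32(b - bv)  # the rounding error
        return s, err

    a, b = np.float32(1.0), np.float32(1e-8)
    hi, lo = two_sum(a, b)
    # hi alone loses the small term; promoting to float64 and adding lo back
    # recovers it, which is exactly what the "Exact matches" assertions check.
    assert np.float64(hi) + np.float64(lo) == np.float64(a) + np.float64(b)
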
diff --git a/src/silx/opencl/test/test_image.py b/src/silx/opencl/test/test_image.py
index 73c771b..691ea82 100644
--- a/src/silx/opencl/test/test_image.py
+++ b/src/silx/opencl/test/test_image.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: image manipulation in OpenCL
# https://github.com/silx-kit/silx
@@ -29,8 +28,6 @@
Simple test of image manipulation
"""
-from __future__ import division, print_function
-
__authors__ = ["Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
@@ -42,11 +39,13 @@ import numpy
import unittest
from ..common import ocl, _measure_workgroup_size
+
if ocl:
import pyopencl
import pyopencl.array
from ...test.utils import utilstest
from ..image import ImageProcessing
+
logger = logging.getLogger(__name__)
try:
from PIL import Image
@@ -56,7 +55,6 @@ except ImportError:
@unittest.skipUnless(ocl and Image, "PyOpenCl/Image is missing")
class TestImage(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
super(TestImage, cls).setUpClass()
@@ -102,7 +100,9 @@ class TestImage(unittest.TestCase):
tmp = pyopencl.array.empty(self.ip.ctx, self.data.shape, "float32")
res = self.ip.to_float(self.data, out=tmp)
res2 = self.ip.normalize(tmp, -100, 100, copy=False)
- norm = (self.data.astype(numpy.float32) - self.data.min()) / (self.data.max() - self.data.min())
+ norm = (self.data.astype(numpy.float32) - self.data.min()) / (
+ self.data.max() - self.data.min()
+ )
ref2 = 200 * norm - 100
self.assertLess(abs(res2 - ref2).max(), 3e-5, "content")
@@ -111,15 +111,21 @@ class TestImage(unittest.TestCase):
"""
Test on a greyscaled image ... of Lena :)
"""
- lena_bw = (0.2126 * self.data[:, :, 0] +
- 0.7152 * self.data[:, :, 1] +
- 0.0722 * self.data[:, :, 2]).astype("int32")
+ lena_bw = (
+ 0.2126 * self.data[:, :, 0]
+ + 0.7152 * self.data[:, :, 1]
+ + 0.0722 * self.data[:, :, 2]
+ ).astype("int32")
ref = numpy.histogram(lena_bw, 255)
ip = ImageProcessing(ctx=self.ctx, template=lena_bw, profile=True)
res = ip.histogram(lena_bw, 255)
ip.log_profile()
- delta = (ref[0] - res[0])
- deltap = (ref[1] - res[1])
+ delta = ref[0] - res[0]
+ deltap = ref[1] - res[1]
self.assertEqual(delta.sum(), 0, "errors are self-compensated")
self.assertLessEqual(abs(delta).max(), 1, "errors are small")
- self.assertLessEqual(abs(deltap).max(), 3e-5, "errors on position are small: %s" % (abs(deltap).max()))
+ self.assertLessEqual(
+ abs(deltap).max(),
+ 3e-5,
+ "errors on position are small: %s" % (abs(deltap).max()),
+ )
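
A short sketch of the ImageProcessing histogram call exercised in the hunk above, with a random image standing in for the test data; omitting ctx and relying on automatic device selection is an assumption rather than something this patch shows:

    import numpy
    from silx.opencl.image import ImageProcessing

    img = numpy.random.randint(0, 255, size=(512, 512)).astype(numpy.int32)
    ip = ImageProcessing(template=img)   # assumption: a context is picked automatically when ctx is omitted
    res = ip.histogram(img, 255)         # res[0]: counts, res[1]: bin positions, as compared to numpy.histogram above
    ref = numpy.histogram(img, 255)
    assert abs(res[0] - ref[0]).max() <= 1   # the test above tolerates off-by-one counts per bin
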
diff --git a/src/silx/opencl/test/test_kahan.py b/src/silx/opencl/test/test_kahan.py
index 9e4a1e3..069d7de 100644
--- a/src/silx/opencl/test/test_kahan.py
+++ b/src/silx/opencl/test/test_kahan.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
#
# Project: OpenCL numerical library
# https://github.com/silx-kit/silx
@@ -48,6 +47,7 @@ except ImportError as error:
pyopencl = None
from .. import ocl
+
if ocl is not None:
from ..utils import read_cl_file
from .. import pyopencl
@@ -62,14 +62,21 @@ class TestKahan(unittest.TestCase):
@classmethod
def setUpClass(cls):
if pyopencl is None or ocl is None:
- raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
+ raise unittest.SkipTest(
+ "OpenCL module (pyopencl) is not present or no device available"
+ )
cls.ctx = ocl.create_context(devicetype="GPU")
- cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
+ cls.queue = pyopencl.CommandQueue(
+ cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE
+ )
        # this is running 32-bit OpenCL with POCL
- if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
- cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
+ if (
+ platform.machine() in ("i386", "i686", "x86_64")
+ and (tuple.__itemsize__ == 4)
+ and cls.ctx.devices[0].platform.name == "Portable Computing Language"
+ ):
cls.args = "-DX87_VOLATILE=volatile"
else:
cls.args = ""
@@ -81,7 +88,7 @@ class TestKahan(unittest.TestCase):
@staticmethod
def dummy_sum(ary, dtype=None):
- "perform the actual sum in a dummy way "
+ "perform the actual sum in a dummy way"
if dtype is None:
dtype = ary.dtype.type
sum_ = dtype(0)
@@ -96,8 +103,10 @@ class TestKahan(unittest.TestCase):
ref64 = numpy.sum(data, dtype=numpy.float64)
ref32 = self.dummy_sum(data)
- if (ref64 == ref32):
- logger.warning("Kahan: invalid tests as float32 provides the same result as float64")
+ if ref64 == ref32:
+ logger.warning(
+ "Kahan: invalid tests as float32 provides the same result as float64"
+ )
# Dummy kernel to evaluate
src = """
kernel void summation(global float* data,
@@ -113,11 +122,15 @@ class TestKahan(unittest.TestCase):
result[1] = acc.s1;
}
"""
- prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
+ prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(
+ self.args
+ )
ones_d = pyopencl.array.to_device(self.queue, data)
res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
res_d.fill(0)
- evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt = prg.summation(
+ self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data
+ )
evt.wait()
res = res_d.get().sum(dtype=numpy.float64)
self.assertEqual(ref64, res, "test_kahan")
@@ -129,8 +142,10 @@ class TestKahan(unittest.TestCase):
ref64 = numpy.dot(data.astype(numpy.float64), data.astype(numpy.float64))
ref32 = numpy.dot(data, data)
- if (ref64 == ref32):
- logger.warning("dot16: invalid tests as float32 provides the same result as float64")
+ if ref64 == ref32:
+ logger.warning(
+ "dot16: invalid tests as float32 provides the same result as float64"
+ )
# Dummy kernel to evaluate
src = """
kernel void test_dot16(global float* data,
@@ -196,11 +211,15 @@ class TestKahan(unittest.TestCase):
"""
- prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
+ prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(
+ self.args
+ )
ones_d = pyopencl.array.to_device(self.queue, data)
res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
res_d.fill(0)
- evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ evt = prg.test_dot16(
+ self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data
+ )
evt.wait()
res = res_d.get().sum(dtype="float64")
self.assertEqual(ref64, res, "test_dot16")
@@ -210,9 +229,13 @@ class TestKahan(unittest.TestCase):
data1 = data[1::2]
ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot8: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot8(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ if ref64 == ref32:
+ logger.warning(
+ "dot8: invalid tests as float32 provides the same result as float64"
+ )
+ evt = prg.test_dot8(
+ self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data
+ )
evt.wait()
res = res_d.get().sum(dtype="float64")
self.assertEqual(ref64, res, "test_dot8")
@@ -222,9 +245,13 @@ class TestKahan(unittest.TestCase):
data1 = data[3::4]
ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot4: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot4(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ if ref64 == ref32:
+ logger.warning(
+ "dot4: invalid tests as float32 provides the same result as float64"
+ )
+ evt = prg.test_dot4(
+ self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data
+ )
evt.wait()
res = res_d.get().sum(dtype="float64")
self.assertEqual(ref64, res, "test_dot4")
@@ -234,9 +261,13 @@ class TestKahan(unittest.TestCase):
data1 = numpy.array([data[3], data[11], data[15]])
ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot3: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot3(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ if ref64 == ref32:
+ logger.warning(
+ "dot3: invalid tests as float32 provides the same result as float64"
+ )
+ evt = prg.test_dot3(
+ self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data
+ )
evt.wait()
res = res_d.get().sum(dtype="float64")
self.assertEqual(ref64, res, "test_dot3")
@@ -246,9 +277,13 @@ class TestKahan(unittest.TestCase):
data1 = numpy.array([data[1], data[15]])
ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot2: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot2(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
+ if ref64 == ref32:
+ logger.warning(
+ "dot2: invalid tests as float32 provides the same result as float64"
+ )
+ evt = prg.test_dot2(
+ self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data
+ )
evt.wait()
res = res_d.get().sum(dtype="float64")
self.assertEqual(ref64, res, "test_dot2")
diff --git a/src/silx/opencl/test/test_linalg.py b/src/silx/opencl/test/test_linalg.py
index a997a36..0b0a443 100644
--- a/src/silx/opencl/test/test_linalg.py
+++ b/src/silx/opencl/test/test_linalg.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2022 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -25,23 +24,22 @@
# ###########################################################################*/
"""Test of the linalg module"""
-from __future__ import division, print_function
-
__authors__ = ["Pierre paleo"]
__license__ = "MIT"
__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "01/08/2019"
-import time
import logging
import numpy as np
import unittest
+
try:
import mako
except ImportError:
mako = None
from ..common import ocl
+
if ocl:
import pyopencl as cl
import pyopencl.array as parray
@@ -50,7 +48,8 @@ from silx.test.utils import utilstest
logger = logging.getLogger(__name__)
try:
- from scipy.ndimage.filters import laplace
+ from scipy.ndimage import laplace
+
_has_scipy = True
except ImportError:
_has_scipy = False
@@ -58,13 +57,18 @@ except ImportError:
# TODO move this function in math or image ?
def gradient(img):
- '''
+ """
Compute the gradient of an image as a numpy array
Code from https://github.com/emmanuelle/tomo-tv/
- '''
- shape = [img.ndim, ] + list(img.shape)
+ """
+ shape = [
+ img.ndim,
+ ] + list(img.shape)
gradient = np.zeros(shape, dtype=img.dtype)
- slice_all = [0, slice(None, -1),]
+ slice_all = [
+ 0,
+ slice(None, -1),
+ ]
for d in range(img.ndim):
gradient[tuple(slice_all)] = np.diff(img, axis=d)
slice_all[0] = d + 1
@@ -74,10 +78,10 @@ def gradient(img):
# TODO move this function in math or image ?
def divergence(grad):
- '''
+ """
Compute the divergence of a gradient
Code from https://github.com/emmanuelle/tomo-tv/
- '''
+ """
res = np.zeros(grad.shape[1:])
for d in range(grad.shape[0]):
this_grad = np.rollaxis(grad[d], d)
@@ -90,7 +94,6 @@ def divergence(grad):
@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
class TestLinAlg(unittest.TestCase):
-
def setUp(self):
if ocl is None:
return
@@ -109,7 +112,9 @@ class TestLinAlg(unittest.TestCase):
self.div_ref = divergence(self.grad_ref)
self.image2 = np.zeros_like(self.image)
# Device images
- self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64)
+ self.gradient_parray = parray.empty(
+ self.la.queue, self.image.shape, np.complex64
+ )
self.gradient_parray.fill(0)
# we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
        # but platforms not supporting OpenCL 1.2 have a problem with enqueue_fill_buffer,
@@ -156,46 +161,78 @@ class TestLinAlg(unittest.TestCase):
arrays = {
"numpy.ndarray": self.image,
"buffer": self.image_buffer,
- "parray": self.image_parray
+ "parray": self.image_parray,
}
for desc, image in arrays.items():
# Test with dst on host (numpy.ndarray)
res = self.la.gradient(image, return_to_host=True)
- self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc))
+ self.compare(
+ res,
+ self.grad_ref,
+ 1e-6,
+ str("gradient[src=%s, dst=numpy.ndarray]" % desc),
+ )
# Test with dst on device (pyopencl.Buffer)
self.la.gradient(image, dst=self.gradient_buffer)
cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer)
self.grad2[0] = self.grad.real
self.grad2[1] = self.grad.imag
- self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc))
+ self.compare(
+ self.grad2,
+ self.grad_ref,
+ 1e-6,
+ str("gradient[src=%s, dst=buffer]" % desc),
+ )
# Test with dst on device (pyopencl.Array)
self.la.gradient(image, dst=self.gradient_parray)
self.grad = self.gradient_parray.get()
self.grad2[0] = self.grad.real
self.grad2[1] = self.grad.imag
- self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc))
+ self.compare(
+ self.grad2,
+ self.grad_ref,
+ 1e-6,
+ str("gradient[src=%s, dst=parray]" % desc),
+ )
@unittest.skipUnless(ocl and mako, "pyopencl is missing")
def test_divergence(self):
arrays = {
"numpy.ndarray": self.grad_ref,
"buffer": self.grad_ref_buffer,
- "parray": self.grad_ref_parray
+ "parray": self.grad_ref_parray,
}
for desc, grad in arrays.items():
# Test with dst on host (numpy.ndarray)
res = self.la.divergence(grad, return_to_host=True)
- self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc))
+ self.compare(
+ res,
+ self.div_ref,
+ 1e-6,
+ str("divergence[src=%s, dst=numpy.ndarray]" % desc),
+ )
# Test with dst on device (pyopencl.Buffer)
self.la.divergence(grad, dst=self.image_buffer)
cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer)
- self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc))
+ self.compare(
+ self.image2,
+ self.div_ref,
+ 1e-6,
+ str("divergence[src=%s, dst=buffer]" % desc),
+ )
# Test with dst on device (pyopencl.Array)
self.la.divergence(grad, dst=self.image_parray)
self.image2 = self.image_parray.get()
- self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc))
-
- @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing")
+ self.compare(
+ self.image2,
+ self.div_ref,
+ 1e-6,
+ str("divergence[src=%s, dst=parray]" % desc),
+ )
+
+ @unittest.skipUnless(
+ ocl and mako and _has_scipy, "pyopencl and/or scipy is missing"
+ )
def test_laplacian(self):
laplacian_ref = laplace(self.image)
# Laplacian = div(grad)
diff --git a/src/silx/opencl/test/test_medfilt.py b/src/silx/opencl/test/test_medfilt.py
index 339e0f2..15cd749 100644
--- a/src/silx/opencl/test/test_medfilt.py
+++ b/src/silx/opencl/test/test_medfilt.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: Median filter of images + OpenCL
# https://github.com/silx-kit/silx
@@ -29,13 +28,11 @@
Simple test of the median filter
"""
-from __future__ import division, print_function
-
__authors__ = ["Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "05/07/2018"
+__copyright__ = "2013-2022 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "09/05/2023"
import sys
@@ -43,12 +40,15 @@ import time
import logging
import numpy
import unittest
+
from collections import namedtuple
+
try:
import mako
except ImportError:
mako = None
from ..common import ocl
+
if ocl:
import pyopencl
import pyopencl.array
@@ -61,12 +61,18 @@ Result = namedtuple("Result", ["size", "error", "sp_time", "oc_time"])
try:
from scipy.misc import ascent
except:
- def ascent():
- """Dummy image from random data"""
- return numpy.random.random((512, 512))
+ try:
+ from scipy.datasets import ascent
+ except:
+
+ def ascent():
+ """Dummy image from random data"""
+ return numpy.random.random((512, 512))
+
+
try:
- from scipy.ndimage import filters
- median_filter = filters.median_filter
+ from scipy.ndimage import median_filter
+
HAS_SCIPY = True
except:
HAS_SCIPY = False
@@ -74,7 +80,6 @@ except:
@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
class TestMedianFilter(unittest.TestCase):
-
def setUp(self):
if ocl is None:
return
@@ -111,8 +116,15 @@ class TestMedianFilter(unittest.TestCase):
if r is None:
logger.info("test_medfilt: size: %s: skipped")
else:
- logger.info("test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r)
- self.assertEqual(r.error, 0, 'Results are correct')
+ logger.info(
+ "test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r
+ )
+ if (
+ self.medianfilter.device.platform.name.lower()
+ != "portable computing language"
+ ):
+ # Known broken
+ self.assertEqual(r.error, 0, "Results are correct")
def benchmark(self, limit=36):
"Run some benchmarking"
diff --git a/src/silx/opencl/test/test_projection.py b/src/silx/opencl/test/test_projection.py
index 13db5f4..550a2f6 100644
--- a/src/silx/opencl/test/test_projection.py
+++ b/src/silx/opencl/test/test_projection.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2016 European Synchrotron Radiation Facility
@@ -25,8 +24,6 @@
# ###########################################################################*/
"""Test of the forward projection module"""
-from __future__ import division, print_function
-
__authors__ = ["Pierre paleo"]
__license__ = "MIT"
__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
@@ -37,11 +34,13 @@ import time
import logging
import numpy as np
import unittest
+
try:
import mako
except ImportError:
mako = None
from ..common import ocl
+
if ocl:
from .. import projection
from silx.test.utils import utilstest
@@ -51,17 +50,18 @@ logger = logging.getLogger(__name__)
@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
class TestProj(unittest.TestCase):
-
def setUp(self):
if ocl is None:
return
# ~ if sys.platform.startswith('darwin'):
- # ~ self.skipTest("Projection is not implemented on CPU for OS X yet")
+ # ~ self.skipTest("Projection is not implemented on CPU for OS X yet")
self.getfiles()
n_angles = self.sino.shape[0]
self.proj = projection.Projection(self.phantom.shape, n_angles)
if self.proj.compiletime_workgroup_size < 16 * 16:
- self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet")
+ self.skipTest(
+ "Current implementation of OpenCL projection is not supported on this platform yet"
+ )
def tearDown(self):
self.phantom = None
@@ -111,11 +111,11 @@ class TestProj(unittest.TestCase):
msg = str("Max error = %e" % err)
logger.info(msg)
# Interpolation differs at some lines, giving relative error of 10/50000
- self.assertTrue(err < 20., "Max error is too high")
+ self.assertTrue(err < 20.0, "Max error is too high")
# Test multiple reconstructions
# -----------------------------
res0 = np.copy(res)
for i in range(10):
res = self.proj.projection(self.phantom)
errmax = np.max(np.abs(res - res0))
- self.assertTrue(errmax < 1.e-6, "Max error is too high")
+ self.assertTrue(errmax < 1.0e-6, "Max error is too high")
diff --git a/src/silx/opencl/test/test_sparse.py b/src/silx/opencl/test/test_sparse.py
index 1d26b36..db58220 100644
--- a/src/silx/opencl/test/test_sparse.py
+++ b/src/silx/opencl/test/test_sparse.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# coding: utf-8
# /*##########################################################################
#
# Copyright (c) 2018-2019 European Synchrotron Radiation Facility
@@ -30,6 +29,7 @@ import unittest
import logging
from itertools import product
from ..common import ocl
+
if ocl:
import pyopencl.array as parray
from silx.opencl.sparse import CSR
@@ -40,13 +40,14 @@ except ImportError:
logger = logging.getLogger(__name__)
-
def generate_sparse_random_data(
shape=(1000,),
- data_min=0, data_max=100,
+ data_min=0,
+ data_max=100,
density=0.1,
use_only_integers=True,
- dtype="f"):
+ dtype="f",
+):
"""
Generate random sparse data where.
@@ -76,7 +77,6 @@ def generate_sparse_random_data(
return (d * mask).astype(dtype)
-
@unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing")
class TestCSR(unittest.TestCase):
"""Test CSR format"""
@@ -88,20 +88,19 @@ class TestCSR(unittest.TestCase):
dtypes = [np.float32, np.int32, np.uint16]
self._test_configs = list(product(input_on_device, output_on_device, dtypes))
-
def compute_ref_sparsification(self, array):
ref_sparse = sp.csr_matrix(array)
return ref_sparse
-
def test_sparsification(self):
for input_on_device, output_on_device, dtype in self._test_configs:
self._test_sparsification(input_on_device, output_on_device, dtype)
-
def _test_sparsification(self, input_on_device, output_on_device, dtype):
current_config = "input on device: %s, output on device: %s, dtype: %s" % (
- str(input_on_device), str(output_on_device), str(dtype)
+ str(input_on_device),
+ str(output_on_device),
+ str(dtype),
)
logger.debug("CSR: %s" % current_config)
# Generate data and reference CSR
@@ -133,29 +132,27 @@ class TestCSR(unittest.TestCase):
nnz = ref_sparse.nnz
self.assertTrue(
np.allclose(data[:nnz], ref_sparse.data),
- "something wrong with sparsified data (%s)"
- % current_config
+ "something wrong with sparsified data (%s)" % current_config,
)
self.assertTrue(
np.allclose(indices[:nnz], ref_sparse.indices),
- "something wrong with sparsified indices (%s)"
- % current_config
+ "something wrong with sparsified indices (%s)" % current_config,
)
self.assertTrue(
np.allclose(indptr, ref_sparse.indptr),
"something wrong with sparsified indices pointers (indptr) (%s)"
- % current_config
+ % current_config,
)
-
def test_desparsification(self):
for input_on_device, output_on_device, dtype in self._test_configs:
self._test_desparsification(input_on_device, output_on_device, dtype)
-
def _test_desparsification(self, input_on_device, output_on_device, dtype):
current_config = "input on device: %s, output on device: %s, dtype: %s" % (
- str(input_on_device), str(output_on_device), str(dtype)
+ str(input_on_device),
+ str(output_on_device),
+ str(dtype),
)
logger.debug("CSR: %s" % current_config)
# Generate data and reference CSR
@@ -183,6 +180,5 @@ class TestCSR(unittest.TestCase):
# Compare
self.assertTrue(
np.allclose(arr.reshape(array.shape), array),
- "something wrong with densified data (%s)"
- % current_config
+ "something wrong with densified data (%s)" % current_config,
)
diff --git a/src/silx/opencl/test/test_stats.py b/src/silx/opencl/test/test_stats.py
index 859271d..7637211 100644
--- a/src/silx/opencl/test/test_stats.py
+++ b/src/silx/opencl/test/test_stats.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*-
#
# Project: Sift implementation in Python + OpenCL
# https://github.com/silx-kit/silx
@@ -40,17 +39,18 @@ import numpy
import unittest
from ..common import ocl
+
if ocl:
import pyopencl
import pyopencl.array
from ..statistics import StatResults, Statistics
from ..utils import get_opencl_code
+
logger = logging.getLogger(__name__)
@unittest.skipUnless(ocl, "PyOpenCl is missing")
class TestStatistics(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
cls.size = 1 << 20 # 1 million elements
@@ -58,9 +58,15 @@ class TestStatistics(unittest.TestCase):
fdata = cls.data.astype("float64")
t0 = time.perf_counter()
std = fdata.std()
- cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size),
- fdata.sum(), fdata.mean(), std ** 2,
- std)
+ cls.ref = StatResults(
+ fdata.min(),
+ fdata.max(),
+ float(fdata.size),
+ fdata.sum(),
+ fdata.mean(),
+ std**2,
+ std,
+ )
t1 = time.perf_counter()
cls.ref_time = t1 - t0
@@ -71,11 +77,12 @@ class TestStatistics(unittest.TestCase):
@classmethod
def validate(cls, res):
return (
- (res.min == cls.ref.min) and
- (res.max == cls.ref.max) and
- (res.cnt == cls.ref.cnt) and
- abs(res.mean - cls.ref.mean) < 0.01 and
- abs(res.std - cls.ref.std) < 0.1)
+ (res.min == cls.ref.min)
+ and (res.max == cls.ref.max)
+ and (res.cnt == cls.ref.cnt)
+ and abs(res.mean - cls.ref.mean) < 0.01
+ and abs(res.std - cls.ref.std) < 0.1
+ )
def test_measurement(self):
"""
@@ -96,11 +103,26 @@ class TestStatistics(unittest.TestCase):
t0 = time.perf_counter()
res = s(self.data, comp=comp)
t1 = time.perf_counter()
- logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
+ logger.info(
+ "Runtime on %s/%s : %.3fms x%.1f",
+ platform,
+ device,
+ 1000 * (t1 - t0),
+ self.ref_time / (t1 - t0),
+ )
if failed_init or not self.validate(res):
- logger.error("failed_init %s; Computation modes %s", failed_init, comp)
- logger.error("Failed on platform %s device %s", platform, device)
+ logger.error(
+ "failed_init %s; Computation modes %s",
+ failed_init,
+ comp,
+ )
+ logger.error(
+ "Failed on platform %s device %s", platform, device
+ )
logger.error("Reference results: %s", self.ref)
logger.error("Faulty results: %s", res)
- self.assertTrue(False, f"Stat calculation failed on {platform},{device} in mode {comp}")
+ self.assertTrue(
+ False,
+ f"Stat calculation failed on {platform},{device} in mode {comp}",
+ )
diff --git a/src/silx/opencl/utils.py b/src/silx/opencl/utils.py
index 575e018..c332402 100644
--- a/src/silx/opencl/utils.py
+++ b/src/silx/opencl/utils.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
# /*##########################################################################
# Copyright (C) 2017 European Synchrotron Radiation Facility
#
@@ -26,8 +25,6 @@ Project: Sift implementation in Python + OpenCL
https://github.com/silx-kit/silx
"""
-from __future__ import division
-
__authors__ = ["Jérôme Kieffer", "Pierre Paleo"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
@@ -46,9 +43,13 @@ def calc_size(shape, blocksize):
Calculate the optimal size for a kernel according to the workgroup size
"""
if "__len__" in dir(blocksize):
- return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize))
+ return tuple(
+ (int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize)
+ )
else:
- return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape)
+ return tuple(
+ (int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape
+ )
def nextpower(n):
@@ -91,8 +92,7 @@ def get_cl_file(resource):
"""
if not resource.endswith(".cl"):
resource += ".cl"
- return resources._resource_filename(resource,
- default_directory="opencl")
+ return resources._resource_filename(resource, default_directory="opencl")
def read_cl_file(filename):
@@ -121,8 +121,6 @@ def concatenate_cl_kernel(filenames):
return os.linesep.join(read_cl_file(fn) for fn in filenames)
-
-
class ConvolutionInfos(object):
allowed_axes = {
"1D": [None],
@@ -135,10 +133,10 @@ class ConvolutionInfos(object):
(2, 0, 1),
(2, 1, 0),
(1, 0, 2),
- (0, 2, 1)
+ (0, 2, 1),
],
"batched_1D_3D": [(0,), (1,), (2,)],
- "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
+ "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
"2D": [None],
"batched_2D_3D": [(0,), (1,), (2,)],
"separable_3D_2D_3D": [
@@ -205,10 +203,3 @@ class ConvolutionInfos(object):
},
},
}
-
-
-
-
-
-
-
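
A quick worked example of the calc_size helper reformatted above: each dimension is rounded up to the next multiple of the workgroup size with (i + j - 1) & ~(j - 1), a trick that relies on the blocksize being a power of two, as workgroup sizes usually are:

    from silx.opencl.utils import calc_size

    print(calc_size((1000, 513), (16, 16)))  # (1008, 528): each dimension rounded up to a multiple of 16
    print(calc_size((1000,), 32))            # (1024,): a scalar blocksize is applied to every dimension
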