diff options
Diffstat (limited to 'src/silx/opencl')
36 files changed, 2550 insertions, 1462 deletions
diff --git a/src/silx/opencl/__init__.py b/src/silx/opencl/__init__.py index fbd1f88..466ffaf 100644 --- a/src/silx/opencl/__init__.py +++ b/src/silx/opencl/__init__.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: S I L X project # https://github.com/silx-kit/silx diff --git a/src/silx/opencl/atomic.py b/src/silx/opencl/atomic.py new file mode 100644 index 0000000..16d3eff --- /dev/null +++ b/src/silx/opencl/atomic.py @@ -0,0 +1,93 @@ +# +# Project: S I L X project +# https://github.com/silx-kit/silx +# +# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# . +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# . +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +""" +Utilities around atomic operation in OpenCL +""" + +__author__ = "Jerome Kieffer" +__license__ = "MIT" +__date__ = "14/06/2023" +__copyright__ = "2023-2023, ESRF, Grenoble" +__contact__ = "jerome.kieffer@esrf.fr" + +import numpy +import pyopencl +import pyopencl.array as cla + + +def check_atomic32(device): + try: + ctx = pyopencl.Context(devices=[device]) + except: + return False, f"Unable to create context on {device}" + else: + queue = pyopencl.CommandQueue(ctx) + src = """ +kernel void check_atomic32(global int* ary){ +int res = atom_inc(ary); +} +""" + try: + prg = pyopencl.Program(ctx, src).build() + except Exception as err: + return False, f"{type(err)}: {err}" + a = numpy.zeros(1, numpy.int32) + d = cla.to_device(queue, a) + prg.check_atomic32(queue, (1024,), (32,), d.data).wait() + value = d.get()[0] + return value == 1024, f"Got the proper value 1024=={value}" + + +def check_atomic64(device): + try: + ctx = pyopencl.Context(devices=[device]) + except: + return False, f"Unable to create context on {device}" + else: + queue = pyopencl.CommandQueue(ctx) + if ( + device.platform.name == "Portable Computing Language" + and "GPU" in pyopencl.device_type.to_string(device.type).upper() + ): + # this configuration is known to seg-fault + return False, "PoCL + GPU do not support atomic64" + src = """ +#pragma OPENCL EXTENSION cl_khr_fp64: enable +#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable +kernel void check_atomic64(global long* ary){ +long res = atom_inc(ary); +} +""" + try: + prg = pyopencl.Program(ctx, src).build() + except Exception as err: + return False, f"{type(err)}: {err}" + a = numpy.zeros(1, numpy.int64) + d = cla.to_device(queue, a) + prg.check_atomic64(queue, (1024,), (32,), d.data).wait() + value = d.get()[0] + return value == 1024, f"Got the proper value 1024=={value}" diff --git a/src/silx/opencl/backprojection.py b/src/silx/opencl/backprojection.py index 65a9836..5af2bc5 100644 --- a/src/silx/opencl/backprojection.py +++ b/src/silx/opencl/backprojection.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # -# Copyright (c) 2016 European Synchrotron Radiation Facility +# Copyright (c) 2016-2023 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,8 +24,6 @@ # ###########################################################################*/ """Module for (filtered) backprojection on the GPU""" -from __future__ import absolute_import, print_function, with_statement, division - __authors__ = ["A. Mirone, P. Paleo"] __license__ = "MIT" __date__ = "25/01/2019" @@ -37,8 +34,6 @@ import numpy as np from .common import pyopencl from .processing import EventDescription, OpenclProcessing, BufferDescription from .sinofilter import SinoFilter -from .sinofilter import fourier_filter as fourier_filter_ -from ..utils.deprecation import deprecated if pyopencl: mf = pyopencl.mem_flags @@ -64,12 +59,23 @@ def _idivup(a, b): class Backprojection(OpenclProcessing): """A class for performing the backprojection using OpenCL""" + kernel_files = ["backproj.cl", "array_utils.cl"] - def __init__(self, sino_shape, slice_shape=None, axis_position=None, - angles=None, filter_name=None, ctx=None, devicetype="all", - platformid=None, deviceid=None, profile=False, - extra_options=None): + def __init__( + self, + sino_shape, + slice_shape=None, + axis_position=None, + angles=None, + filter_name=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + extra_options=None, + ): """Constructor of the OpenCL (filtered) backprojection :param sino_shape: shape of the sinogram. The sinogram is in the format @@ -101,19 +107,26 @@ class Backprojection(OpenclProcessing): # assuming no discrete GPU # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet") - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) - self._init_geometry(sino_shape, slice_shape, angles, axis_position, - extra_options) + self._init_geometry( + sino_shape, slice_shape, angles, axis_position, extra_options + ) self._allocate_memory() self._compute_angles() self._init_kernels() self._init_filter(filter_name) - def _init_geometry(self, sino_shape, slice_shape, angles, axis_position, - extra_options): + def _init_geometry( + self, sino_shape, slice_shape, angles, axis_position, extra_options + ): """Geometry Initialization :param sino_shape: shape of the sinogram. The sinogram is in the format @@ -137,12 +150,12 @@ class Backprojection(OpenclProcessing): self.slice_shape = slice_shape self.dimrec_shape = ( _idivup(self.slice_shape[0], 32) * 32, - _idivup(self.slice_shape[1], 32) * 32 + _idivup(self.slice_shape[1], 32) * 32, ) if axis_position: self.axis_pos = np.float32(axis_position) else: - self.axis_pos = np.float32((sino_shape[1] - 1.) / 2) + self.axis_pos = np.float32((sino_shape[1] - 1.0) / 2) self.axis_array = None # TODO: add axis correction front-end self._init_extra_options(extra_options) @@ -152,11 +165,11 @@ class Backprojection(OpenclProcessing): :param dict extra_options: Advanced extra options """ self.extra_options = { - "cutoff": 1., + "cutoff": 1.0, "use_numpy_fft": False, # It is axis_pos - (num_bins-1)/2 in PyHST - "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2., - "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2. + "gpu_offset_x": 0.0, # self.axis_pos - (self.num_bins - 1) / 2., + "gpu_offset_y": 0.0, # self.axis_pos - (self.num_bins - 1) / 2. } if extra_options is not None: self.extra_options.update(extra_options) @@ -169,7 +182,9 @@ class Backprojection(OpenclProcessing): # Device memory self.buffers = [ BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE), - BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available) + BufferDescription( + "d_sino", self.shape, np.float32, mf.READ_WRITE + ), # before transferring to texture (if available) BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY), BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY), BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY), @@ -194,27 +209,29 @@ class Backprojection(OpenclProcessing): if self.axis_array: self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:] else: - self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos + self.cl_mem["d_axes"][:] = ( + np.ones(self.num_projs, dtype="f") * self.axis_pos + ) def _init_kernels(self): compile_options = None - if not(self._use_textures): + if not (self._use_textures): compile_options = "-DDONT_USE_TEXTURES" OpenclProcessing.compile_kernels( - self, - self.kernel_files, - compile_options=compile_options + self, self.kernel_files, compile_options=compile_options ) # check that workgroup can actually be (16, 16) - self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel") + self.compiletime_workgroup_size = self.kernels.max_workgroup_size( + "backproj_cpu_kernel" + ) # Workgroup and ndrange sizes are always the same self.wg = (16, 16) self.ndrange = ( _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], - _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] + _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1], ) # Prepare arguments for the kernel call - if not(self._use_textures): + if not (self._use_textures): d_sino_ref = self.d_sino.data else: d_sino_ref = self.d_sino_tex @@ -229,7 +246,7 @@ class Backprojection(OpenclProcessing): self.cl_mem["_d_slice"].data, # d_sino (__read_only image2d_t or float*) d_sino_ref, - # gpu_offset_x (float32) + # gpu_offset_x (float32) np.float32(self.extra_options["gpu_offset_x"]), # gpu_offset_y (float32) np.float32(self.extra_options["gpu_offset_y"]), @@ -240,7 +257,7 @@ class Backprojection(OpenclProcessing): # d_axis (__global float32*) self.cl_mem["d_axes"].data, # shared mem (__local float32*) - self._get_local_mem() + self._get_local_mem(), ) def _allocate_textures(self): @@ -276,7 +293,7 @@ class Backprojection(OpenclProcessing): np.int32(self.dimrec_shape[1]), np.int32((0, 0)), np.int32((0, 0)), - slice_shape_ocl + slice_shape_ocl, ) return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) @@ -284,47 +301,39 @@ class Backprojection(OpenclProcessing): if isinstance(sino, parray.Array): return self._transfer_device_to_texture(sino) sino2 = sino - if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32): + if not (sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32): sino2 = np.ascontiguousarray(sino, dtype=np.float32) - if not(self._use_textures): - ev = pyopencl.enqueue_copy( - self.queue, - self.d_sino.data, - sino2 - ) + if not (self._use_textures): + ev = pyopencl.enqueue_copy(self.queue, self.d_sino.data, sino2) what = "transfer filtered sino H->D buffer" ev.wait() else: ev = pyopencl.enqueue_copy( - self.queue, - self.d_sino_tex, - sino2, - origin=(0, 0), - region=self.shape[::-1] - ) + self.queue, + self.d_sino_tex, + sino2, + origin=(0, 0), + region=self.shape[::-1], + ) what = "transfer filtered sino H->D texture" return EventDescription(what, ev) def _transfer_device_to_texture(self, d_sino): - if not(self._use_textures): + if not (self._use_textures): if id(self.d_sino) == id(d_sino): return - ev = pyopencl.enqueue_copy( - self.queue, - self.d_sino.data, - d_sino - ) + ev = pyopencl.enqueue_copy(self.queue, self.d_sino.data, d_sino) what = "transfer filtered sino D->D buffer" ev.wait() else: ev = pyopencl.enqueue_copy( - self.queue, - self.d_sino_tex, - d_sino.data, - offset=0, - origin=(0, 0), - region=self.shape[::-1] - ) + self.queue, + self.d_sino_tex, + d_sino.data, + offset=0, + origin=(0, 0), + region=self.shape[::-1], + ) what = "transfer filtered sino D->D texture" return EventDescription(what, ev) @@ -340,20 +349,17 @@ class Backprojection(OpenclProcessing): with self.sem: events.append(self._transfer_to_texture(sino)) # Call the backprojection kernel - if not(self._use_textures): + if not (self._use_textures): kernel_to_call = self.kernels.backproj_cpu_kernel else: kernel_to_call = self.kernels.backproj_kernel kernel_to_call( - self.queue, - self.ndrange, - self.wg, - *self._backproj_kernel_args + self.queue, self.ndrange, self.wg, *self._backproj_kernel_args ) # Return if output is None: res = self.cl_mem["_d_slice"].get() - res = res[:self.slice_shape[0], :self.slice_shape[1]] + res = res[: self.slice_shape[0], : self.slice_shape[1]] else: res = output self._cpy2d_to_slice(output) @@ -380,18 +386,3 @@ class Backprojection(OpenclProcessing): return res __call__ = filtered_backprojection - - - # ------------------- - # - Compatibility - - # ------------------- - - @deprecated(replacement="Backprojection.sino_filter", since_version="0.10") - def filter_projections(self, sino, rescale=True): - self.sino_filter(sino, output=self.d_sino) - - - -def fourier_filter(sino, filter_=None, fft_size=None): - return fourier_filter_(sino, filter_=filter_, fft_size=fft_size) - diff --git a/src/silx/opencl/codec/bitshuffle_lz4.py b/src/silx/opencl/codec/bitshuffle_lz4.py new file mode 100644 index 0000000..b0992b9 --- /dev/null +++ b/src/silx/opencl/codec/bitshuffle_lz4.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# +# Project: Sift implementation in Python + OpenCL +# https://github.com/silx-kit/silx +# +# Copyright (C) 2022-2023 European Synchrotron Radiation Facility, Grenoble, France +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +""" +This module provides a class for CBF byte offset compression/decompression. +""" + +__authors__ = ["Jérôme Kieffer"] +__contact__ = "jerome.kieffer@esrf.eu" +__license__ = "MIT" +__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "09/11/2022" +__status__ = "production" + + +import os +import struct +import numpy +from ..common import ocl, pyopencl, kernel_workgroup_size +from ..processing import BufferDescription, EventDescription, OpenclProcessing + +import logging + +logger = logging.getLogger(__name__) + + +class BitshuffleLz4(OpenclProcessing): + """Perform the bitshuffle-lz4 decompression on the GPU + See :class:`OpenclProcessing` for optional arguments description. + :param int cmp_size: + Size of the raw stream for decompression. + It can be (slightly) larger than the array. + :param int dec_size: + Size of the decompression output array + (mandatory for decompression) + :param dtype: dtype of decompressed data + """ + + LZ4_BLOCK_SIZE = 8192 + + def __init__( + self, + cmp_size, + dec_size, + dtype, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + block_size=None, + profile=False, + ): + """Constructor of the class: + + :param cmp_size: size of the compressed data buffer (in bytes) + :param dec_size: size of the compressed data buffer (in words) + :param dtype: data type of one work in decompressed array + + For the other, see the doc of OpenclProcessing + """ + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + block_size=block_size, + profile=profile, + ) + if self.block_size is None: + try: + self.block_size = self.ctx.devices[0].preferred_work_group_size_multiple + except: + self.block_size = self.device.max_work_group_size + + self.cmp_size = numpy.uint64(cmp_size) + self.dec_size = numpy.uint64(dec_size) + self.dec_dtype = numpy.dtype(dtype) + self.num_blocks = numpy.uint32( + (self.dec_dtype.itemsize * self.dec_size + self.LZ4_BLOCK_SIZE - 1) + // self.LZ4_BLOCK_SIZE + ) + + buffers = [ + BufferDescription("nb_blocks", 1, numpy.uint32, None), + BufferDescription("block_position", self.num_blocks, numpy.uint64, None), + BufferDescription("cmp", self.cmp_size, numpy.uint8, None), + BufferDescription("dec", self.dec_size, self.dec_dtype, None), + ] + + self.allocate_buffers(buffers, use_array=True) + + self.compile_kernels([os.path.join("codec", "bitshuffle_lz4")]) + self.block_size = min( + self.block_size, + kernel_workgroup_size(self.program, "bslz4_decompress_block"), + ) + + def decompress(self, raw, out=None, wg=None, nbytes=None): + """This function actually performs the decompression by calling the kernels + :param numpy.ndarray raw: The compressed data as a 1D numpy array of char or string + :param pyopencl.array out: pyopencl array in which to place the result. + :param wg: tuneable parameter with the workgroup size. + :param int nbytes: (Optional) Number of bytes occupied by the chunk in raw. + :return: The decompressed image as an pyopencl array. + :rtype: pyopencl.array + """ + + events = [] + with self.sem: + if nbytes is not None: + assert nbytes <= raw.size + len_raw = numpy.uint64(nbytes) + elif isinstance(raw, pyopencl.Buffer): + len_raw = numpy.uint64(raw.size) + else: + len_raw = numpy.uint64(len(raw)) + + if isinstance(raw, pyopencl.array.Array): + cmp_buffer = raw.data + num_blocks = self.num_blocks + elif isinstance(raw, pyopencl.Buffer): + cmp_buffer = raw + num_blocks = self.num_blocks + else: + if len_raw > self.cmp_size: + self.cmp_size = len_raw + logger.info("increase cmp buffer size to %s", self.cmp_size) + self.cl_mem["cmp"] = pyopencl.array.empty( + self.queue, self.cmp_size, dtype=numpy.uint8 + ) + evt = pyopencl.enqueue_copy( + self.queue, self.cl_mem["cmp"].data, raw, is_blocking=False + ) + events.append(EventDescription("copy raw H -> D", evt)) + cmp_buffer = self.cl_mem["cmp"].data + + dest_size = struct.unpack(">Q", raw[:8]) + self_dest_nbyte = self.dec_size * self.dec_dtype.itemsize + if dest_size < self_dest_nbyte: + num_blocks = numpy.uint32( + (dest_size + self.LZ4_BLOCK_SIZE - 1) // self.LZ4_BLOCK_SIZE + ) + elif dest_size > self_dest_nbyte: + num_blocks = numpy.uint32( + (dest_size + self.LZ4_BLOCK_SIZE - 1) // self.LZ4_BLOCK_SIZE + ) + self.cl_mem["dec"] = pyopencl.array.empty( + self.queue, dest_size, self.dec_dtype + ) + self.dec_size = dest_size // self.dec_dtype.itemsize + else: + num_blocks = self.num_blocks + + wg = int(wg or self.block_size) + + evt = self.program.lz4_unblock( + self.queue, + (1,), + (1,), + cmp_buffer, + len_raw, + self.cl_mem["block_position"].data, + num_blocks, + self.cl_mem["nb_blocks"].data, + ) + events.append(EventDescription("LZ4 unblock", evt)) + + if out is None: + out = self.cl_mem["dec"] + else: + assert out.dtype == self.dec_dtype + assert out.size == self.dec_size + + evt = self.program.bslz4_decompress_block( + self.queue, + (self.num_blocks * wg,), + (wg,), + cmp_buffer, + out.data, + self.cl_mem["block_position"].data, + self.cl_mem["nb_blocks"].data, + numpy.uint8(self.dec_dtype.itemsize), + ) + events.append(EventDescription("LZ4 decompress", evt)) + self.profile_multi(events) + return out + + __call__ = decompress diff --git a/src/silx/opencl/codec/byte_offset.py b/src/silx/opencl/codec/byte_offset.py index 9a52427..e3df9b2 100644 --- a/src/silx/opencl/codec/byte_offset.py +++ b/src/silx/opencl/codec/byte_offset.py @@ -1,10 +1,9 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: Sift implementation in Python + OpenCL # https://github.com/silx-kit/silx # -# Copyright (C) 2013-2020 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2013-2023 European Synchrotron Radiation Facility, Grenoble, France # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -31,8 +30,6 @@ This module provides a class for CBF byte offset compression/decompression. """ -from __future__ import division, print_function, with_statement - __authors__ = ["Jérôme Kieffer"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" @@ -48,10 +45,12 @@ from ..common import ocl, pyopencl from ..processing import BufferDescription, EventDescription, OpenclProcessing import logging + logger = logging.getLogger(__name__) if pyopencl: import pyopencl.version + if pyopencl.version.VERSION < (2016, 0): from pyopencl.scan import GenericScanKernel, GenericDebugScanKernel else: @@ -64,23 +63,36 @@ else: class ByteOffset(OpenclProcessing): """Perform the byte offset compression/decompression on the GPU - See :class:`OpenclProcessing` for optional arguments description. - - :param int raw_size: - Size of the raw stream for decompression. - It can be (slightly) larger than the array. - :param int dec_size: - Size of the decompression output array - (mandatory for decompression) - """ - - def __init__(self, raw_size=None, dec_size=None, - ctx=None, devicetype="all", - platformid=None, deviceid=None, - block_size=None, profile=False): - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - block_size=block_size, profile=profile) + See :class:`OpenclProcessing` for optional arguments description. + + :param int raw_size: + Size of the raw stream for decompression. + It can be (slightly) larger than the array. + :param int dec_size: + Size of the decompression output array + (mandatory for decompression) + """ + + def __init__( + self, + raw_size=None, + dec_size=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + block_size=None, + profile=False, + ): + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + block_size=block_size, + profile=profile, + ) if self.block_size is None: self.block_size = self.device.max_work_group_size wg = self.block_size @@ -97,7 +109,9 @@ class ByteOffset(OpenclProcessing): BufferDescription("raw", self.padded_raw_size, numpy.int8, None), BufferDescription("mask", self.padded_raw_size, numpy.int32, None), BufferDescription("values", self.padded_raw_size, numpy.int32, None), - BufferDescription("exceptions", self.padded_raw_size, numpy.int32, None) + BufferDescription( + "exceptions", self.padded_raw_size, numpy.int32, None + ), ] if dec_size is None: @@ -106,18 +120,17 @@ class ByteOffset(OpenclProcessing): self.dec_size = numpy.int32(dec_size) buffers += [ BufferDescription("data_float", self.dec_size, numpy.float32, None), - BufferDescription("data_int", self.dec_size, numpy.int32, None) + BufferDescription("data_int", self.dec_size, numpy.int32, None), ] self.allocate_buffers(buffers, use_array=True) self.compile_kernels([os.path.join("codec", "byte_offset")]) self.kernels.__setattr__("scan", self._init_double_scan()) - self.kernels.__setattr__("compression_scan", - self._init_compression_scan()) + self.kernels.__setattr__("compression_scan", self._init_compression_scan()) def _init_double_scan(self): - """"generates a double scan on indexes and values in one operation""" + """generates a double scan on indexes and values in one operation""" arguments = "__global int *value", "__global int *index" int2 = pyopencl.tools.get_or_register_dtype("int2") input_expr = "index[i]>0 ? (int2)(0, 0) : (int2)(value[i], 1)" @@ -126,21 +139,25 @@ class ByteOffset(OpenclProcessing): output_statement = "value[i] = item.s0; index[i+1] = item.s1;" if self.block_size > 256: - knl = GenericScanKernel(self.ctx, - dtype=int2, - arguments=arguments, - input_expr=input_expr, - scan_expr=scan_expr, - neutral=neutral, - output_statement=output_statement) + knl = GenericScanKernel( + self.ctx, + dtype=int2, + arguments=arguments, + input_expr=input_expr, + scan_expr=scan_expr, + neutral=neutral, + output_statement=output_statement, + ) else: # MacOS on CPU - knl = GenericDebugScanKernel(self.ctx, - dtype=int2, - arguments=arguments, - input_expr=input_expr, - scan_expr=scan_expr, - neutral=neutral, - output_statement=output_statement) + knl = GenericDebugScanKernel( + self.ctx, + dtype=int2, + arguments=arguments, + input_expr=input_expr, + scan_expr=scan_expr, + neutral=neutral, + output_statement=output_statement, + ) return knl def decode(self, raw, as_float=False, out=None): @@ -153,8 +170,9 @@ class ByteOffset(OpenclProcessing): :return: The decompressed image as an pyopencl array. :rtype: pyopencl.array """ - assert self.dec_size is not None, \ - "dec_size is a mandatory ByteOffset init argument for decompression" + assert ( + self.dec_size is not None + ), "dec_size is a mandatory ByteOffset init argument for decompression" events = [] with self.sem: @@ -165,67 +183,96 @@ class ByteOffset(OpenclProcessing): self.padded_raw_size = (self.raw_size + wg - 1) & ~(wg - 1) logger.info("increase raw buffer size to %s", self.padded_raw_size) buffers = { - "raw": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int8), - "mask": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32), - "exceptions": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32), - "values": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32), - } + "raw": pyopencl.array.empty( + self.queue, self.padded_raw_size, dtype=numpy.int8 + ), + "mask": pyopencl.array.empty( + self.queue, self.padded_raw_size, dtype=numpy.int32 + ), + "exceptions": pyopencl.array.empty( + self.queue, self.padded_raw_size, dtype=numpy.int32 + ), + "values": pyopencl.array.empty( + self.queue, self.padded_raw_size, dtype=numpy.int32 + ), + } self.cl_mem.update(buffers) else: wg = self.block_size - evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data, - raw, - is_blocking=False) + evt = pyopencl.enqueue_copy( + self.queue, self.cl_mem["raw"].data, raw, is_blocking=False + ) events.append(EventDescription("copy raw H -> D", evt)) - evt = self.kernels.fill_int_mem(self.queue, (self.padded_raw_size,), (wg,), - self.cl_mem["mask"].data, - numpy.int32(self.padded_raw_size), - numpy.int32(0), - numpy.int32(0)) + evt = self.kernels.fill_int_mem( + self.queue, + (self.padded_raw_size,), + (wg,), + self.cl_mem["mask"].data, + numpy.int32(self.padded_raw_size), + numpy.int32(0), + numpy.int32(0), + ) events.append(EventDescription("memset mask", evt)) - evt = self.kernels.fill_int_mem(self.queue, (1,), (1,), - self.cl_mem["counter"].data, - numpy.int32(1), - numpy.int32(0), - numpy.int32(0)) + evt = self.kernels.fill_int_mem( + self.queue, + (1,), + (1,), + self.cl_mem["counter"].data, + numpy.int32(1), + numpy.int32(0), + numpy.int32(0), + ) events.append(EventDescription("memset counter", evt)) - evt = self.kernels.mark_exceptions(self.queue, (self.padded_raw_size,), (wg,), - self.cl_mem["raw"].data, - len_raw, - numpy.int32(self.raw_size), - self.cl_mem["mask"].data, - self.cl_mem["values"].data, - self.cl_mem["counter"].data, - self.cl_mem["exceptions"].data) + evt = self.kernels.mark_exceptions( + self.queue, + (self.padded_raw_size,), + (wg,), + self.cl_mem["raw"].data, + len_raw, + numpy.int32(self.raw_size), + self.cl_mem["mask"].data, + self.cl_mem["values"].data, + self.cl_mem["counter"].data, + self.cl_mem["exceptions"].data, + ) events.append(EventDescription("mark exceptions", evt)) nb_exceptions = numpy.empty(1, dtype=numpy.int32) - evt = pyopencl.enqueue_copy(self.queue, nb_exceptions, self.cl_mem["counter"].data, - is_blocking=False) + evt = pyopencl.enqueue_copy( + self.queue, + nb_exceptions, + self.cl_mem["counter"].data, + is_blocking=False, + ) events.append(EventDescription("copy counter D -> H", evt)) evt.wait() nbexc = int(nb_exceptions[0]) if nbexc == 0: logger.info("nbexc %i", nbexc) else: - evt = self.kernels.treat_exceptions(self.queue, (nbexc,), (1,), - self.cl_mem["raw"].data, - len_raw, - self.cl_mem["mask"].data, - self.cl_mem["exceptions"].data, - self.cl_mem["values"].data - ) + evt = self.kernels.treat_exceptions( + self.queue, + (nbexc,), + (1,), + self.cl_mem["raw"].data, + len_raw, + self.cl_mem["mask"].data, + self.cl_mem["exceptions"].data, + self.cl_mem["values"].data, + ) events.append(EventDescription("treat_exceptions", evt)) - #self.cl_mem["copy_values"] = self.cl_mem["values"].copy() - #self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy() - evt = self.kernels.scan(self.cl_mem["values"], - self.cl_mem["mask"], - queue=self.queue, - size=int(len_raw), - wait_for=(evt,)) + # self.cl_mem["copy_values"] = self.cl_mem["values"].copy() + # self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy() + evt = self.kernels.scan( + self.cl_mem["values"], + self.cl_mem["mask"], + queue=self.queue, + size=int(len_raw), + wait_for=(evt,), + ) events.append(EventDescription("double scan", evt)) - #evt.wait() + # evt.wait() if out is not None: if out.dtype == numpy.float32: copy_results = self.kernels.copy_result_float @@ -238,15 +285,18 @@ class ByteOffset(OpenclProcessing): else: out = self.cl_mem["data_int"] copy_results = self.kernels.copy_result_int - evt = copy_results(self.queue, (self.padded_raw_size,), (wg,), - self.cl_mem["values"].data, - self.cl_mem["mask"].data, - len_raw, - self.dec_size, - out.data - ) + evt = copy_results( + self.queue, + (self.padded_raw_size,), + (wg,), + self.cl_mem["values"].data, + self.cl_mem["mask"].data, + len_raw, + self.dec_size, + out.data, + ) events.append(EventDescription("copy_results", evt)) - #evt.wait() + # evt.wait() if self.profile: self.events += events return out @@ -294,7 +344,9 @@ class ByteOffset(OpenclProcessing): } } """ - arguments = "__global const int *data, __global char *compressed, __global int *size" + arguments = ( + "__global const int *data, __global char *compressed, __global int *size" + ) input_expr = "compressed_size((i == 0) ? data[0] : (data[i] - data[i - 1]))" scan_expr = "a+b" neutral = "0" @@ -306,23 +358,27 @@ class ByteOffset(OpenclProcessing): """ if self.block_size >= 64: - knl = GenericScanKernel(self.ctx, - dtype=numpy.int32, - preamble=preamble, - arguments=arguments, - input_expr=input_expr, - scan_expr=scan_expr, - neutral=neutral, - output_statement=output_statement) + knl = GenericScanKernel( + self.ctx, + dtype=numpy.int32, + preamble=preamble, + arguments=arguments, + input_expr=input_expr, + scan_expr=scan_expr, + neutral=neutral, + output_statement=output_statement, + ) else: # MacOS on CPU - knl = GenericDebugScanKernel(self.ctx, - dtype=numpy.int32, - preamble=preamble, - arguments=arguments, - input_expr=input_expr, - scan_expr=scan_expr, - neutral=neutral, - output_statement=output_statement) + knl = GenericDebugScanKernel( + self.ctx, + dtype=numpy.int32, + preamble=preamble, + arguments=arguments, + input_expr=input_expr, + scan_expr=scan_expr, + neutral=neutral, + output_statement=output_statement, + ) return knl def encode(self, data, out=None): @@ -351,28 +407,39 @@ class ByteOffset(OpenclProcessing): data = numpy.ascontiguousarray(data, dtype=numpy.int32).ravel() # Make sure data array exists and is large enough - if ("data_input" not in self.cl_mem or - self.cl_mem["data_input"].size < data.size): + if ( + "data_input" not in self.cl_mem + or self.cl_mem["data_input"].size < data.size + ): logger.info("increase data input buffer size to %s", data.size) - self.cl_mem.update({ - "data_input": pyopencl.array.empty(self.queue, - data.size, - dtype=numpy.int32)}) + self.cl_mem.update( + { + "data_input": pyopencl.array.empty( + self.queue, data.size, dtype=numpy.int32 + ) + } + ) d_data = self.cl_mem["data_input"] evt = pyopencl.enqueue_copy( - self.queue, d_data.data, data, is_blocking=False) + self.queue, d_data.data, data, is_blocking=False + ) events.append(EventDescription("copy data H -> D", evt)) # Make sure compressed array exists and is large enough compressed_size = d_data.size * 7 - if ("compressed" not in self.cl_mem or - self.cl_mem["compressed"].size < compressed_size): + if ( + "compressed" not in self.cl_mem + or self.cl_mem["compressed"].size < compressed_size + ): logger.info("increase compressed buffer size to %s", compressed_size) - self.cl_mem.update({ - "compressed": pyopencl.array.empty(self.queue, - compressed_size, - dtype=numpy.int8)}) + self.cl_mem.update( + { + "compressed": pyopencl.array.empty( + self.queue, compressed_size, dtype=numpy.int8 + ) + } + ) d_compressed = self.cl_mem["compressed"] d_size = self.cl_mem["counter"] # Shared with decompression @@ -387,13 +454,15 @@ class ByteOffset(OpenclProcessing): shape=(byte_count,), dtype=numpy.int8, allocator=functools.partial( - d_compressed.base_data.get_sub_region, - d_compressed.offset)) + d_compressed.base_data.get_sub_region, d_compressed.offset + ), + ) elif out.size < byte_count: raise ValueError( "Provided output buffer is not large enough: " - "requires %d bytes, got %d" % (byte_count, out.size)) + "requires %d bytes, got %d" % (byte_count, out.size) + ) else: # out.size >= byte_count # Create an array with a sub-region of out and this class queue @@ -401,13 +470,15 @@ class ByteOffset(OpenclProcessing): self.queue, shape=(byte_count,), dtype=numpy.int8, - allocator=functools.partial(out.base_data.get_sub_region, - out.offset)) + allocator=functools.partial( + out.base_data.get_sub_region, out.offset + ), + ) - evt = pyopencl.enqueue_copy(self.queue, out.data, d_compressed.data, - byte_count=byte_count) - events.append( - EventDescription("copy D -> D: internal -> out", evt)) + evt = pyopencl.enqueue_copy( + self.queue, out.data, d_compressed.data, byte_count=byte_count + ) + events.append(EventDescription("copy D -> D: internal -> out", evt)) if self.profile: self.events += events diff --git a/src/silx/opencl/codec/setup.py b/src/silx/opencl/codec/setup.py deleted file mode 100644 index 4a5c1e5..0000000 --- a/src/silx/opencl/codec/setup.py +++ /dev/null @@ -1,43 +0,0 @@ -# coding: utf-8 -# -# Copyright (C) 2016-2017 European Synchrotron Radiation Facility -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# - -from __future__ import division - -__contact__ = "jerome.kieffer@esrf.eu" -__license__ = "MIT" -__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__authors__ = ["J. Kieffer"] -__date__ = "13/10/2017" - -from numpy.distutils.misc_util import Configuration - - -def configuration(parent_package='', top_path=None): - config = Configuration('codec', parent_package, top_path) - config.add_subpackage('test') - return config - - -if __name__ == "__main__": - from numpy.distutils.core import setup - setup(configuration=configuration) diff --git a/src/silx/opencl/codec/test/__init__.py b/src/silx/opencl/codec/test/__init__.py index 325c2c7..45065f8 100644 --- a/src/silx/opencl/codec/test/__init__.py +++ b/src/silx/opencl/codec/test/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Project: silx # https://github.com/silx-kit/silx diff --git a/src/silx/opencl/codec/test/test_bitshuffle_lz4.py b/src/silx/opencl/codec/test/test_bitshuffle_lz4.py new file mode 100644 index 0000000..6c5891e --- /dev/null +++ b/src/silx/opencl/codec/test/test_bitshuffle_lz4.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# +# Project: Bitshuffle-LZ4 decompression in OpenCL +# https://github.com/silx-kit/silx +# +# Copyright (C) 2022-2023 European Synchrotron Radiation Facility, +# Grenoble, France +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +""" +Test suite for byte-offset decompression +""" + +__authors__ = ["Jérôme Kieffer"] +__contact__ = "jerome.kieffer@esrf.eu" +__license__ = "MIT" +__copyright__ = "2022 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "07/11/2022" + +import struct +import numpy +import pytest + +try: + import bitshuffle +except ImportError: + bitshuffle = None +from silx.opencl.common import ocl, pyopencl +from silx.opencl.codec.bitshuffle_lz4 import BitshuffleLz4 + + +TESTCASES = ( # dtype, shape + ("uint64", (103, 503)), + ("int64", (101, 509)), + ("uint32", (229, 659)), + ("int32", (233, 653)), + ("uint16", (743, 647)), + ("int16", (751, 643)), + ("uint8", (157, 1373)), + ("int8", (163, 1367)), +) + + +@pytest.mark.skipif( + not ocl or not pyopencl or bitshuffle is None, + reason="PyOpenCl or bitshuffle is missing", +) +class TestBitshuffleLz4: + """Test pyopencl bishuffle+LZ4 decompression""" + + @staticmethod + def _create_test_data(shape, lam=100, dtype="uint32"): + """Create test (image, compressed stream) pair. + + :param shape: Shape of test image + :param lam: Expectation of interval argument for numpy.random.poisson + :return: (reference image array, compressed stream) + """ + ref = numpy.random.poisson(lam, size=shape).astype(dtype) + raw = ( + struct.pack(">Q", ref.nbytes) + + b"\x00" * 4 + + bitshuffle.compress_lz4(ref).tobytes() + ) + return ref, raw + + @pytest.mark.parametrize("dtype,shape", TESTCASES) + def test_decompress(self, dtype, shape): + """ + Tests the byte offset decompression on GPU with various configuration + """ + ref, raw = self._create_test_data(shape=shape, dtype=dtype) + bs = BitshuffleLz4(len(raw), numpy.prod(shape), dtype=dtype) + res = bs.decompress(raw).get() + assert numpy.array_equal(res, ref.ravel()), "Checks decompression works" + + @pytest.mark.parametrize("dtype,shape", TESTCASES) + def test_decompress_from_buffer(self, dtype, shape): + """Test reading compressed data from pyopencl Buffer""" + ref, raw = self._create_test_data(shape=shape, dtype=dtype) + + bs = BitshuffleLz4(0, numpy.prod(shape), dtype=dtype) + + buffer = pyopencl.Buffer( + bs.ctx, + flags=pyopencl.mem_flags.COPY_HOST_PTR | pyopencl.mem_flags.READ_ONLY, + hostbuf=raw, + ) + + res = bs.decompress(buffer).get() + assert numpy.array_equal(res, ref.ravel()), "Checks decompression works" + + @pytest.mark.parametrize("dtype,shape", TESTCASES) + def test_decompress_from_array(self, dtype, shape): + """Test reading compressed data from pyopencl Array""" + ref, raw = self._create_test_data(shape=shape, dtype=dtype) + + bs = BitshuffleLz4(0, numpy.prod(shape), dtype=dtype) + + array = pyopencl.array.to_device( + bs.queue, + numpy.frombuffer(raw, dtype=numpy.uint8), + array_queue=bs.queue, + ) + + res = bs.decompress(array).get() + assert numpy.array_equal(res, ref.ravel()), "Checks decompression works" diff --git a/src/silx/opencl/codec/test/test_byte_offset.py b/src/silx/opencl/codec/test/test_byte_offset.py index 4b2d5a3..0e58076 100644 --- a/src/silx/opencl/codec/test/test_byte_offset.py +++ b/src/silx/opencl/codec/test/test_byte_offset.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: Byte-offset decompression in OpenCL # https://github.com/silx-kit/silx @@ -31,8 +30,6 @@ Test suite for byte-offset decompression """ -from __future__ import division, print_function - __authors__ = ["Jérôme Kieffer"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" @@ -47,13 +44,12 @@ from silx.opencl.common import ocl, pyopencl from silx.opencl.codec import byte_offset import fabio import unittest + logger = logging.getLogger(__name__) -@unittest.skipUnless(ocl and pyopencl, - "PyOpenCl is missing") +@unittest.skipUnless(ocl and pyopencl, "PyOpenCl is missing") class TestByteOffset(unittest.TestCase): - @staticmethod def _create_test_data(shape, nexcept, lam=200): """Create test (image, compressed stream) pair. @@ -87,7 +83,9 @@ class TestByteOffset(unittest.TestCase): except (RuntimeError, pyopencl.RuntimeError) as err: logger.warning(err) if sys.platform == "darwin": - raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU") + raise unittest.SkipTest( + "Byte-offset decompression is known to be buggy on MacOS-CPU" + ) else: raise err print(bo.block_size) @@ -100,9 +98,11 @@ class TestByteOffset(unittest.TestCase): delta_cy = abs(ref.ravel() - res_cy).max() delta_cl = abs(ref.ravel() - res_cl.get()).max() - logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.", - 1000.0 * (t1 - t0), - 1000.0 * (t2 - t1)) + logger.debug( + "Global execution time: fabio %.3fms, OpenCL: %.3fms.", + 1000.0 * (t1 - t0), + 1000.0 * (t2 - t1), + ) bo.log_profile() # print(ref) # print(res_cl.get()) @@ -111,8 +111,8 @@ class TestByteOffset(unittest.TestCase): def test_many_decompress(self, ntest=10): """ - tests the byte offset decompression on GPU, many images to ensure there - is not leaking in memory + tests the byte offset decompression on GPU, many images to ensure there + is not leaking in memory """ shape = (991, 997) size = numpy.prod(shape) @@ -123,7 +123,9 @@ class TestByteOffset(unittest.TestCase): except (RuntimeError, pyopencl.RuntimeError) as err: logger.warning(err) if sys.platform == "darwin": - raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU") + raise unittest.SkipTest( + "Byte-offset decompression is known to be buggy on MacOS-CPU" + ) else: raise err t0 = time.time() @@ -135,9 +137,11 @@ class TestByteOffset(unittest.TestCase): delta_cl = abs(ref.ravel() - res_cl.get()).max() self.assertEqual(delta_cy, 0, "Checks fabio works") self.assertEqual(delta_cl, 0, "Checks opencl works") - logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.", - 1000.0 * (t1 - t0), - 1000.0 * (t2 - t1)) + logger.debug( + "Global execution time: fabio %.3fms, OpenCL: %.3fms.", + 1000.0 * (t1 - t0), + 1000.0 * (t2 - t1), + ) for i in range(ntest): ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200) @@ -152,9 +156,11 @@ class TestByteOffset(unittest.TestCase): self.assertEqual(delta_cy, 0, "Checks fabio works #%i" % i) self.assertEqual(delta_cl, 0, "Checks opencl works #%i" % i) - logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.", - 1000.0 * (t1 - t0), - 1000.0 * (t2 - t1)) + logger.debug( + "Global execution time: fabio %.3fms, OpenCL: %.3fms.", + 1000.0 * (t1 - t0), + 1000.0 * (t2 - t1), + ) bo.log_profile(stats=True) def test_encode(self): @@ -174,8 +180,7 @@ class TestByteOffset(unittest.TestCase): compressed_stream = compressed_array.get().tobytes() self.assertEqual(raw, compressed_stream) - logger.debug("Global execution time: OpenCL: %.3fms.", - 1000.0 * (t1 - t0)) + logger.debug("Global execution time: OpenCL: %.3fms.", 1000.0 * (t1 - t0)) bo.log_profile() def test_encode_to_array(self): @@ -225,14 +230,15 @@ class TestByteOffset(unittest.TestCase): self.assertEqual(raw, compressed_stream) - logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.", - 1000.0 * (t1 - t0), - 1000.0 * (t2 - t1)) + logger.debug( + "Global execution time: fabio %.3fms, OpenCL: %.3fms.", + 1000.0 * (t1 - t0), + 1000.0 * (t2 - t1), + ) bo.log_profile() def test_encode_to_bytes_from_array(self): - """Test byte offset compression to bytes from a pyopencl array. - """ + """Test byte offset compression to bytes from a pyopencl array.""" ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729) try: @@ -241,8 +247,7 @@ class TestByteOffset(unittest.TestCase): logger.warning(err) raise err - d_ref = pyopencl.array.to_device( - bo.queue, ref.astype(numpy.int32).ravel()) + d_ref = pyopencl.array.to_device(bo.queue, ref.astype(numpy.int32).ravel()) t0 = time.time() res_fabio = fabio.compression.compByteOffset(ref) @@ -252,9 +257,11 @@ class TestByteOffset(unittest.TestCase): self.assertEqual(raw, compressed_stream) - logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.", - 1000.0 * (t1 - t0), - 1000.0 * (t2 - t1)) + logger.debug( + "Global execution time: fabio %.3fms, OpenCL: %.3fms.", + 1000.0 * (t1 - t0), + 1000.0 * (t2 - t1), + ) bo.log_profile() def test_many_encode(self, ntest=10): @@ -278,9 +285,11 @@ class TestByteOffset(unittest.TestCase): bo_durations.append(1000.0 * (t2 - t1)) self.assertEqual(raw, compressed_stream) - logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.", - 1000.0 * (t1 - t0), - 1000.0 * (t2 - t1)) + logger.debug( + "Global execution time: fabio %.3fms, OpenCL: %.3fms.", + 1000.0 * (t1 - t0), + 1000.0 * (t2 - t1), + ) for i in range(ntest): ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200) @@ -293,11 +302,15 @@ class TestByteOffset(unittest.TestCase): bo_durations.append(1000.0 * (t2 - t1)) self.assertEqual(raw, compressed_stream) - logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.", - 1000.0 * (t1 - t0), - 1000.0 * (t2 - t1)) - - logger.debug("OpenCL execution time: Mean: %fms, Min: %fms, Max: %fms", - numpy.mean(bo_durations), - numpy.min(bo_durations), - numpy.max(bo_durations)) + logger.debug( + "Global execution time: fabio %.3fms, OpenCL: %.3fms.", + 1000.0 * (t1 - t0), + 1000.0 * (t2 - t1), + ) + + logger.debug( + "OpenCL execution time: Mean: %fms, Min: %fms, Max: %fms", + numpy.mean(bo_durations), + numpy.min(bo_durations), + numpy.max(bo_durations), + ) diff --git a/src/silx/opencl/common.py b/src/silx/opencl/common.py index 888b1da..30c9ef7 100644 --- a/src/silx/opencl/common.py +++ b/src/silx/opencl/common.py @@ -1,10 +1,9 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: S I L X project # https://github.com/silx-kit/silx # -# Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -34,10 +33,17 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "29/09/2021" +__date__ = "09/09/2023" __status__ = "stable" -__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers", - "measure_workgroup_size", "kernel_workgroup_size"] +__all__ = [ + "ocl", + "pyopencl", + "mf", + "release_cl_buffers", + "allocate_cl_buffers", + "measure_workgroup_size", + "kernel_workgroup_size", +] import os import logging @@ -49,60 +55,69 @@ from .utils import get_opencl_code logger = logging.getLogger(__name__) if os.environ.get("SILX_OPENCL") in ["0", "False"]: - logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0") + logger.info( + "Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0" + ) pyopencl = None else: try: import pyopencl except ImportError: - logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl") + logger.warning( + "Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl" + ) pyopencl = None else: try: pyopencl.get_platforms() except pyopencl.LogicError: - logger.warning("The module pyOpenCL has been imported but can't be used here") + logger.warning( + "The module pyOpenCL has been imported but can't be used here" + ) pyopencl = None - else: - import pyopencl.array as array - mf = pyopencl.mem_flags -if pyopencl is None: +if pyopencl is not None: + import pyopencl.array as array + mf = pyopencl.mem_flags + from .atomic import check_atomic32, check_atomic64 +else: # Define default mem flags class mf(object): WRITE_ONLY = 1 READ_ONLY = 1 READ_WRITE = 1 -else: - mf = pyopencl.mem_flags -FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ... - "CPU": 4, # CPU, at least intel's have 4 operation per cycle - "ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core + +FLOP_PER_CORE = { + "GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ... + "CPU": 4, # CPU, at least intel's have 4 operation per cycle + "ACC": 8, +} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core # Sources : https://en.wikipedia.org/wiki/CUDA -NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed ! - (1, 1): 24, # Measured on G98 [Quadro NVS 295] - (1, 2): 24, # Guessed ! - (1, 3): 24, # measured on a GT285 (GT200) - (2, 0): 64, # Measured on a 580 (GF110) - (2, 1): 96, # Measured on Quadro2000 GF106GL - (3, 0): 384, # Guessed! - (3, 5): 384, # Measured on K20 - (3, 7): 384, # K80: Guessed! - (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU - (5, 2): 256, # Titan-X - (5, 3): 256, # TX1 - (6, 0): 128, # GP100 - (6, 1): 128, # GP104 - (6, 2): 128, # ? - (7, 0): 128, # Volta # measured on Telsa V100 - (7, 2): 128, # Volta ? - (7, 5): 128, # Turing # measured on RTX 6000 - (8, 0): 128, # Ampere # measured on Tesla A100 - (8, 6): 256, # Ampere # measured on RTX A5000 - } +NVIDIA_FLOP_PER_CORE = { + (1, 0): 24, # Guessed ! + (1, 1): 24, # Measured on G98 [Quadro NVS 295] + (1, 2): 24, # Guessed ! + (1, 3): 24, # measured on a GT285 (GT200) + (2, 0): 64, # Measured on a 580 (GF110) + (2, 1): 96, # Measured on Quadro2000 GF106GL + (3, 0): 384, # Guessed! + (3, 5): 384, # Measured on K20 + (3, 7): 384, # K80: Guessed! + (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU + (5, 2): 256, # Titan-X + (5, 3): 256, # TX1 + (6, 0): 128, # GP100 + (6, 1): 128, # GP104 + (6, 2): 128, # ? + (7, 0): 128, # Volta # measured on Telsa V100 + (7, 2): 128, # Volta ? + (7, 5): 128, # Turing # measured on RTX 6000 + (8, 0): 128, # Ampere # measured on Tesla A100 + (8, 6): 256, # Ampere # measured on RTX A5000 +} AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops @@ -112,9 +127,24 @@ class Device(object): Simple class that contains the structure of an OpenCL device """ - def __init__(self, name="None", dtype=None, version=None, driver_version=None, - extensions="", memory=None, available=None, - cores=None, frequency=None, flop_core=None, idx=0, workgroup=1): + def __init__( + self, + name="None", + dtype=None, + version=None, + driver_version=None, + extensions="", + memory=None, + available=None, + cores=None, + frequency=None, + flop_core=None, + idx=0, + workgroup=1, + atomic32=None, + atomic64=None, + platform=None, + ): """ Simple container with some important data for the OpenCL device description. @@ -130,6 +160,7 @@ class Device(object): :param flop_core: Flopating Point operation per core per cycle :param idx: index of the device within the platform :param workgroup: max workgroup size + :param platform: the platform to which this device is attached """ self.name = name.strip() self.type = dtype @@ -142,12 +173,15 @@ class Device(object): self.frequency = frequency self.id = idx self.max_work_group_size = workgroup + self.atomic32 = atomic32 + self.atomic64 = atomic64 if not flop_core: flop_core = FLOP_PER_CORE.get(dtype, 1) if cores and frequency: self.flops = cores * frequency * flop_core else: self.flops = flop_core + self.platform = platform def __repr__(self): return "%s" % self.name @@ -158,19 +192,20 @@ class Device(object): :return: string """ - lst = ["Name\t\t:\t%s" % self.name, - "Type\t\t:\t%s" % self.type, - "Memory\t\t:\t%.3f MB" % (self.memory / 2.0 ** 20), - "Cores\t\t:\t%s CU" % self.cores, - "Frequency\t:\t%s MHz" % self.frequency, - "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.), - "Version\t\t:\t%s" % self.version, - "Available\t:\t%s" % self.available] + lst = [ + "Name\t\t:\t%s" % self.name, + "Type\t\t:\t%s" % self.type, + "Memory\t\t:\t%.3f MB" % (self.memory / 2.0**20), + "Cores\t\t:\t%s CU" % self.cores, + "Frequency\t:\t%s MHz" % self.frequency, + "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.0), + "Version\t\t:\t%s" % self.version, + "Available\t:\t%s" % self.available, + ] return os.linesep.join(lst) def set_unavailable(self): - """Use this method to flag a faulty device - """ + """Use this method to flag a faulty device""" self.available = False @@ -179,7 +214,9 @@ class Platform(object): Simple class that contains the structure of an OpenCL platform """ - def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0): + def __init__( + self, name="None", vendor="None", version=None, extensions=None, idx=0 + ): """ Class containing all descriptions of a platform and all devices description within that platform. @@ -205,6 +242,7 @@ class Platform(object): :param device: Device instance """ + device.platform = self self.devices.append(device) def get_device(self, key): @@ -245,19 +283,25 @@ def _measure_workgroup_size(device_or_context, fast=False): platformid = pyopencl.get_platforms().index(platform) deviceid = platform.get_devices().index(device_or_context) ocl.platforms[platformid].devices[deviceid].set_unavailable() - raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error)) + raise RuntimeError( + "Unable to create context on %s/%s: %s" + % (platform, device_or_context, error) + ) else: device = device_or_context elif isinstance(device_or_context, pyopencl.Context): ctx = device_or_context device = device_or_context.devices[0] elif isinstance(device_or_context, (tuple, list)) and len(device_or_context) == 2: - ctx = ocl.create_context(platformid=device_or_context[0], - deviceid=device_or_context[1]) + ctx = ocl.create_context( + platformid=device_or_context[0], deviceid=device_or_context[1] + ) device = ctx.devices[0] else: - raise RuntimeError("""given parameter device_or_context is not an - instanciation of a device or a context""") + raise RuntimeError( + """given parameter device_or_context is not an + instanciation of a device or a context""" + ) shape = device.max_work_group_size # get the context @@ -272,7 +316,9 @@ def _measure_workgroup_size(device_or_context, fast=False): program = pyopencl.Program(ctx, get_opencl_code("addition")).build() if fast: - max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device) + max_valid_wg = program.addition.get_work_group_info( + pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device + ) else: maxi = int(round(numpy.log2(shape))) for i in range(maxi + 1): @@ -280,11 +326,19 @@ def _measure_workgroup_size(device_or_context, fast=False): wg = 1 << i try: evt = program.addition( - queue, (shape,), (wg,), - d_data.data, d_data_1.data, d_res.data, numpy.int32(shape)) + queue, + (shape,), + (wg,), + d_data.data, + d_data_1.data, + d_res.data, + numpy.int32(shape), + ) evt.wait() except Exception as error: - logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape) + logger.info( + "%s on device %s for WG=%s/%s", error, device.name, wg, shape + ) program = queue = d_res = d_data_1 = d_data = None break else: @@ -294,7 +348,9 @@ def _measure_workgroup_size(device_or_context, fast=False): if wg > max_valid_wg: max_valid_wg = wg else: - logger.warning("ArithmeticError on %s for WG=%s/%s", wg, device.name, shape) + logger.warning( + "ArithmeticError on %s for WG=%s/%s", wg, device.name, shape + ) return max_valid_wg @@ -317,15 +373,25 @@ class OpenCL(object): if pyopencl: platform = device = pypl = devtype = extensions = pydev = None for idx, platform in enumerate(pyopencl.get_platforms()): - pypl = Platform(platform.name, platform.vendor, platform.version, platform.extensions, idx) + pypl = Platform( + platform.name, + platform.vendor, + platform.version, + platform.extensions, + idx, + ) for idd, device in enumerate(platform.get_devices()): #################################################### # Nvidia does not report int64 atomics (we are using) ... # this is a hack around as any nvidia GPU with double-precision supports int64 atomics #################################################### extensions = device.extensions - if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions): - extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics' + if (pypl.vendor == "NVIDIA Corporation") and ( + "cl_khr_fp64" in extensions + ): + extensions += ( + " cl_khr_int64_base_atomics cl_khr_int64_extended_atomics" + ) try: devtype = pyopencl.device_type.to_string(device.type).upper() except ValueError: @@ -340,14 +406,23 @@ class OpenCL(object): devtype = "CPU" else: devtype = devtype[:3] - if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)): + if _is_nvidia_gpu(device.vendor, devtype) and ( + "compute_capability_major_nv" in dir(device) + ): try: - comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv + comput_cap = ( + device.compute_capability_major_nv, + device.compute_capability_minor_nv, + ) except pyopencl.LogicError: flop_core = FLOP_PER_CORE["GPU"] else: - flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"]) - elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"): + flop_core = NVIDIA_FLOP_PER_CORE.get( + comput_cap, FLOP_PER_CORE["GPU"] + ) + elif (pypl.vendor == "Advanced Micro Devices, Inc.") and ( + devtype == "GPU" + ): flop_core = AMD_FLOP_PER_CORE elif devtype == "CPU": flop_core = FLOP_PER_CORE.get(devtype, 1) @@ -355,14 +430,29 @@ class OpenCL(object): flop_core = 1 workgroup = device.max_work_group_size if (devtype == "CPU") and (pypl.vendor == "Apple"): - logger.info("For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size.") + logger.info( + "For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size." + ) workgroup = _measure_workgroup_size(device, fast=True) if (devtype == "GPU") and os.environ.get("GPU") == "False": # Environment variable to disable GPU devices continue - pydev = Device(device.name, devtype, device.version, device.driver_version, extensions, - device.global_mem_size, bool(device.available), device.max_compute_units, - device.max_clock_frequency, flop_core, idd, workgroup) + pydev = Device( + device.name, + devtype, + device.version, + device.driver_version, + extensions, + device.global_mem_size, + bool(device.available), + device.max_compute_units, + device.max_clock_frequency, + flop_core, + idd, + workgroup, + check_atomic32(device)[0], + check_atomic64(device)[0], + ) pypl.add_device(pydev) nb_devices += 1 platforms.append(pypl) @@ -371,9 +461,11 @@ class OpenCL(object): def __repr__(self): out = ["OpenCL devices:"] for platformid, platform in enumerate(self.platforms): - deviceids = ["(%s,%s) %s" % (platformid, deviceid, dev.name) - for deviceid, dev in enumerate(platform.devices)] - out.append("[%s] %s: " % (platformid, platform.name) + ", ".join(deviceids)) + deviceids = [ + f"({platformid},{deviceid}) {dev.name}" + for deviceid, dev in enumerate(platform.devices) + ] + out.append(f"[{platformid}] {platform.name}: " + ", ".join(deviceids)) return os.linesep.join(out) def get_platform(self, key): @@ -395,7 +487,9 @@ class OpenCL(object): out = self.platforms[platid] return out - def select_device(self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs): + def select_device( + self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs + ): """ Select a device based on few parameters (at the end, keep the one with most memory) @@ -439,8 +533,15 @@ class OpenCL(object): # Nothing found return None - def create_context(self, devicetype="ALL", useFp64=False, platformid=None, - deviceid=None, cached=True, memory=None, extensions=None): + def create_context( + self, + devicetype="ALL", + platformid=None, + deviceid=None, + cached=True, + memory=None, + extensions=None, + ): """ Choose a device and initiate a context. @@ -450,7 +551,6 @@ class OpenCL(object): E.g.: If Nvidia driver is installed, GPU will succeed but CPU will fail. The AMD SDK kit is required for CPU via OpenCL. :param devicetype: string in ["cpu","gpu", "all", "acc"] - :param useFp64: boolean specifying if double precision will be used: deprecated use extensions=["cl_khr_fp64"] :param platformid: integer :param deviceid: integer :param cached: True if we want to cache the context @@ -460,37 +560,60 @@ class OpenCL(object): """ if extensions is None: extensions = [] - if useFp64: - logger.warning("Deprecation: please select your device using the extension name!, i.e. extensions=['cl_khr_fp64']") - extensions.append('cl_khr_fp64') + ctx = None if (platformid is not None) and (deviceid is not None): platformid = int(platformid) deviceid = int(deviceid) elif "PYOPENCL_CTX" in os.environ: - pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")] - pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0 - platformid, deviceid = pyopencl_ctx + ctx = pyopencl.create_some_context() + # try: + device = ctx.devices[0] + platforms = [ + i for i, p in enumerate(ocl.platforms) if device.platform.name == p.name + ] + if platforms: + platformid = platforms[0] + devices = [ + i + for i, d in enumerate(ocl.platforms[platformid].devices) + if device.name == d.name + ] + if devices: + deviceid = devices[0] + if cached: + self.context_cache[(platformid, deviceid)] = ctx else: ids = ocl.select_device(type=devicetype, extensions=extensions) if ids: platformid, deviceid = ids - ctx = None - if (platformid is not None) and (deviceid is not None): + + if (ctx is None) and (platformid is not None) and (deviceid is not None): if (platformid, deviceid) in self.context_cache: ctx = self.context_cache[(platformid, deviceid)] else: try: - ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]]) + ctx = pyopencl.Context( + devices=[ + pyopencl.get_platforms()[platformid].get_devices()[deviceid] + ] + ) except pyopencl._cl.LogicError as error: self.platforms[platformid].devices[deviceid].set_unavailable() - logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error) + logger.warning( + "Unable to create context on %s/%s: %s", + platformid, + deviceid, + error, + ) ctx = None else: if cached: self.context_cache[(platformid, deviceid)] = ctx if ctx is None: - logger.warning("Last chance to get an OpenCL device ... probably not the one requested") + logger.warning( + "Last chance to get an OpenCL device ... probably not the one requested" + ) ctx = pyopencl.create_some_context(interactive=False) return ctx @@ -560,8 +683,9 @@ def allocate_cl_buffers(buffers, device=None, context=None): for _, _, dtype, size in buffers: ualloc += numpy.dtype(dtype).itemsize * size memory = device.memory - logger.info("%.3fMB are needed on device which has %.3fMB", - ualloc / 1.0e6, memory / 1.0e6) + logger.info( + "%.3fMB are needed on device which has %.3fMB", ualloc / 1.0e6, memory / 1.0e6 + ) if ualloc >= memory: memError = "Fatal error in allocate_buffers." memError += "Not enough device memory for buffers" @@ -571,8 +695,9 @@ def allocate_cl_buffers(buffers, device=None, context=None): # do the allocation try: for name, flag, dtype, size in buffers: - mem[name] = pyopencl.Buffer(context, flag, - numpy.dtype(dtype).itemsize * size) + mem[name] = pyopencl.Buffer( + context, flag, numpy.dtype(dtype).itemsize * size + ) except pyopencl.MemoryError as error: release_cl_buffers(mem) raise MemoryError(error) @@ -589,16 +714,15 @@ def allocate_texture(ctx, shape, hostbuf=None, support_1D=False): do not support 1D images, so 1D images are handled as 2D with one row :param support_1D: force the image to be 1D if the shape has only one dim """ - if len(shape) == 1 and not(support_1D): + if len(shape) == 1 and not (support_1D): shape = (1,) + shape return pyopencl.Image( ctx, pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR, pyopencl.ImageFormat( - pyopencl.channel_order.INTENSITY, - pyopencl.channel_type.FLOAT + pyopencl.channel_order.INTENSITY, pyopencl.channel_type.FLOAT ), - hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32) + hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32), ) @@ -620,7 +744,7 @@ def check_textures_availability(ctx): # There is no way to detect this until a kernel is compiled try: cc = ctx.devices[0].compute_capability_major_nv - textures_available &= (cc >= 3) + textures_available &= cc >= 3 except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU pass # @@ -660,7 +784,7 @@ def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"): :param kernel: kernel or name of the kernel :param what: what is the query about ? :return: int or 3-int for the workgroup size. - + Possible information available are: * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_gr oup_size(X, Y, Z)))) * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1! @@ -668,15 +792,17 @@ def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"): * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint. * 'PRIVATE_MEM_SIZE' Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device - + Further information on: https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html - + """ assert isinstance(program, pyopencl.Program) if not isinstance(kernel, pyopencl.Kernel): kernel_name = kernel - assert kernel in (k.function_name for k in program.all_kernels()), "the kernel exists" + assert kernel in ( + k.function_name for k in program.all_kernels() + ), "the kernel exists" kernel = program.__getattr__(kernel_name) device = program.devices[0] diff --git a/src/silx/opencl/conftest.py b/src/silx/opencl/conftest.py index 1fdc516..f6cf5de 100644 --- a/src/silx/opencl/conftest.py +++ b/src/silx/opencl/conftest.py @@ -1,5 +1,6 @@ import pytest + @pytest.mark.usefixtures("use_opencl") def setup_module(module): pass diff --git a/src/silx/opencl/convolution.py b/src/silx/opencl/convolution.py index 15ef931..99ecd02 100644 --- a/src/silx/opencl/convolution.py +++ b/src/silx/opencl/convolution.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # -# Copyright (c) 2019 European Synchrotron Radiation Facility +# Copyright (c) 2019-2023 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,27 +24,35 @@ # ###########################################################################*/ """Module for convolution on CPU/GPU.""" -from __future__ import absolute_import, print_function, with_statement, division - __authors__ = ["P. Paleo"] __license__ = "MIT" __date__ = "01/08/2019" import numpy as np -from copy import copy # python2 from .common import pyopencl as cl import pyopencl.array as parray from .processing import OpenclProcessing, EventDescription from .utils import ConvolutionInfos + class Convolution(OpenclProcessing): """ A class for performing convolution on CPU/GPU with OpenCL. """ - def __init__(self, shape, kernel, axes=None, mode=None, ctx=None, - devicetype="all", platformid=None, deviceid=None, - profile=False, extra_options=None): + def __init__( + self, + shape, + kernel, + axes=None, + mode=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + extra_options=None, + ): """Constructor of OpenCL Convolution. :param shape: shape of the array. @@ -73,9 +80,14 @@ class Convolution(OpenclProcessing): "allocate_tmp_array": True, "dont_use_textures": False, """ - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) self._configure_extra_options(extra_options) self._determine_use_case(shape, kernel, axes) @@ -91,7 +103,7 @@ class Convolution(OpenclProcessing): } extra_opts = extra_options or {} self.extra_options.update(extra_opts) - self.use_textures = not(self.extra_options["dont_use_textures"]) + self.use_textures = not (self.extra_options["dont_use_textures"]) self.use_textures &= self.check_textures_availability() def _get_dimensions(self, shape, kernel): @@ -136,8 +148,7 @@ class Convolution(OpenclProcessing): if axes in convol_infos.allowed_axes[uc_name]: self.use_case_name = uc_name self.use_case_desc = uc_params["name"] - #~ self.use_case_kernels = uc_params["kernels"].copy() - self.use_case_kernels = copy(uc_params["kernels"]) # TODO use the above line once we get rid of python2 + self.use_case_kernels = uc_params["kernels"].copy() if self.use_case_name is None: raise ValueError( "Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s" @@ -146,8 +157,7 @@ class Convolution(OpenclProcessing): # TODO implement this use case if self.use_case_name == "batched_separable_2D_1D_3D": raise NotImplementedError( - "The use case %s is not implemented" - % self.use_case_name + "The use case %s is not implemented" % self.use_case_name ) # self.axes = axes @@ -171,7 +181,7 @@ class Convolution(OpenclProcessing): "allocate_tmp_array": "data_tmp", } # Nonseparable transforms do not need tmp array - if not(self.separable): + if not (self.separable): self.extra_options["allocate_tmp_array"] = False # Allocate arrays for option_name, array_name in option_array_names.items(): @@ -185,7 +195,7 @@ class Convolution(OpenclProcessing): if isinstance(self.kernel, np.ndarray): self.d_kernel = parray.to_device(self.queue, self.kernel) else: - if not(isinstance(self.kernel, parray.Array)): + if not (isinstance(self.kernel, parray.Array)): raise ValueError("kernel must be either numpy array or pyopencl array") self.d_kernel = self.kernel self._old_input_ref = None @@ -210,7 +220,7 @@ class Convolution(OpenclProcessing): % (self.mode, str(mp.keys())) ) # TODO - if not(self.use_textures) and self.mode.lower() == "constant": + if not (self.use_textures) and self.mode.lower() == "constant": raise NotImplementedError( "mode='constant' is not implemented without textures yet" ) @@ -231,28 +241,30 @@ class Convolution(OpenclProcessing): compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)] if self.use_textures: kernel_files = ["convolution_textures.cl"] - compile_options.extend([ - str("-DIMAGE_DIMS=%d" % self.data_ndim), - str("-DFILTER_DIMS=%d" % self.kernel_ndim), - ]) + compile_options.extend( + [ + str("-DIMAGE_DIMS=%d" % self.data_ndim), + str("-DFILTER_DIMS=%d" % self.kernel_ndim), + ] + ) d_kernel_ref = self.d_kernel_tex else: kernel_files = ["convolution.cl"] d_kernel_ref = self.d_kernel.data - self.compile_kernels( - kernel_files=kernel_files, - compile_options=compile_options - ) + self.compile_kernels(kernel_files=kernel_files, compile_options=compile_options) self.ndrange = self.shape[::-1] self.wg = None kernel_args = [ self.queue, - self.ndrange, self.wg, + self.ndrange, + self.wg, None, None, d_kernel_ref, np.int32(self.kernel.shape[0]), - self.Nx, self.Ny, self.Nz + self.Nx, + self.Ny, + self.Nz, ] if self.kernel_ndim == 2: kernel_args.insert(6, np.int32(self.kernel.shape[1])) @@ -266,10 +278,7 @@ class Convolution(OpenclProcessing): if self.separable: if self.data_tmp is not None: self.swap_pattern = { - 2: [ - ("data_in", "data_tmp"), - ("data_tmp", "data_out") - ], + 2: [("data_in", "data_tmp"), ("data_tmp", "data_out")], 3: [ ("data_in", "data_out"), ("data_out", "data_tmp"), @@ -325,14 +334,14 @@ class Convolution(OpenclProcessing): else: raise ValueError("Please provide either arr= or shape=") if ndim < dim_min or ndim > dim_max: - raise ValueError("%s dimensions should be between %d and %d" - % (name, dim_min, dim_max) + raise ValueError( + "%s dimensions should be between %d and %d" % (name, dim_min, dim_max) ) return ndim def _check_array(self, arr): # TODO allow cl.Buffer - if not(isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)): + if not (isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)): raise TypeError("Expected either pyopencl.array.Array or numpy.ndarray") # TODO composition with ImageProcessing/cast if arr.dtype != np.float32: @@ -354,14 +363,12 @@ class Convolution(OpenclProcessing): self.data_in = array data_in_ref = self.data_in if output is not None: - if not(isinstance(output, np.ndarray)): + if not (isinstance(output, np.ndarray)): self._old_output_ref = self.data_out self.data_out = output # Update OpenCL kernel arguments with new array references self.kernel_args = self._configure_kernel_args( - self.kernel_args, - data_in_ref, - self.data_out + self.kernel_args, data_in_ref, self.data_out ) def _separable_convolution(self): @@ -375,9 +382,7 @@ class Convolution(OpenclProcessing): # Batched: one kernel call in total opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[axis]) opencl_kernel_args = self._configure_kernel_args( - self.kernel_args, - input_ref, - output_ref + self.kernel_args, input_ref, output_ref ) ev = opencl_kernel(*opencl_kernel_args) if self.profile: @@ -398,9 +403,7 @@ class Convolution(OpenclProcessing): self.data_out = self._old_output_ref self._old_output_ref = None self.kernel_args = self._configure_kernel_args( - self.kernel_args, - self.data_in, - self.data_out + self.kernel_args, self.data_in, self.data_out ) def _get_output(self, output): @@ -436,7 +439,4 @@ class Convolution(OpenclProcessing): res = self._get_output(output) return res - __call__ = convolve - - diff --git a/src/silx/opencl/image.py b/src/silx/opencl/image.py index 65e2d5e..ec30e66 100644 --- a/src/silx/opencl/image.py +++ b/src/silx/opencl/image.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- # # Project: silx # https://github.com/silx-kit/silx # -# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -28,8 +27,6 @@ """A general purpose library for manipulating 2D images in 1 or 3 colors """ -from __future__ import absolute_import, print_function, with_statement, division - __author__ = "Jerome Kieffer" __license__ = "MIT" @@ -40,7 +37,6 @@ __contact__ = "jerome.kieffer@esrf.fr" import os import logging import numpy -from collections import OrderedDict from math import floor, ceil, sqrt, log from .common import pyopencl, kernel_workgroup_size @@ -52,20 +48,30 @@ logger = logging.getLogger(__name__) class ImageProcessing(OpenclProcessing): - kernel_files = ["cast", "map", "max_min", "histogram"] - converter = {numpy.dtype(numpy.uint8): "u8_to_float", - numpy.dtype(numpy.int8): "s8_to_float", - numpy.dtype(numpy.uint16): "u16_to_float", - numpy.dtype(numpy.int16): "s16_to_float", - numpy.dtype(numpy.uint32): "u32_to_float", - numpy.dtype(numpy.int32): "s32_to_float", - } - - def __init__(self, shape=None, ncolors=1, template=None, - ctx=None, devicetype="all", platformid=None, deviceid=None, - block_size=None, memory=None, profile=False): + converter = { + numpy.dtype(numpy.uint8): "u8_to_float", + numpy.dtype(numpy.int8): "s8_to_float", + numpy.dtype(numpy.uint16): "u16_to_float", + numpy.dtype(numpy.int16): "s16_to_float", + numpy.dtype(numpy.uint32): "u32_to_float", + numpy.dtype(numpy.int32): "s32_to_float", + } + + def __init__( + self, + shape=None, + ncolors=1, + template=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + block_size=None, + memory=None, + profile=False, + ): """Constructor of the ImageProcessing class :param ctx: actual working context, left to None for automatic @@ -79,9 +85,16 @@ class ImageProcessing(OpenclProcessing): :param profile: switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) """ - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - block_size=block_size, memory=memory, profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + block_size=block_size, + memory=memory, + profile=profile, + ) if template is not None: shape = template.shape if len(shape) > 2: @@ -94,36 +107,51 @@ class ImageProcessing(OpenclProcessing): self.ncolors = ncolors self.shape = shape assert shape is not None - self.buffer_shape = self.shape if self.ncolors == 1 else self.shape + (self.ncolors,) + self.buffer_shape = ( + self.shape if self.ncolors == 1 else self.shape + (self.ncolors,) + ) kernel_files = [os.path.join("image", i) for i in self.kernel_files] - self.compile_kernels(kernel_files, - compile_options="-DNB_COLOR=%i" % self.ncolors) + self.compile_kernels( + kernel_files, compile_options="-DNB_COLOR=%i" % self.ncolors + ) if self.ncolors == 1: img_shape = self.shape else: img_shape = self.shape + (self.ncolors,) - buffers = [BufferDescription("image0_d", img_shape, numpy.float32, None), - BufferDescription("image1_d", img_shape, numpy.float32, None), - BufferDescription("image2_d", img_shape, numpy.float32, None), - BufferDescription("max_min_d", 2, numpy.float32, None), - BufferDescription("cnt_d", 1, numpy.int32, None), ] + buffers = [ + BufferDescription("image0_d", img_shape, numpy.float32, None), + BufferDescription("image1_d", img_shape, numpy.float32, None), + BufferDescription("image2_d", img_shape, numpy.float32, None), + BufferDescription("max_min_d", 2, numpy.float32, None), + BufferDescription("cnt_d", 1, numpy.int32, None), + ] # Temporary buffer for max-min reduction - self.wg_red = kernel_workgroup_size(self.program, self.kernels.max_min_reduction_stage1) + self.wg_red = kernel_workgroup_size( + self.program, self.kernels.max_min_reduction_stage1 + ) if self.wg_red > 1: - self.wg_red = min(self.wg_red, - numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2))))) - tmp = BufferDescription("tmp_max_min_d", 2 * self.wg_red, numpy.float32, None) + self.wg_red = min( + self.wg_red, + numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2)))), + ) + tmp = BufferDescription( + "tmp_max_min_d", 2 * self.wg_red, numpy.float32, None + ) buffers.append(tmp) self.allocate_buffers(buffers, use_array=True) self.cl_mem["cnt_d"].fill(0) def __repr__(self): - return "ImageProcessing for shape=%s, %i colors initalized on %s" % \ - (self.shape, self.ncolors, self.ctx.devices[0].name) - - def _get_in_out_buffers(self, img=None, copy=True, out=None, - out_dtype=None, out_size=None): + return "ImageProcessing for shape=%s, %i colors initalized on %s" % ( + self.shape, + self.ncolors, + self.ctx.devices[0].name, + ) + + def _get_in_out_buffers( + self, img=None, copy=True, out=None, out_dtype=None, out_size=None + ): """Internal method used to select the proper buffers before processing. :param img: expects a numpy array or a pyopencl.array of dim 2 or 3 @@ -132,7 +160,7 @@ class ImageProcessing(OpenclProcessing): :param out_dtype: enforce the type of the output buffer (optional) :param out_size: enforce the size of the output buffer (optional) :return: input_buffer, output_buffer - + Nota: this is not locked. """ events = [] @@ -151,7 +179,9 @@ class ImageProcessing(OpenclProcessing): if out_dtype != numpy.float32 and out_size: name = "%s_%s_d" % (numpy.dtype(out_dtype), out_size) if name not in self.cl_mem: - output_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (out_size,), out_dtype) + output_array = self.cl_mem[name] = pyopencl.array.empty( + self.queue, (out_size,), out_dtype + ) else: output_array = self.cl_mem[name] else: @@ -161,7 +191,9 @@ class ImageProcessing(OpenclProcessing): input_array = self.cl_mem["image1_d"] if isinstance(img, pyopencl.array.Array): if copy: - evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, img.data) + evt = pyopencl.enqueue_copy( + self.queue, self.cl_mem["image1_d"].data, img.data + ) input_array = self.cl_mem["image1_d"] events.append(EventDescription("copy D->D", evt)) else: @@ -172,11 +204,19 @@ class ImageProcessing(OpenclProcessing): # assume this is numpy if img.dtype.itemsize > 4: logger.warning("Casting to float32 on CPU") - evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img, numpy.float32)) + evt = pyopencl.enqueue_copy( + self.queue, + self.cl_mem["image1_d"].data, + numpy.ascontiguousarray(img, numpy.float32), + ) input_array = self.cl_mem["image1_d"] events.append(EventDescription("cast+copy H->D", evt)) else: - evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img)) + evt = pyopencl.enqueue_copy( + self.queue, + self.cl_mem["image1_d"].data, + numpy.ascontiguousarray(img), + ) input_array = self.cl_mem["image1_d"] events.append(EventDescription("copy H->D", evt)) if self.profile: @@ -184,8 +224,8 @@ class ImageProcessing(OpenclProcessing): return input_array, output_array def to_float(self, img, copy=True, out=None): - """ Takes any array and convert it to a float array for ease of processing. - + """Takes any array and convert it to a float array for ease of processing. + :param img: expects a numpy array or a pyopencl.array of dim 2 or 3 :param copy: set to False to directly re-use a pyopencl array :param out: provide an output buffer to store the result @@ -197,16 +237,23 @@ class ImageProcessing(OpenclProcessing): input_array, output_array = self._get_in_out_buffers(img, copy, out) if (img.dtype.itemsize > 4) or (img.dtype == numpy.float32): # copy device -> device, already there as float32 - ev = pyopencl.enqueue_copy(self.queue, output_array.data, input_array.data) + ev = pyopencl.enqueue_copy( + self.queue, output_array.data, input_array.data + ) events.append(EventDescription("copy D->D", ev)) else: # Cast to float: name = self.converter[img.dtype] kernel = self.kernels.get_kernel(name) - ev = kernel(self.queue, (self.shape[1], self.shape[0]), None, - input_array.data, output_array.data, - numpy.int32(self.shape[1]), numpy.int32(self.shape[0]) - ) + ev = kernel( + self.queue, + (self.shape[1], self.shape[0]), + None, + input_array.data, + output_array.data, + numpy.int32(self.shape[1]), + numpy.int32(self.shape[0]), + ) events.append(EventDescription("cast %s" % name, ev)) if self.profile: @@ -221,14 +268,14 @@ class ImageProcessing(OpenclProcessing): def normalize(self, img, mini=0.0, maxi=1.0, copy=True, out=None): """Scale the intensity of the image so that the minimum is 0 and the maximum is 1.0 (or any value suggested). - + :param img: numpy array or pyopencl array of dim 2 or 3 and of type float :param mini: Expected minimum value :param maxi: expected maxiumum value :param copy: set to False to use directly the input buffer :param out: provides an output buffer. prevents a copy D->H - - This uses a min/max reduction in two stages plus a map operation + + This uses a min/max reduction in two stages plus a map operation """ assert img.shape == self.buffer_shape events = [] @@ -238,34 +285,55 @@ class ImageProcessing(OpenclProcessing): if self.wg_red == 1: # Probably on MacOS CPU WG==1 --> serial code. kernel = self.kernels.get_kernel("max_min_serial") - evt = kernel(self.queue, (1,), (1,), - input_array.data, - size, - self.cl_mem["max_min_d"].data) + evt = kernel( + self.queue, + (1,), + (1,), + input_array.data, + size, + self.cl_mem["max_min_d"].data, + ) ed = EventDescription("max_min_serial", evt) events.append(ed) else: stage1 = self.kernels.max_min_reduction_stage1 stage2 = self.kernels.max_min_reduction_stage2 local_mem = pyopencl.LocalMemory(int(self.wg_red * 8)) - k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),), - input_array.data, - self.cl_mem["tmp_max_min_d"].data, - size, - local_mem) - k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),), - self.cl_mem["tmp_max_min_d"].data, - self.cl_mem["max_min_d"].data, - local_mem) - - events += [EventDescription("max_min_stage1", k1), - EventDescription("max_min_stage2", k2)] - - evt = self.kernels.normalize_image(self.queue, (self.shape[1], self.shape[0]), None, - input_array.data, output_array.data, - numpy.int32(self.shape[1]), numpy.int32(self.shape[0]), - self.cl_mem["max_min_d"].data, - numpy.float32(mini), numpy.float32(maxi)) + k1 = stage1( + self.queue, + (int(self.wg_red**2),), + (int(self.wg_red),), + input_array.data, + self.cl_mem["tmp_max_min_d"].data, + size, + local_mem, + ) + k2 = stage2( + self.queue, + (int(self.wg_red),), + (int(self.wg_red),), + self.cl_mem["tmp_max_min_d"].data, + self.cl_mem["max_min_d"].data, + local_mem, + ) + + events += [ + EventDescription("max_min_stage1", k1), + EventDescription("max_min_stage2", k2), + ] + + evt = self.kernels.normalize_image( + self.queue, + (self.shape[1], self.shape[0]), + None, + input_array.data, + output_array.data, + numpy.int32(self.shape[1]), + numpy.int32(self.shape[0]), + self.cl_mem["max_min_d"].data, + numpy.float32(mini), + numpy.float32(maxi), + ) events.append(EventDescription("normalize", evt)) if self.profile: self.events += events @@ -277,32 +345,32 @@ class ImageProcessing(OpenclProcessing): output_array.finish() return output_array - def histogram(self, img=None, nbins=255, range=None, - log_scale=False, copy=True, out=None): + def histogram( + self, img=None, nbins=255, range=None, log_scale=False, copy=True, out=None + ): """Compute the histogram of a set of data. - + :param img: input image. If None, use the one already on the device :param nbins: number of bins - :param range: the lower and upper range of the bins. If not provided, - range is simply ``(a.min(), a.max())``. Values outside the - range are ignored. The first element of the range must be + :param range: the lower and upper range of the bins. If not provided, + range is simply ``(a.min(), a.max())``. Values outside the + range are ignored. The first element of the range must be less than or equal to the second. - :param log_scale: perform the binning in lograrithmic scale. + :param log_scale: perform the binning in lograrithmic scale. Open to extension :param copy: unset to directly use the input buffer without copy - :param out: use a provided array for offering the result + :param out: use a provided array for offering the result :return: histogram (size=nbins), edges (size=nbins+1) - API similar to numpy + API similar to numpy """ assert img.shape == self.buffer_shape input_array = self.to_float(img, copy=copy, out=self.cl_mem["image0_d"]) events = [] with self.sem: - input_array, output_array = self._get_in_out_buffers(input_array, copy=False, - out=out, - out_dtype=numpy.int32, - out_size=nbins) + input_array, output_array = self._get_in_out_buffers( + input_array, copy=False, out=out, out_dtype=numpy.int32, out_size=nbins + ) if range is None: # measure actually the bounds @@ -311,27 +379,43 @@ class ImageProcessing(OpenclProcessing): # Probably on MacOS CPU WG==1 --> serial code. kernel = self.kernels.get_kernel("max_min_serial") - evt = kernel(self.queue, (1,), (1,), - input_array.data, - size, - self.cl_mem["max_min_d"].data) + evt = kernel( + self.queue, + (1,), + (1,), + input_array.data, + size, + self.cl_mem["max_min_d"].data, + ) events.append(EventDescription("max_min_serial", evt)) else: stage1 = self.kernels.max_min_reduction_stage1 stage2 = self.kernels.max_min_reduction_stage2 - local_mem = pyopencl.LocalMemory(int(self.wg_red * 2 * numpy.dtype("float32").itemsize)) - k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),), - input_array.data, - self.cl_mem["tmp_max_min_d"].data, - size, - local_mem) - k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),), - self.cl_mem["tmp_max_min_d"].data, - self.cl_mem["max_min_d"].data, - local_mem) - - events += [EventDescription("max_min_stage1", k1), - EventDescription("max_min_stage2", k2)] + local_mem = pyopencl.LocalMemory( + int(self.wg_red * 2 * numpy.dtype("float32").itemsize) + ) + k1 = stage1( + self.queue, + (int(self.wg_red**2),), + (int(self.wg_red),), + input_array.data, + self.cl_mem["tmp_max_min_d"].data, + size, + local_mem, + ) + k2 = stage2( + self.queue, + (int(self.wg_red),), + (int(self.wg_red),), + self.cl_mem["tmp_max_min_d"].data, + self.cl_mem["max_min_d"].data, + local_mem, + ) + + events += [ + EventDescription("max_min_stage1", k1), + EventDescription("max_min_stage2", k2), + ] maxi, mini = self.cl_mem["max_min_d"].get() else: mini = numpy.float32(min(range)) @@ -341,13 +425,17 @@ class ImageProcessing(OpenclProcessing): tmp_size = nb_engines * nbins name = "tmp_int32_%s_d" % (tmp_size) if name not in self.cl_mem: - tmp_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (tmp_size,), numpy.int32) + tmp_array = self.cl_mem[name] = pyopencl.array.empty( + self.queue, (tmp_size,), numpy.int32 + ) else: tmp_array = self.cl_mem[name] edge_name = "tmp_float32_%s_d" % (nbins + 1) if edge_name not in self.cl_mem: - edges_array = self.cl_mem[edge_name] = pyopencl.array.empty(self.queue, (nbins + 1,), numpy.float32) + edges_array = self.cl_mem[edge_name] = pyopencl.array.empty( + self.queue, (nbins + 1,), numpy.float32 + ) else: edges_array = self.cl_mem[edge_name] @@ -359,21 +447,27 @@ class ImageProcessing(OpenclProcessing): else: map_operation = numpy.int32(0) kernel = self.kernels.get_kernel("histogram") - wg = min(device.max_work_group_size, - 1 << (int(ceil(log(nbins, 2)))), - self.kernels.max_workgroup_size(kernel)) - evt = kernel(self.queue, (wg * nb_engines,), (wg,), - input_array.data, - numpy.int32(input_array.size), - mini, - maxi, - map_operation, - output_array.data, - edges_array.data, - numpy.int32(nbins), - tmp_array.data, - self.cl_mem["cnt_d"].data, - shared) + wg = min( + device.max_work_group_size, + 1 << (int(ceil(log(nbins, 2)))), + self.kernels.max_workgroup_size(kernel), + ) + evt = kernel( + self.queue, + (wg * nb_engines,), + (wg,), + input_array.data, + numpy.int32(input_array.size), + mini, + maxi, + map_operation, + output_array.data, + edges_array.data, + numpy.int32(nbins), + tmp_array.data, + self.cl_mem["cnt_d"].data, + shared, + ) events.append(EventDescription("histogram", evt)) if self.profile: diff --git a/src/silx/opencl/linalg.py b/src/silx/opencl/linalg.py index a64122a..573ebce 100644 --- a/src/silx/opencl/linalg.py +++ b/src/silx/opencl/linalg.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # # Copyright (c) 2016 European Synchrotron Radiation Facility @@ -25,8 +24,6 @@ # ###########################################################################*/ """Module for basic linear algebra in OpenCL""" -from __future__ import absolute_import, print_function, with_statement, division - __authors__ = ["P. Paleo"] __license__ = "MIT" __date__ = "01/08/2019" @@ -37,14 +34,23 @@ from .common import pyopencl from .processing import EventDescription, OpenclProcessing import pyopencl.array as parray + cl = pyopencl class LinAlg(OpenclProcessing): - kernel_files = ["linalg.cl"] - def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False): + def __init__( + self, + shape, + do_checks=False, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + ): """ Create a "Linear Algebra" plan for a given image shape. @@ -59,32 +65,34 @@ class LinAlg(OpenclProcessing): store profiling elements (makes code slightly slower) """ - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) self.d_gradient = parray.empty(self.queue, shape, np.complex64) self.d_gradient.fill(np.complex64(0.0)) self.d_image = parray.empty(self.queue, shape, np.float32) self.d_image.fill(np.float32(0.0)) - self.add_to_cl_mem({ - "d_gradient": self.d_gradient, - "d_image": self.d_image - }) + self.add_to_cl_mem({"d_gradient": self.d_gradient, "d_image": self.d_image}) self.wg2D = None self.shape = shape - self.ndrange2D = ( - int(self.shape[1]), - int(self.shape[0]) - ) + self.ndrange2D = (int(self.shape[1]), int(self.shape[0])) self.do_checks = bool(do_checks) OpenclProcessing.compile_kernels(self, self.kernel_files) @staticmethod def check_array(array, dtype, shape, arg_name): if array.shape != shape or array.dtype != dtype: - raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype))) + raise ValueError( + "%s should be a %s array of type %s" + % (arg_name, str(shape), str(dtype)) + ) def get_data_references(self, src, dst, default_src_ref, default_dst_ref): """ @@ -100,7 +108,9 @@ class LinAlg(OpenclProcessing): elif isinstance(dst, cl.Buffer): dst_ref = dst else: - raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer") + raise ValueError( + "dst should be either pyopencl.array.Array or pyopencl.Buffer" + ) else: dst_ref = default_dst_ref @@ -130,21 +140,15 @@ class LinAlg(OpenclProcessing): self.check_array(image, np.float32, self.shape, "image") if dst is not None: self.check_array(dst, np.complex64, self.shape, "dst") - img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data) + img_ref, grad_ref = self.get_data_references( + image, dst, self.d_image.data, self.d_gradient.data + ) # Prepare the kernel call - kernel_args = [ - img_ref, - grad_ref, - n_x, - n_y - ] + kernel_args = [img_ref, grad_ref, n_x, n_y] # Call the gradient kernel evt = self.kernels.kern_gradient2D( - self.queue, - self.ndrange2D, - self.wg2D, - *kernel_args + self.queue, self.ndrange2D, self.wg2D, *kernel_args ) self.events.append(EventDescription("gradient2D", evt)) # TODO: should the wait be done in any case ? @@ -187,21 +191,15 @@ class LinAlg(OpenclProcessing): self.check_array(gradient, np.complex64, self.shape, "gradient") if dst is not None: self.check_array(dst, np.float32, self.shape, "dst") - grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data) + grad_ref, img_ref = self.get_data_references( + gradient, dst, self.d_gradient.data, self.d_image.data + ) # Prepare the kernel call - kernel_args = [ - grad_ref, - img_ref, - n_x, - n_y - ] + kernel_args = [grad_ref, img_ref, n_x, n_y] # Call the gradient kernel evt = self.kernels.kern_divergence2D( - self.queue, - self.ndrange2D, - self.wg2D, - *kernel_args + self.queue, self.ndrange2D, self.wg2D, *kernel_args ) self.events.append(EventDescription("divergence2D", evt)) # TODO: should the wait be done in any case ? diff --git a/src/silx/opencl/medfilt.py b/src/silx/opencl/medfilt.py index d4e425b..a18c5a4 100644 --- a/src/silx/opencl/medfilt.py +++ b/src/silx/opencl/medfilt.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- # # Project: Azimuthal integration # https://github.com/silx-kit/pyFAI # -# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -31,8 +30,6 @@ The target is to mimic the signature of scipy.signal.medfilt and scipy.medfilt2 The first implementation targets 2D implementation where this operation is costly (~10s/2kx2k image) """ -from __future__ import absolute_import, print_function, with_statement, division - __author__ = "Jerome Kieffer" __license__ = "MIT" @@ -42,7 +39,6 @@ __contact__ = "jerome.kieffer@esrf.fr" import logging import numpy -from collections import OrderedDict from .common import pyopencl, kernel_workgroup_size from .processing import EventDescription, OpenclProcessing, BufferDescription @@ -56,23 +52,33 @@ logger = logging.getLogger(__name__) class MedianFilter2D(OpenclProcessing): """A class for doing median filtering using OpenCL""" + buffers = [ - BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY), - BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY), - BufferDescription("image", 1, numpy.float32, mf.READ_WRITE), - ] + BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY), + BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY), + BufferDescription("image", 1, numpy.float32, mf.READ_WRITE), + ] kernel_files = ["preprocess.cl", "bitonic.cl", "medfilt.cl"] - mapping = {numpy.int8: "s8_to_float", - numpy.uint8: "u8_to_float", - numpy.int16: "s16_to_float", - numpy.uint16: "u16_to_float", - numpy.uint32: "u32_to_float", - numpy.int32: "s32_to_float"} - - def __init__(self, shape, kernel_size=(3, 3), - ctx=None, devicetype="all", platformid=None, deviceid=None, - block_size=None, profile=False - ): + mapping = { + numpy.int8: "s8_to_float", + numpy.uint8: "u8_to_float", + numpy.int16: "s16_to_float", + numpy.uint16: "u16_to_float", + numpy.uint32: "u32_to_float", + numpy.int32: "s32_to_float", + } + + def __init__( + self, + shape, + kernel_size=(3, 3), + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + block_size=None, + profile=False, + ): """Constructor of the OpenCL 2D median filtering class :param shape: shape of the images to treat @@ -86,34 +92,56 @@ class MedianFilter2D(OpenclProcessing): :param profile: switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) """ - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - block_size=block_size, profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + block_size=block_size, + profile=profile, + ) self.shape = shape self.size = self.shape[0] * self.shape[1] self.kernel_size = self.calc_kernel_size(kernel_size) self.workgroup_size = (self.calc_wg(self.kernel_size), 1) # 3D kernel - self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags) - for i in self.__class__.buffers] + self.buffers = [ + BufferDescription(i.name, i.size * self.size, i.dtype, i.flags) + for i in self.__class__.buffers + ] self.allocate_buffers() self.local_mem = self._get_local_mem(self.workgroup_size[0]) - OpenclProcessing.compile_kernels(self, self.kernel_files, "-D NIMAGE=%i" % self.size) + OpenclProcessing.compile_kernels( + self, self.kernel_files, "-D NIMAGE=%i" % self.size + ) self.set_kernel_arguments() def set_kernel_arguments(self): - """Parametrize all kernel arguments - """ + """Parametrize all kernel arguments""" for val in self.mapping.values(): - self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("image_raw", "image"))) - self.cl_kernel_args["medfilt2d"] = OrderedDict((("image", self.cl_mem["image"]), - ("result", self.cl_mem["result"]), - ("local", self.local_mem), - ("khs1", numpy.int32(self.kernel_size[0] // 2)), # Kernel half-size along dim1 (lines) - ("khs2", numpy.int32(self.kernel_size[1] // 2)), # Kernel half-size along dim2 (columns) - ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines) - ("width", numpy.int32(self.shape[1])))) -# ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns)) + self.cl_kernel_args[val] = dict( + ((i, self.cl_mem[i]) for i in ("image_raw", "image")) + ) + self.cl_kernel_args["medfilt2d"] = dict( + ( + ("image", self.cl_mem["image"]), + ("result", self.cl_mem["result"]), + ("local", self.local_mem), + ( + "khs1", + numpy.int32(self.kernel_size[0] // 2), + ), # Kernel half-size along dim1 (lines) + ( + "khs2", + numpy.int32(self.kernel_size[1] // 2), + ), # Kernel half-size along dim2 (columns) + ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines) + ("width", numpy.int32(self.shape[1])), + ) + ) + + # ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns)) def _get_local_mem(self, wg): return pyopencl.LocalMemory(wg * 32) # 4byte per float, 8 element per thread @@ -128,13 +156,26 @@ class MedianFilter2D(OpenclProcessing): dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0]) events = [] if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize): - copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type)) + copy_image = pyopencl.enqueue_copy( + self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type) + ) events.append(EventDescription("copy H->D %s" % dest, copy_image)) else: - copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data)) + copy_image = pyopencl.enqueue_copy( + self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data) + ) kernel = getattr(self.program, self.mapping[data.dtype.type]) - cast_to_float = kernel(self.queue, (self.size,), None, self.cl_mem["image_raw"], self.cl_mem[dest]) - events += [EventDescription("copy H->D %s" % dest, copy_image), EventDescription("cast to float", cast_to_float)] + cast_to_float = kernel( + self.queue, + (self.size,), + None, + self.cl_mem["image_raw"], + self.cl_mem[dest], + ) + events += [ + EventDescription("copy H->D %s" % dest, copy_image), + EventDescription("cast to float", cast_to_float), + ] if self.profile: self.events += events @@ -183,7 +224,9 @@ class MedianFilter2D(OpenclProcessing): amws = kernel_workgroup_size(self.program, "medfilt2d") logger.warning("max actual workgroup size: %s, expected: %s", amws, wg) if wg > amws: - raise RuntimeError("Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws)) + raise RuntimeError( + "Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws) + ) localmem = self._get_local_mem(wg) @@ -200,11 +243,11 @@ class MedianFilter2D(OpenclProcessing): kwargs["khs2"] = kernel_half_size[1] kwargs["height"] = numpy.int32(image.shape[0]) kwargs["width"] = numpy.int32(image.shape[1]) -# for k, v in kwargs.items(): -# print("%s: %s (%s)" % (k, v, type(v))) - mf2d = self.kernels.medfilt2d(self.queue, - (wg, image.shape[1]), - (wg, 1), *list(kwargs.values())) + # for k, v in kwargs.items(): + # print("%s: %s (%s)" % (k, v, type(v))) + mf2d = self.kernels.medfilt2d( + self.queue, (wg, image.shape[1]), (wg, 1), *list(kwargs.values()) + ) events.append(EventDescription("median filter 2d", mf2d)) result = numpy.empty(image.shape, numpy.float32) @@ -214,12 +257,12 @@ class MedianFilter2D(OpenclProcessing): if self.profile: self.events += events return result + __call__ = medfilt2d @staticmethod def calc_kernel_size(kernel_size): - """format the kernel size to be a 2-length numpy array of int32 - """ + """format the kernel size to be a 2-length numpy array of int32""" kernel_size = numpy.asarray(kernel_size, dtype=numpy.int32) if kernel_size.shape == (): kernel_size = numpy.repeat(kernel_size.item(), 2).astype(numpy.int32) @@ -252,7 +295,7 @@ class _MedFilt2d(object): * The filling mode in scipy.signal.medfilt2d is zero-padding * This implementation is equivalent to: - scipy.ndimage.filters.median_filter(ary, kernel_size, mode="nearest") + scipy.ndimage.median_filter(ary, kernel_size, mode="nearest") """ image = numpy.atleast_2d(ary) @@ -266,4 +309,5 @@ class _MedFilt2d(object): cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx) return cls.median_filter.medfilt2d(image, kernel_size=kernel_size) + medfilt2d = _MedFilt2d.medfilt2d diff --git a/src/silx/opencl/processing.py b/src/silx/opencl/processing.py index 8b81f7f..6db21d0 100644 --- a/src/silx/opencl/processing.py +++ b/src/silx/opencl/processing.py @@ -1,10 +1,9 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: S I L X project # https://github.com/silx-kit/silx # -# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -38,22 +37,32 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "02/03/2021" +__date__ = "09/11/2022" __status__ = "stable" import sys import os import logging import gc -from collections import namedtuple, OrderedDict +from collections import namedtuple import numpy import threading -from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability +from .common import ( + ocl, + pyopencl, + release_cl_buffers, + query_kernel_info, + allocate_texture, + check_textures_availability, +) from .utils import concatenate_cl_kernel import platform BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"]) -EventDescription = namedtuple("EventDescription", ["name", "event"]) +EventDescription = namedtuple( + "EventDescription", ["name", "event"] +) # Deprecated, please use ProfileDescription +ProfileDescription = namedtuple("ProfileDescription", ["name", "start", "stop"]) logger = logging.getLogger(__name__) @@ -72,8 +81,9 @@ class KernelContainer(object): def get_kernels(self): "return the dictionary with all kernels" - return dict(item for item in self.__dict__.items() - if not item[0].startswith("_")) + return dict( + item for item in self.__dict__.items() if not item[0].startswith("_") + ) def get_kernel(self, name): "get a kernel from its name" @@ -96,7 +106,9 @@ class KernelContainer(object): else: kernel = self.get_kernel(kernel_name) - return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE") + return query_kernel_info( + self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE" + ) class OpenclProcessing(object): @@ -108,14 +120,24 @@ class OpenclProcessing(object): * Functions to compile kernels, cache them and clean them * helper functions to clone the object """ + # Example of how to create an output buffer of 10 floats - buffers = [BufferDescription("output", 10, numpy.float32, None), - ] + buffers = [ + BufferDescription("output", 10, numpy.float32, None), + ] # list of kernel source files to be concatenated before compilation of the program kernel_files = [] - def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None, - block_size=None, memory=None, profile=False): + def __init__( + self, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + block_size=None, + memory=None, + profile=False, + ): """Constructor of the abstract OpenCL processing class :param ctx: actual working context, left to None for automatic @@ -140,9 +162,12 @@ class OpenclProcessing(object): if ctx: self.ctx = ctx else: - self.ctx = ocl.create_context(devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - memory=memory) + self.ctx = ocl.create_context( + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + memory=memory, + ) device_name = self.ctx.devices[0].name.strip() platform_name = self.ctx.devices[0].platform.name.strip() platform = ocl.get_platform(platform_name) @@ -158,8 +183,7 @@ class OpenclProcessing(object): return check_textures_availability(self.ctx) def __del__(self): - """Destructor: release all buffers and programs - """ + """Destructor: release all buffers and programs""" try: self.reset_log() self.free_kernels() @@ -201,19 +225,27 @@ class OpenclProcessing(object): ualloc = 0 for buf in buffers: ualloc += numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size) - logger.info("%.3fMB are needed on device: %s, which has %.3fMB", - ualloc / 1.0e6, self.device, self.device.memory / 1.0e6) + logger.info( + "%.3fMB are needed on device: %s, which has %.3fMB", + ualloc / 1.0e6, + self.device, + self.device.memory / 1.0e6, + ) if ualloc >= self.device.memory: - raise MemoryError("Fatal error in allocate_buffers. Not enough " - " device memory for buffers (%lu requested, %lu available)" - % (ualloc, self.device.memory)) + raise MemoryError( + "Fatal error in allocate_buffers. Not enough " + " device memory for buffers (%lu requested, %lu available)" + % (ualloc, self.device.memory) + ) # do the allocation try: if use_array: for buf in buffers: - mem[buf.name] = pyopencl.array.empty(self.queue, buf.size, buf.dtype) + mem[buf.name] = pyopencl.array.empty( + self.queue, buf.size, buf.dtype + ) else: for buf in buffers: size = numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size) @@ -241,8 +273,7 @@ class OpenclProcessing(object): return self.kernels.max_workgroup_size(kernel_name) def free_buffers(self): - """free all device.memory allocated on the device - """ + """free all device.memory allocated on the device""" with self.sem: for key, buf in list(self.cl_mem.items()): if buf is not None: @@ -272,20 +303,22 @@ class OpenclProcessing(object): compile_options = compile_options or self.get_compiler_options() logger.info("Compiling file %s with options %s", kernel_files, compile_options) try: - self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options) + self.program = pyopencl.Program(self.ctx, kernel_src).build( + options=compile_options + ) except (pyopencl.MemoryError, pyopencl.LogicError) as error: raise MemoryError(error) else: self.kernels = KernelContainer(self.program) def free_kernels(self): - """Free all kernels - """ + """Free all kernels""" for kernel in self.cl_kernel_args: self.cl_kernel_args[kernel] = [] self.kernels = None self.program = None + # Methods about Profiling def set_profiling(self, value=True): """Switch On/Off the profiling flag of the command queue to allow debugging @@ -300,85 +333,121 @@ class OpenclProcessing(object): if self.queue is not None: self.queue.finish() if self.profile: - self.queue = pyopencl.CommandQueue(self.ctx, - properties=pyopencl.command_queue_properties.PROFILING_ENABLE) + self.queue = pyopencl.CommandQueue( + self.ctx, + properties=pyopencl.command_queue_properties.PROFILING_ENABLE, + ) else: self.queue = pyopencl.CommandQueue(self.ctx) + # Update all memory-objects with the new queue: + for obj, cl_obj in list(self.cl_mem.items()): + if isinstance(cl_obj, pyopencl.array.Array): + self.cl_mem[obj] = cl_obj.with_queue(self.queue) def profile_add(self, event, desc): """ Add an OpenCL event to the events lists, if profiling is enabled. - :param event: silx.opencl.processing.EventDescription. + :param event: pyopencl.NanyEvent. :param desc: event description """ if self.profile: - self.events.append(EventDescription(desc, event)) - - def allocate_texture(self, shape, hostbuf=None, support_1D=False): - return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D) + try: + profile = event.profile + self.events.append(ProfileDescription(desc, profile.start, profile.end)) + except Exception: + # Probably the driver does not support profiling + pass - def transfer_to_texture(self, arr, tex_ref): + def profile_multi(self, event_lists): """ - Transfer an array to a texture. + Extract profiling info from several OpenCL event, if profiling is enabled. - :param arr: Input array. Can be a numpy array or a pyopencl array. - :param tex_ref: texture reference (pyopencl._cl.Image). + :param event_lists: list of ("desc", pyopencl.NanyEvent). """ - copy_args = [self.queue, tex_ref, arr] - shp = arr.shape - ndim = arr.ndim - if ndim == 1: - # pyopencl and OpenCL < 1.2 do not support image1d_t - # force 2D with one row in this case - # ~ ndim = 2 - shp = (1,) + shp - copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]} - if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array - # D->D copy - copy_args[2] = arr.data - copy_kwargs["offset"] = 0 - ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs) - self.profile_add(ev, "Transfer to texture") + if self.profile: + for event_desc in event_lists: + if isinstance(event_desc, ProfileDescription): + self.events.append(event_desc) + else: + if ( + isinstance(event_desc, EventDescription) + or "__len__" in dir(event_desc) + and len(event_desc) == 2 + ): + desc, event = event_desc + else: + desc = "?" + event = event_desc + try: + profile = event.profile + start = profile.start + end = profile.end + except Exception: + # probably an unfinished job ... use old-style. + self.events.append(event_desc) + else: + self.events.append(ProfileDescription(desc, start, end)) def log_profile(self, stats=False): """If we are in profiling mode, prints out all timing for every single OpenCL call - + :param stats: if True, prints the statistics on each kernel instead of all execution timings :return: list of lines to print """ total_time = 0.0 out = [""] if stats: - stats = OrderedDict() - out.append(f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}") - out.append(f"{'Kernel name':>50} (count): min median max mean std") + stats = {} + out.append( + f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}" + ) + out.append( + f"{'Kernel name':>50} (count): min median max mean std" + ) else: stats = None out.append(f"Profiling info for OpenCL: {self.__class__.__name__}") if self.profile: for e in self.events: - if "__len__" in dir(e) and len(e) >= 2: + if isinstance(e, ProfileDescription): + name = e[0] + t0 = e[1] + t1 = e[2] + elif ( + isinstance(e, EventDescription) + or "__len__" in dir(e) + and len(e) == 2 + ): name = e[0] pr = e[1].profile t0 = pr.start t1 = pr.end - et = 1e-6 * (t1 - t0) - total_time += et - if stats is None: - out.append(f"{name:>50} : {et:.3f}ms") + else: + name = "?" + t0 = e.profile.start + t1 = e.profile.end + + et = 1e-6 * (t1 - t0) + total_time += et + if stats is None: + out.append(f"{name:>50} : {et:.3f}ms") + else: + if name in stats: + stats[name].append(et) else: - if name in stats: - stats[name].append(et) - else: - stats[name] = [et] + stats[name] = [et] if stats is not None: for k, v in stats.items(): n = numpy.array(v) - out.append(f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}") + out.append( + f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}" + ) out.append("_" * 80) - out.append(f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms") + out.append( + f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms" + ) logger.info(os.linesep.join(out)) return out @@ -390,13 +459,42 @@ class OpenclProcessing(object): with self.sem: self.events = [] + # Methods about textures + def allocate_texture(self, shape, hostbuf=None, support_1D=False): + return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D) + + def transfer_to_texture(self, arr, tex_ref): + """ + Transfer an array to a texture. + + :param arr: Input array. Can be a numpy array or a pyopencl array. + :param tex_ref: texture reference (pyopencl._cl.Image). + """ + copy_args = [self.queue, tex_ref, arr] + shp = arr.shape + ndim = arr.ndim + if ndim == 1: + # pyopencl and OpenCL < 1.2 do not support image1d_t + # force 2D with one row in this case + # ~ ndim = 2 + shp = (1,) + shp + copy_kwargs = {"origin": (0,) * ndim, "region": shp[::-1]} + if not (isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array + # D->D copy + copy_args[2] = arr.data + copy_kwargs["offset"] = 0 + ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs) + self.profile_add(ev, "Transfer to texture") + @property def x87_volatile_option(self): # this is running 32 bits OpenCL woth POCL if self._X87_VOLATILE is None: - if (platform.machine() in ("i386", "i686", "x86_64", "AMD64") and - (tuple.__itemsize__ == 4) and - self.ctx.devices[0].platform.name == 'Portable Computing Language'): + if ( + platform.machine() in ("i386", "i686", "x86_64", "AMD64") + and (tuple.__itemsize__ == 4) + and self.ctx.devices[0].platform.name == "Portable Computing Language" + ): self._X87_VOLATILE = "-DX87_VOLATILE=volatile" else: self._X87_VOLATILE = "" @@ -413,6 +511,7 @@ class OpenclProcessing(object): option_list.append(self.x87_volatile_option) return " ".join(i for i in option_list if i) + # This should be implemented by concrete class # def __copy__(self): # """Shallow copy of the object diff --git a/src/silx/opencl/projection.py b/src/silx/opencl/projection.py index c02faf6..cf4b625 100644 --- a/src/silx/opencl/projection.py +++ b/src/silx/opencl/projection.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # # Copyright (c) 2016-2020 European Synchrotron Radiation Facility @@ -25,8 +24,6 @@ # ###########################################################################*/ """Module for tomographic projector on the GPU""" -from __future__ import absolute_import, print_function, with_statement, division - __authors__ = ["A. Mirone, P. Paleo"] __license__ = "MIT" __date__ = "01/08/2019" @@ -51,14 +48,23 @@ class Projection(OpenclProcessing): A class for performing a tomographic projection (Radon Transform) using OpenCL """ + kernel_files = ["proj.cl", "array_utils.cl"] logger.warning("Forward Projecter is untested and unsuported for now") - def __init__(self, slice_shape, angles, axis_position=None, - detector_width=None, normalize=False, ctx=None, - devicetype="all", platformid=None, deviceid=None, - profile=False - ): + def __init__( + self, + slice_shape, + angles, + axis_position=None, + detector_width=None, + normalize=False, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + ): """Constructor of the OpenCL projector. :param slice_shape: shape of the slice: (num_rows, num_columns). @@ -87,9 +93,14 @@ class Projection(OpenclProcessing): # if sys.platform.startswith('darwin'): # assuming no discrete GPU # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet") - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) self.shape = slice_shape self.axis_pos = axis_position self.angles = angles @@ -98,24 +109,27 @@ class Projection(OpenclProcessing): # Default values if self.axis_pos is None: - self.axis_pos = (self.shape[1] - 1) / 2. + self.axis_pos = (self.shape[1] - 1) / 2.0 if self.dwidth is None: self.dwidth = self.shape[1] - if not(np.iterable(self.angles)): + if not (np.iterable(self.angles)): if self.angles is None: self.nprojs = self.shape[0] else: self.nprojs = self.angles - self.angles = np.linspace(start=0, - stop=np.pi, - num=self.nprojs, - endpoint=False).astype(dtype=np.float32) + self.angles = np.linspace( + start=0, stop=np.pi, num=self.nprojs, endpoint=False + ).astype(dtype=np.float32) else: self.nprojs = len(self.angles) - self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos) # TODO: custom - self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos) # TODO: custom + self.offset_x = -np.float32( + (self.shape[1] - 1) / 2.0 - self.axis_pos + ) # TODO: custom + self.offset_y = -np.float32( + (self.shape[0] - 1) / 2.0 - self.axis_pos + ) # TODO: custom # Reset axis_pos once offset are computed - self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.) + self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.0) # Workgroup, ndrange and shared size self.dimgrid_x = _idivup(self.dwidth, 16) @@ -126,118 +140,122 @@ class Projection(OpenclProcessing): self.wg = (16, 16) self.ndrange = ( int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1 - int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1 + int(self.dimgrid_y) * self.wg[1], # int(): pyopencl <= 2015.1 ) self._use_textures = self.check_textures_availability() # Allocate memory self.buffers = [ - BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE), + BufferDescription( + "_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE + ), BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY), BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY), - BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY), - BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY), + BufferDescription( + "d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY + ), + BufferDescription( + "d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY + ), ] d_axis_corrections = parray.empty(self.queue, self.nprojs, np.float32) d_axis_corrections.fill(np.float32(0.0)) - self.add_to_cl_mem( - { - "d_axis_corrections": d_axis_corrections - } + self.add_to_cl_mem({"d_axis_corrections": d_axis_corrections}) + self._tmp_extended_img = np.zeros( + (self.shape[0] + 2, self.shape[1] + 2), dtype=np.float32 ) - self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2), - dtype=np.float32) - if not(self._use_textures): + if not (self._use_textures): self.allocate_slice() else: self.allocate_textures() self.allocate_buffers() - self._ex_sino = np.zeros((self._dimrecy, self._dimrecx), - dtype=np.float32) - if not(self._use_textures): - self.cl_mem["d_slice"].fill(0.) + self._ex_sino = np.zeros((self._dimrecy, self._dimrecx), dtype=np.float32) + if not (self._use_textures): + self.cl_mem["d_slice"].fill(0.0) # enqueue_fill_buffer has issues if opencl 1.2 is not present # ~ pyopencl.enqueue_fill_buffer( - # ~ self.queue, - # ~ self.cl_mem["d_slice"], - # ~ np.float32(0), - # ~ 0, - # ~ self._tmp_extended_img.size * _sizeof(np.float32) + # ~ self.queue, + # ~ self.cl_mem["d_slice"], + # ~ np.float32(0), + # ~ 0, + # ~ self._tmp_extended_img.size * _sizeof(np.float32) # ~ ) # Precomputations self.compute_angles() self.proj_precomputations() - self.cl_mem["d_axis_corrections"].fill(0.) + self.cl_mem["d_axis_corrections"].fill(0.0) # enqueue_fill_buffer has issues if opencl 1.2 is not present # ~ pyopencl.enqueue_fill_buffer( - # ~ self.queue, - # ~ self.cl_mem["d_axis_corrections"], - # ~ np.float32(0), - # ~ 0, - # ~ self.nprojs*_sizeof(np.float32) - # ~ ) + # ~ self.queue, + # ~ self.cl_mem["d_axis_corrections"], + # ~ np.float32(0), + # ~ 0, + # ~ self.nprojs*_sizeof(np.float32) + # ~ ) # Shorthands self._d_sino = self.cl_mem["_d_sino"] compile_options = None - if not(self._use_textures): + if not (self._use_textures): compile_options = "-DDONT_USE_TEXTURES" OpenclProcessing.compile_kernels( - self, - self.kernel_files, - compile_options=compile_options + self, self.kernel_files, compile_options=compile_options ) # check that workgroup can actually be (16, 16) - self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu") + self.compiletime_workgroup_size = self.kernels.max_workgroup_size( + "forward_kernel_cpu" + ) def compute_angles(self): angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs - angles2[:self.nprojs] = np.copy(self.angles) - angles2[self.nprojs:] = angles2[self.nprojs - 1] + angles2[: self.nprojs] = np.copy(self.angles) + angles2[self.nprojs :] = angles2[self.nprojs - 1] self.angles2 = angles2 pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2) def allocate_slice(self): - ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32) + ary = parray.empty( + self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32 + ) ary.fill(0) self.add_to_cl_mem({"d_slice": ary}) def allocate_textures(self): self.d_image_tex = pyopencl.Image( - self.ctx, - mf.READ_ONLY | mf.USE_HOST_PTR, - pyopencl.ImageFormat( - pyopencl.channel_order.INTENSITY, - pyopencl.channel_type.FLOAT - ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T), - ) + self.ctx, + mf.READ_ONLY | mf.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, pyopencl.channel_type.FLOAT + ), + hostbuf=np.ascontiguousarray(self._tmp_extended_img.T), + ) def transfer_to_texture(self, image): image2 = image - if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32): + if not (image.flags["C_CONTIGUOUS"] and image.dtype == np.float32): image2 = np.ascontiguousarray(image) - if not(self._use_textures): + if not (self._use_textures): # TODO: create NoneEvent return self.transfer_to_slice(image2) # ~ return pyopencl.enqueue_copy( - # ~ self.queue, - # ~ self.cl_mem["d_slice"].data, - # ~ image2, - # ~ origin=(1, 1), - # ~ region=image.shape[::-1] - # ~ ) + # ~ self.queue, + # ~ self.cl_mem["d_slice"].data, + # ~ image2, + # ~ origin=(1, 1), + # ~ region=image.shape[::-1] + # ~ ) else: return pyopencl.enqueue_copy( - self.queue, - self.d_image_tex, - image2, - origin=(1, 1), - region=image.shape[::-1] - ) + self.queue, + self.d_image_tex, + image2, + origin=(1, 1), + region=image.shape[::-1], + ) def transfer_device_to_texture(self, d_image): - if not(self._use_textures): + if not (self._use_textures): # TODO this copy should not be necessary return self.cpy2d_to_slice(d_image) else: @@ -247,7 +265,10 @@ class Projection(OpenclProcessing): d_image, offset=0, origin=(1, 1), - region=(int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl <= 2015.2 + region=( + int(self.shape[1]), + int(self.shape[0]), + ), # self.shape[::-1] # pyopencl <= 2015.2 ) def transfer_to_slice(self, image): @@ -326,7 +347,7 @@ class Projection(OpenclProcessing): np.int32(self._dimrecx), np.int32((0, 0)), np.int32((0, 0)), - sino_shape_ocl + sino_shape_ocl, ) return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) @@ -334,7 +355,10 @@ class Projection(OpenclProcessing): """ copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2) """ - ndrange = (int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl < 2015.2 + ndrange = ( + int(self.shape[1]), + int(self.shape[0]), + ) # self.shape[::-1] # pyopencl < 2015.2 wg = None slice_shape_ocl = np.int32(ndrange) kernel_args = ( @@ -344,7 +368,7 @@ class Projection(OpenclProcessing): np.int32(self.shape[1]), np.int32((1, 1)), np.int32((0, 0)), - slice_shape_ocl + slice_shape_ocl, ) return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) @@ -367,7 +391,7 @@ class Projection(OpenclProcessing): self.transfer_to_slice(image) slice_ref = self.cl_mem["d_slice"].data else: - if not(self._use_textures): + if not (self._use_textures): slice_ref = self.cl_mem["d_slice"].data else: slice_ref = self.d_image_tex @@ -389,23 +413,17 @@ class Projection(OpenclProcessing): self.offset_x, self.offset_y, np.int32(1), # josephnoclip, 1 by default - np.int32(self.normalize) + np.int32(self.normalize), ) # Call the kernel - if not(self._use_textures): + if not (self._use_textures): event_pj = self.kernels.forward_kernel_cpu( - self.queue, - self.ndrange, - self.wg, - *kernel_args + self.queue, self.ndrange, self.wg, *kernel_args ) else: event_pj = self.kernels.forward_kernel( - self.queue, - self.ndrange, - self.wg, - *kernel_args + self.queue, self.ndrange, self.wg, *kernel_args ) events.append(EventDescription("projection", event_pj)) if dst is None: @@ -413,7 +431,7 @@ class Projection(OpenclProcessing): ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino) events.append(EventDescription("copy D->H result", ev)) ev.wait() - res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth]) + res = np.copy(self._ex_sino[: self.nprojs, : self.dwidth]) else: ev = self.cpy2d_to_sino(dst) events.append(EventDescription("copy D->D result", ev)) diff --git a/src/silx/opencl/reconstruction.py b/src/silx/opencl/reconstruction.py index 2c84aee..c80a0ef 100644 --- a/src/silx/opencl/reconstruction.py +++ b/src/silx/opencl/reconstruction.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # # Copyright (c) 2016 European Synchrotron Radiation Facility @@ -25,8 +24,6 @@ # ###########################################################################*/ """Module for tomographic reconstruction algorithms""" -from __future__ import absolute_import, print_function, with_statement, division - __authors__ = ["P. Paleo"] __license__ = "MIT" __date__ = "01/08/2019" @@ -42,6 +39,7 @@ from .linalg import LinAlg import pyopencl.array as parray from pyopencl.elementwise import ElementwiseKernel + logger = logging.getLogger(__name__) cl = pyopencl @@ -68,13 +66,26 @@ class ReconstructionAlgorithm(OpenclProcessing): store profiling elements (makes code slightly slower) """ - def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, - ctx=None, devicetype="all", platformid=None, deviceid=None, - profile=False - ): - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - profile=profile) + def __init__( + self, + sino_shape, + slice_shape=None, + axis_position=None, + angles=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + ): + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) # Create a backprojector self.backprojector = Backprojection( @@ -83,7 +94,7 @@ class ReconstructionAlgorithm(OpenclProcessing): axis_position=axis_position, angles=angles, ctx=self.ctx, - profile=profile + profile=profile, ) # Create a projector self.projector = Projection( @@ -93,7 +104,7 @@ class ReconstructionAlgorithm(OpenclProcessing): detector_width=self.backprojector.num_bins, normalize=False, ctx=self.ctx, - profile=profile + profile=profile, ) self.sino_shape = sino_shape self.is_cpu = self.backprojector.is_cpu @@ -102,32 +113,34 @@ class ReconstructionAlgorithm(OpenclProcessing): self.d_data.fill(0.0) self.d_sino = parray.empty_like(self.d_data) self.d_sino.fill(0.0) - self.d_x = parray.empty(self.queue, - self.backprojector.slice_shape, - dtype=np.float32) + self.d_x = parray.empty( + self.queue, self.backprojector.slice_shape, dtype=np.float32 + ) self.d_x.fill(0.0) self.d_x_old = parray.empty_like(self.d_x) self.d_x_old.fill(0.0) - self.add_to_cl_mem({ - "d_data": self.d_data, - "d_sino": self.d_sino, - "d_x": self.d_x, - "d_x_old": self.d_x_old, - }) + self.add_to_cl_mem( + { + "d_data": self.d_data, + "d_sino": self.d_sino, + "d_x": self.d_x, + "d_x_old": self.d_x_old, + } + ) def proj(self, d_slice, d_sino): """ Project d_slice to d_sino """ - self.projector.transfer_device_to_texture(d_slice.data) #.wait() + self.projector.transfer_device_to_texture(d_slice.data) # .wait() self.projector.projection(dst=d_sino) def backproj(self, d_sino, d_slice): """ Backproject d_sino to d_slice """ - self.backprojector.transfer_device_to_texture(d_sino.data) #.wait() + self.backprojector.transfer_device_to_texture(d_sino.data) # .wait() self.backprojector.backprojection(dst=d_slice) @@ -156,15 +169,30 @@ class SIRT(ReconstructionAlgorithm): implementation. """ - def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, - ctx=None, devicetype="all", platformid=None, deviceid=None, - profile=False - ): - - ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape, - axis_position=axis_position, angles=angles, - ctx=ctx, devicetype=devicetype, platformid=platformid, - deviceid=deviceid, profile=profile) + def __init__( + self, + sino_shape, + slice_shape=None, + axis_position=None, + angles=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + ): + ReconstructionAlgorithm.__init__( + self, + sino_shape, + slice_shape=slice_shape, + axis_position=axis_position, + angles=angles, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) self.compute_preconditioners() def compute_preconditioners(self): @@ -181,26 +209,31 @@ class SIRT(ReconstructionAlgorithm): # r_{i,i} = 1/(sum_j a_{i,j}) slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32) - R = 1./self.projector.projection(slice_ones) # could be all done on GPU, but I want extra checks - R[np.logical_not(np.isfinite(R))] = 1. # In the case where the rotation axis is excentred + R = 1.0 / self.projector.projection( + slice_ones + ) # could be all done on GPU, but I want extra checks + R[ + np.logical_not(np.isfinite(R)) + ] = 1.0 # In the case where the rotation axis is excentred self.d_R = parray.to_device(self.queue, R) # c_{j,j} = 1/(sum_i a_{i,j}) sino_ones = np.ones(self.sino_shape, dtype=np.float32) - C = 1./self.backprojector.backprojection(sino_ones) - C[np.logical_not(np.isfinite(C))] = 1. # In the case where the rotation axis is excentred + C = 1.0 / self.backprojector.backprojection(sino_ones) + C[ + np.logical_not(np.isfinite(C)) + ] = 1.0 # In the case where the rotation axis is excentred self.d_C = parray.to_device(self.queue, C) - self.add_to_cl_mem({ - "d_R": self.d_R, - "d_C": self.d_C - }) + self.add_to_cl_mem({"d_R": self.d_R, "d_C": self.d_C}) # TODO: compute and possibly return the residual def run(self, data, n_it): """ Run n_it iterations of the SIRT algorithm. """ - cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32))) + cl.enqueue_copy( + self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)) + ) d_x_old = self.d_x_old d_x = self.d_x @@ -257,26 +290,44 @@ class TV(ReconstructionAlgorithm): the AMD opencl implementation. """ - def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, - ctx=None, devicetype="all", platformid=None, deviceid=None, - profile=False - ): - ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape, - axis_position=axis_position, angles=angles, - ctx=ctx, devicetype=devicetype, platformid=platformid, - deviceid=deviceid, profile=profile) + def __init__( + self, + sino_shape, + slice_shape=None, + axis_position=None, + angles=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + ): + ReconstructionAlgorithm.__init__( + self, + sino_shape, + slice_shape=slice_shape, + axis_position=axis_position, + angles=angles, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) self.compute_preconditioners() # Create a LinAlg instance self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx) # Positivity constraint - self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a", "a[i] = max(a[i], 0.0f);") + self.elwise_clamp = ElementwiseKernel( + self.ctx, "float *a", "a[i] = max(a[i], 0.0f);" + ) # Projection onto the L-infinity ball of radius Lambda self.elwise_proj_linf = ElementwiseKernel( self.ctx, "float2* a, float Lambda", "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);", - "elwise_proj_linf" + "elwise_proj_linf", ) # Additional arrays self.linalg.gradient(self.d_x) @@ -287,11 +338,13 @@ class TV(ReconstructionAlgorithm): self.d_p.fill(0) self.d_q.fill(0) self.d_tmp.fill(0) - self.add_to_cl_mem({ - "d_p": self.d_p, - "d_q": self.d_q, - "d_tmp": self.d_tmp, - }) + self.add_to_cl_mem( + { + "d_p": self.d_p, + "d_q": self.d_q, + "d_tmp": self.d_tmp, + } + ) self.theta = 1.0 @@ -311,30 +364,36 @@ class TV(ReconstructionAlgorithm): # Compute the diagonal preconditioner "Sigma" slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32) - Sigma_k = 1./self.projector.projection(slice_ones) - Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1. + Sigma_k = 1.0 / self.projector.projection(slice_ones) + Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1.0 self.d_Sigma_k = parray.to_device(self.queue, Sigma_k) self.d_Sigma_kp1 = self.d_Sigma_k + 1 # TODO: memory vs computation - self.Sigma_grad = 1/2.0 # For discrete gradient, sum|D_i,j| = 2 along lines or cols + self.Sigma_grad = ( + 1 / 2.0 + ) # For discrete gradient, sum|D_i,j| = 2 along lines or cols # Compute the diagonal preconditioner "Tau" sino_ones = np.ones(self.sino_shape, dtype=np.float32) C = self.backprojector.backprojection(sino_ones) - Tau = 1./(C + 2.) + Tau = 1.0 / (C + 2.0) self.d_Tau = parray.to_device(self.queue, Tau) - self.add_to_cl_mem({ - "d_Sigma_k": self.d_Sigma_k, - "d_Sigma_kp1": self.d_Sigma_kp1, - "d_Tau": self.d_Tau - }) + self.add_to_cl_mem( + { + "d_Sigma_k": self.d_Sigma_k, + "d_Sigma_kp1": self.d_Sigma_kp1, + "d_Tau": self.d_Tau, + } + ) def run(self, data, n_it, Lambda, pos_constraint=False): """ Run n_it iterations of the TV-regularized reconstruction, with the regularization parameter Lambda. """ - cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32))) + cl.enqueue_copy( + self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)) + ) d_x = self.d_x d_x_old = self.d_x_old @@ -351,7 +410,7 @@ class TV(ReconstructionAlgorithm): for k in range(0, n_it): # Update primal variables d_x_old[:] = d_x[:] - #~ x = x + Tau*div(p) - Tau*Kadj(q) + # ~ x = x + Tau*div(p) - Tau*Kadj(q) self.backproj(d_q, d_tmp) self.linalg.divergence(d_p) # TODO: this in less than three ops (one kernel ?) @@ -363,20 +422,20 @@ class TV(ReconstructionAlgorithm): self.elwise_clamp(d_x) # Update dual variables - #~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda) + # ~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda) d_tmp[:] = d_x[:] # FIXME: mul_add is out of place, put an equivalent thing in linalg... - #~ d_tmp.mul_add(1 + theta, d_x_old, -theta) - d_tmp *= 1+self.theta - d_tmp -= self.theta*d_x_old + # ~ d_tmp.mul_add(1 + theta, d_x_old, -theta) + d_tmp *= 1 + self.theta + d_tmp -= self.theta * d_x_old self.linalg.gradient(d_tmp) # TODO: out of place mul_add - #~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad) + # ~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad) self.linalg.cl_mem["d_gradient"] *= self.Sigma_grad d_p += self.linalg.cl_mem["d_gradient"] self.elwise_proj_linf(d_p, Lambda) - #~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k) + # ~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k) self.proj(d_tmp, d_sino) # TODO: this in less instructions d_sino -= self.d_data diff --git a/src/silx/opencl/setup.py b/src/silx/opencl/setup.py deleted file mode 100644 index 10fb1be..0000000 --- a/src/silx/opencl/setup.py +++ /dev/null @@ -1,48 +0,0 @@ -# coding: utf-8 -# -# Copyright (C) 2016-2017 European Synchrotron Radiation Facility -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# - -from __future__ import division - -__contact__ = "jerome.kieffer@esrf.eu" -__license__ = "MIT" -__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__authors__ = ["J. Kieffer"] -__date__ = "16/10/2017" - -import os.path -from numpy.distutils.misc_util import Configuration - - -def configuration(parent_package='', top_path=None): - config = Configuration('opencl', parent_package, top_path) - path = os.path.dirname(os.path.abspath(__file__)) - if os.path.exists(os.path.join(path, 'sift')): - config.add_subpackage('sift') - config.add_subpackage('codec') - config.add_subpackage('test') - return config - - -if __name__ == "__main__": - from numpy.distutils.core import setup - setup(configuration=configuration) diff --git a/src/silx/opencl/sinofilter.py b/src/silx/opencl/sinofilter.py index d608744..fc447de 100644 --- a/src/silx/opencl/sinofilter.py +++ b/src/silx/opencl/sinofilter.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # -# Copyright (c) 2016-2019 European Synchrotron Radiation Facility +# Copyright (c) 2016-2023 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,8 +24,6 @@ # ###########################################################################*/ """Module for sinogram filtering on CPU/GPU.""" -from __future__ import absolute_import, print_function, with_statement, division - __authors__ = ["P. Paleo"] __license__ = "MIT" __date__ = "07/06/2019" @@ -41,8 +38,6 @@ from .processing import OpenclProcessing from ..math.fft.clfft import CLFFT, __have_clfft__ from ..math.fft.npfft import NPFFT from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter -from ..utils.deprecation import deprecated - class SinoFilter(OpenclProcessing): @@ -53,12 +48,21 @@ class SinoFilter(OpenclProcessing): - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x) - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x) """ + kernel_files = ["array_utils.cl"] powers = generate_powers() - def __init__(self, sino_shape, filter_name=None, ctx=None, - devicetype="all", platformid=None, deviceid=None, - profile=False, extra_options=None): + def __init__( + self, + sino_shape, + filter_name=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + profile=False, + extra_options=None, + ): """Constructor of OpenCL FFT-Convolve. :param sino_shape: shape of the sinogram. @@ -75,9 +79,14 @@ class SinoFilter(OpenclProcessing): :param dict extra_options: Advanced extra options. Current options are: cutoff, use_numpy_fft """ - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + profile=profile, + ) self._init_extra_options(extra_options) self._calculate_shapes(sino_shape) @@ -95,8 +104,9 @@ class SinoFilter(OpenclProcessing): if self.ndim == 2: n_angles, dwidth = sino_shape else: - raise ValueError("Invalid sinogram number of dimensions: " - "expected 2 dimensions") + raise ValueError( + "Invalid sinogram number of dimensions: " "expected 2 dimensions" + ) self.sino_shape = sino_shape self.n_angles = n_angles self.dwidth = dwidth @@ -113,14 +123,14 @@ class SinoFilter(OpenclProcessing): Current options are: cutoff, """ self.extra_options = { - "cutoff": 1., + "cutoff": 1.0, "use_numpy_fft": False, } if extra_options is not None: self.extra_options.update(extra_options) def _init_fft(self): - if __have_clfft__ and not(self.extra_options["use_numpy_fft"]): + if __have_clfft__ and not (self.extra_options["use_numpy_fft"]): self.fft_backend = "opencl" self.fft = CLFFT( self.sino_padded_shape, @@ -130,17 +140,22 @@ class SinoFilter(OpenclProcessing): ) else: self.fft_backend = "numpy" - print("The gpyfft module was not found. The Fourier transforms " - "will be done on CPU. For more performances, it is advised " - "to install gpyfft.""") + print( + "The gpyfft module was not found. The Fourier transforms " + "will be done on CPU. For more performances, it is advised " + "to install gpyfft." + "" + ) self.fft = NPFFT( template=np.zeros(self.sino_padded_shape, "f"), axes=(-1,), ) def _allocate_memory(self): - self.d_filter_f = parray.zeros(self.queue, (self.sino_f_shape[-1],), np.complex64) - self.is_cpu = (self.device.type == "CPU") + self.d_filter_f = parray.zeros( + self.queue, (self.sino_f_shape[-1],), np.complex64 + ) + self.is_cpu = self.device.type == "CPU" # These are already allocated by FFT() if using the opencl backend if self.fft_backend == "opencl": self.d_sino_padded = self.fft.data_in @@ -163,7 +178,9 @@ class SinoFilter(OpenclProcessing): self.dwidth_padded, self.filter_name, cutoff=self.extra_options["cutoff"], - )[:self.dwidth_padded // 2 + 1] # R2C + )[ + : self.dwidth_padded // 2 + 1 + ] # R2C self.set_filter(filter_f, normalize=True) def set_filter(self, h_filt, normalize=True): @@ -184,7 +201,7 @@ class SinoFilter(OpenclProcessing): """ % (self.sino_f_shape[-1], h_filt.size) ) - if not(np.iscomplexobj(h_filt)): + if not (np.iscomplexobj(h_filt)): print("Warning: expected a complex Fourier filter") self.filter_f = h_filt if normalize: @@ -195,24 +212,27 @@ class SinoFilter(OpenclProcessing): def _init_kernels(self): OpenclProcessing.compile_kernels(self, self.kernel_files) h, w = self.d_sino_f.shape - self.mult_kern_args = (self.queue, (int(w), (int(h))), None, - self.d_sino_f.data, - self.d_filter_f.data, - np.int32(w), - np.int32(h)) + self.mult_kern_args = ( + self.queue, + (int(w), (int(h))), + None, + self.d_sino_f.data, + self.d_filter_f.data, + np.int32(w), + np.int32(h), + ) def check_array(self, arr): if arr.dtype != np.float32: raise ValueError("Expected data type = numpy.float32") if arr.shape != self.sino_shape: - raise ValueError("Expected sinogram shape %s, got %s" % - (self.sino_shape, arr.shape)) - if not(isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)): - raise ValueError("Expected either numpy.ndarray or " - "pyopencl.array.Array") - - def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0), - src_offset=(0, 0)): + raise ValueError( + "Expected sinogram shape %s, got %s" % (self.sino_shape, arr.shape) + ) + if not (isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)): + raise ValueError("Expected either numpy.ndarray or " "pyopencl.array.Array") + + def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0), src_offset=(0, 0)): """ :param dst: @@ -222,18 +242,23 @@ class SinoFilter(OpenclProcessing): :param src_offset: """ shape = tuple(int(i) for i in transfer_shape[::-1]) - ev = self.kernels.cpy2d(self.queue, shape, None, - dst.data, - src.data, - np.int32(dst.shape[1]), - np.int32(src.shape[1]), - np.int32(dst_offset), - np.int32(src_offset), - np.int32(transfer_shape[::-1])) + ev = self.kernels.cpy2d( + self.queue, + shape, + None, + dst.data, + src.data, + np.int32(dst.shape[1]), + np.int32(src.shape[1]), + np.int32(dst_offset), + np.int32(src_offset), + np.int32(transfer_shape[::-1]), + ) ev.wait() - def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0), - src_offset=(0, 0)): + def copy2d_host( + self, dst, src, transfer_shape, dst_offset=(0, 0), src_offset=(0, 0) + ): """ :param dst: @@ -245,7 +270,9 @@ class SinoFilter(OpenclProcessing): s = transfer_shape do = dst_offset so = src_offset - dst[do[0]:do[0] + s[0], do[1]:do[1] + s[1]] = src[so[0]:so[0] + s[0], so[1]:so[1] + s[1]] + dst[do[0] : do[0] + s[0], do[1] : do[1] + s[1]] = src[ + so[0] : so[0] + s[0], so[1] : so[1] + s[1] + ] def _prepare_input_sino(self, sino): """ @@ -272,7 +299,7 @@ class SinoFilter(OpenclProcessing): self.d_sino_padded.finish() # should not be required here else: # Numpy backend: FFT/mult/IFFT are done on host. - if not(isinstance(sino, np.ndarray)): + if not (isinstance(sino, np.ndarray)): # Numpy backend + pyopencl input: need to copy D->H self.tmp_sino_host[:] = sino[:] h_sino_ref = self.tmp_sino_host @@ -296,9 +323,11 @@ class SinoFilter(OpenclProcessing): # As pyopencl does not support rectangular copies, we first have # to call a kernel doing rectangular copy D->D, then do a copy # D->H. - self.copy2d(dst=self.tmp_sino_device, - src=self.d_sino_padded, - transfer_shape=self.sino_shape) + self.copy2d( + dst=self.tmp_sino_device, + src=self.d_sino_padded, + transfer_shape=self.sino_shape, + ) if self.is_cpu: self.tmp_sino_device.finish() # should not be required here res[:] = self.tmp_sino_device.get()[:] @@ -309,11 +338,13 @@ class SinoFilter(OpenclProcessing): if self.is_cpu: res.finish() # should not be required here else: - if not(isinstance(res, np.ndarray)): + if not (isinstance(res, np.ndarray)): # Numpy backend + pyopencl output: rect copy H->H + copy H->D - self.copy2d_host(dst=self.tmp_sino_host, - src=self.d_sino_padded, - transfer_shape=self.sino_shape) + self.copy2d_host( + dst=self.tmp_sino_host, + src=self.d_sino_padded, + transfer_shape=self.sino_shape, + ) res[:] = self.tmp_sino_host[:] else: # Numpy backend + numpy output: rect copy H->H @@ -334,9 +365,7 @@ class SinoFilter(OpenclProcessing): def _multiply_fourier(self): if self.fft_backend == "opencl": # Everything is on device. Call the multiplication kernel. - ev = self.kernels.inplace_complex_mul_2Dby1D( - *self.mult_kern_args - ) + ev = self.kernels.inplace_complex_mul_2Dby1D(*self.mult_kern_args) ev.wait() if self.is_cpu: self.d_sino_f.finish() # should not be required here @@ -379,57 +408,3 @@ class SinoFilter(OpenclProcessing): # ~ return output __call__ = filter_sino - - - - -# ------------------- -# - Compatibility - -# ------------------- - - -def nextpow2(N): - p = 1 - while p < N: - p *= 2 - return p - - -@deprecated(replacement="Backprojection.sino_filter", since_version="0.10") -def fourier_filter(sino, filter_=None, fft_size=None): - """Simple np based implementation of fourier space filter. - This function is deprecated, please use silx.opencl.sinofilter.SinoFilter. - - :param sino: of shape shape = (num_projs, num_bins) - :param filter: filter function to apply in fourier space - :fft_size: size on which perform the fft. May be larger than the sino array - :return: filtered sinogram - """ - assert sino.ndim == 2 - num_projs, num_bins = sino.shape - if fft_size is None: - fft_size = nextpow2(num_bins * 2 - 1) - else: - assert fft_size >= num_bins - if fft_size == num_bins: - sino_zeropadded = sino.astype(np.float32) - else: - sino_zeropadded = np.zeros((num_projs, fft_size), - dtype=np.complex64) - sino_zeropadded[:, :num_bins] = sino.astype(np.float32) - - if filter_ is None: - h = np.zeros(fft_size, dtype=np.float32) - L2 = fft_size // 2 + 1 - h[0] = 1 / 4. - j = np.linspace(1, L2, L2 // 2, False) - h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2) - h[L2:] = np.copy(h[1:L2 - 1][::-1]) - filter_ = np.fft.fft(h).astype(np.complex64) - - # Linear convolution - sino_f = np.fft.fft(sino, fft_size) - sino_f = sino_f * filter_ - sino_filtered = np.fft.ifft(sino_f)[:, :num_bins].real - - return np.ascontiguousarray(sino_filtered.real, dtype=np.float32) diff --git a/src/silx/opencl/sparse.py b/src/silx/opencl/sparse.py index 514589a..9baa3a0 100644 --- a/src/silx/opencl/sparse.py +++ b/src/silx/opencl/sparse.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # # Copyright (c) 2019 European Synchrotron Radiation Facility @@ -25,8 +24,6 @@ # ###########################################################################*/ """Module for data sparsification on CPU/GPU.""" -from __future__ import absolute_import, print_function, with_statement, division - __authors__ = ["P. Paleo"] __license__ = "MIT" __date__ = "07/06/2019" @@ -38,11 +35,13 @@ from pyopencl.scan import GenericScanKernel from pyopencl.tools import dtype_to_ctype from .common import pyopencl as cl from .processing import OpenclProcessing, EventDescription, BufferDescription + mf = cl.mem_flags CSRData = namedtuple("CSRData", ["data", "indices", "indptr"]) + def tuple_to_csrdata(arrs): """ Converts a 3-tuple to a CSRData namedtuple. @@ -52,13 +51,23 @@ def tuple_to_csrdata(arrs): return CSRData(data=arrs[0], indices=arrs[1], indptr=arrs[2]) - class CSR(OpenclProcessing): kernel_files = ["sparse.cl"] - def __init__(self, shape, dtype="f", max_nnz=None, idx_dtype=numpy.int32, - ctx=None, devicetype="all", platformid=None, deviceid=None, - block_size=None, memory=None, profile=False): + def __init__( + self, + shape, + dtype="f", + max_nnz=None, + idx_dtype=numpy.int32, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + block_size=None, + memory=None, + profile=False, + ): """ Compute Compressed Sparse Row format of an image (2D matrix). It is designed to be compatible with scipy.sparse.csr_matrix. @@ -80,10 +89,16 @@ class CSR(OpenclProcessing): for information on the other parameters. """ - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - block_size=block_size, memory=memory, - profile=profile) + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + block_size=block_size, + memory=memory, + profile=profile, + ) self._set_parameters(shape, dtype, max_nnz, idx_dtype) self._allocate_memory() self._setup_kernels() @@ -96,23 +111,23 @@ class CSR(OpenclProcessing): self.shape = shape self.size = numpy.prod(shape) self._set_idx_dtype(idx_dtype) - assert len(shape) == 2 # + assert len(shape) == 2 # if max_nnz is None: - self.max_nnz = numpy.prod(shape) # worst case + self.max_nnz = numpy.prod(shape) # worst case else: self.max_nnz = int(max_nnz) self._set_dtype(dtype) - def _set_idx_dtype(self, idx_dtype): idx_dtype = numpy.dtype(idx_dtype) if idx_dtype.kind not in ["i", "u"]: raise ValueError("Not an integer type: %s" % idx_dtype) # scan value type must have size divisible by 4 bytes if idx_dtype.itemsize % 4 != 0: - raise ValueError("Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes") - self.indice_dtype = idx_dtype # - + raise ValueError( + "Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes" + ) + self.indice_dtype = idx_dtype # def _set_dtype(self, dtype): self.dtype = numpy.dtype(dtype) @@ -122,42 +137,44 @@ class CSR(OpenclProcessing): self._c_zero_str = "0.0f" elif self.dtype == numpy.dtype(numpy.float64): self._c_zero_str = "0.0" - else: # assuming integer + else: # assuming integer self._c_zero_str = "0" self.c_dtype = dtype_to_ctype(self.dtype) self.idx_c_dtype = dtype_to_ctype(self.indice_dtype) - def _allocate_memory(self): - self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ? + self.is_cpu = self.device.type == "CPU" # move to OpenclProcessing ? self.buffers = [ BufferDescription("array", (self.size,), self.dtype, mf.READ_ONLY), BufferDescription("data", (self.max_nnz,), self.dtype, mf.READ_WRITE), - BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE), - BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE), + BufferDescription( + "indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE + ), + BufferDescription( + "indptr", (self.shape[0] + 1,), self.indice_dtype, mf.READ_WRITE + ), ] self.allocate_buffers(use_array=True) for arr_name in ["array", "data", "indices", "indptr"]: setattr(self, arr_name, self.cl_mem[arr_name]) - self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty() + self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty() self._old_array = self.array self._old_data = self.data self._old_indices = self.indices self._old_indptr = self.indptr - def _setup_kernels(self): self._setup_compaction_kernel() self._setup_decompaction_kernel() - def _setup_compaction_kernel(self): kernel_signature = str( "__global %s *data, \ __global %s *data_compacted, \ __global %s *indices, \ __global %s* indptr \ - """ % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype) + " + "" % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype) ) if self.dtype.kind == "f": map_nonzero_expr = "(fabs(data[i]) > %s) ? 1 : 0" % self._c_zero_str @@ -167,10 +184,12 @@ class CSR(OpenclProcessing): raise ValueError("Unknown data type") self.scan_kernel = GenericScanKernel( - self.ctx, self.indice_dtype, + self.ctx, + self.indice_dtype, arguments=kernel_signature, input_expr=map_nonzero_expr, - scan_expr="a+b", neutral="0", + scan_expr="a+b", + neutral="0", output_statement=""" // item is the running sum of input_expr(i), i.e the cumsum of "nonzero" if (prev_item != item) { @@ -186,7 +205,6 @@ class CSR(OpenclProcessing): preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)", ) - def _setup_decompaction_kernel(self): OpenclProcessing.compile_kernels( self, @@ -195,18 +213,17 @@ class CSR(OpenclProcessing): "-DIMAGE_WIDTH=%d" % self.shape[1], "-DDTYPE=%s" % self.c_dtype, "-DIDX_DTYPE=%s" % self.idx_c_dtype, - ] + ], ) device = self.ctx.devices[0] wg_x = min( device.max_work_group_size, 32, - self.kernels.max_workgroup_size("densify_csr") + self.kernels.max_workgroup_size("densify_csr"), ) self._decomp_wg = (wg_x, 1) self._decomp_grid = (self._decomp_wg[0], self.shape[0]) - # -------------------------------------------------------------------------- # -------------------------- Array utils ----------------------------------- # -------------------------------------------------------------------------- @@ -222,7 +239,6 @@ class CSR(OpenclProcessing): assert arr.size == self.size assert arr.dtype == self.dtype - # TODO handle pyopencl Buffer def check_sparse_arrays(self, csr_data): """ @@ -237,12 +253,11 @@ class CSR(OpenclProcessing): assert arr.ndim == 1 assert csr_data.data.size <= self.max_nnz assert csr_data.indices.size <= self.max_nnz - assert csr_data.indptr.size == self.shape[0]+1 + assert csr_data.indptr.size == self.shape[0] + 1 assert csr_data.data.dtype == self.dtype assert csr_data.indices.dtype == self.indice_dtype assert csr_data.indptr.dtype == self.indice_dtype - def set_array(self, arr): """ Set the provided array as the current context 2D matrix. @@ -262,23 +277,25 @@ class CSR(OpenclProcessing): else: raise ValueError("Expected pyopencl array or numpy array") - def set_sparse_arrays(self, csr_data): if csr_data is None: return self.check_sparse_arrays(csr_data) - for name, arr in {"data": csr_data.data, "indices": csr_data.indices, "indptr": csr_data.indptr}.items(): + for name, arr in { + "data": csr_data.data, + "indices": csr_data.indices, + "indptr": csr_data.indptr, + }.items(): # The current array is a device array. Don't copy, use it directly if isinstance(arr, parray.Array): setattr(self, "_old_" + name, getattr(self, name)) setattr(self, name, arr) # The current array is a numpy.ndarray: copy H2D elif isinstance(arr, numpy.ndarray): - getattr(self, name)[:arr.size] = arr[:] + getattr(self, name)[: arr.size] = arr[:] else: raise ValueError("Unsupported array type: %s" % type(arr)) - def _recover_arrays_references(self): """ Recover the previous arrays references, and return the references of the @@ -293,7 +310,6 @@ class CSR(OpenclProcessing): setattr(self, name, getattr(self, "_old_" + name)) return array, (data, indices, indptr) - def get_sparse_arrays(self, output): """ Get the 2D dense array of the current context. @@ -314,7 +330,6 @@ class CSR(OpenclProcessing): res = output return res - def get_array(self, output): if output is None: res = self.array.get().reshape(self.shape) @@ -344,7 +359,7 @@ class CSR(OpenclProcessing): self.indices, self.indptr, ) - #~ evt.wait() + # ~ evt.wait() self.profile_add(evt, "sparsification kernel") res = self.get_sparse_arrays(output) self._recover_arrays_references() @@ -355,9 +370,7 @@ class CSR(OpenclProcessing): # -------------------------------------------------------------------------- def densify(self, data, indices, indptr, output=None): - self.set_sparse_arrays( - CSRData(data=data, indices=indices, indptr=indptr) - ) + self.set_sparse_arrays(CSRData(data=data, indices=indices, indptr=indptr)) self.set_array(output) evt = self.kernels.densify_csr( self.queue, @@ -369,9 +382,8 @@ class CSR(OpenclProcessing): self.array.data, numpy.int32(self.shape[0]), ) - #~ evt.wait() + # ~ evt.wait() self.profile_add(evt, "desparsification kernel") res = self.get_array(output) self._recover_arrays_references() return res - diff --git a/src/silx/opencl/statistics.py b/src/silx/opencl/statistics.py index a96ee33..26d23e6 100644 --- a/src/silx/opencl/statistics.py +++ b/src/silx/opencl/statistics.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- # # Project: SILX # https://github.com/silx-kit/silx # -# Copyright (C) 2012-2019 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2012-2023 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -37,7 +36,7 @@ __contact__ = "jerome.kieffer@esrf.fr" import logging import numpy -from collections import OrderedDict, namedtuple +from collections import namedtuple from math import sqrt from .common import pyopencl @@ -47,6 +46,7 @@ from .utils import concatenate_cl_kernel if pyopencl: mf = pyopencl.mem_flags from pyopencl.reduction import ReductionKernel + try: from pyopencl import cltypes except ImportError: @@ -59,8 +59,9 @@ else: raise ImportError("pyopencl is not installed") logger = logging.getLogger(__name__) -StatResults = namedtuple("StatResults", ["min", "max", "cnt", "sum", "mean", - "var", "std"]) +StatResults = namedtuple( + "StatResults", ["min", "max", "cnt", "sum", "mean", "var", "std"] +) zero8 = "(float8)(FLT_MAX, -FLT_MAX, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)" # min max cnt cnt_e sum sum_e var var_e @@ -82,33 +83,52 @@ class Statistics(OpenclProcessing): Switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) """ + buffers = [ BufferDescription("raw", 1, numpy.float32, mf.READ_ONLY), BufferDescription("converted", 1, numpy.float32, mf.READ_WRITE), ] kernel_files = ["preprocess.cl"] - mapping = {numpy.int8: "s8_to_float", - numpy.uint8: "u8_to_float", - numpy.int16: "s16_to_float", - numpy.uint16: "u16_to_float", - numpy.uint32: "u32_to_float", - numpy.int32: "s32_to_float"} - - def __init__(self, size=None, dtype=None, template=None, - ctx=None, devicetype="all", platformid=None, deviceid=None, - block_size=None, profile=False - ): - OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, - platformid=platformid, deviceid=deviceid, - block_size=block_size, profile=profile) + mapping = { + numpy.int8: "s8_to_float", + numpy.uint8: "u8_to_float", + numpy.int16: "s16_to_float", + numpy.uint16: "u16_to_float", + numpy.uint32: "u32_to_float", + numpy.int32: "s32_to_float", + } + + def __init__( + self, + size=None, + dtype=None, + template=None, + ctx=None, + devicetype="all", + platformid=None, + deviceid=None, + block_size=None, + profile=False, + ): + OpenclProcessing.__init__( + self, + ctx=ctx, + devicetype=devicetype, + platformid=platformid, + deviceid=deviceid, + block_size=block_size, + profile=profile, + ) self.size = size self.dtype = dtype if template is not None: self.size = template.size self.dtype = template.dtype - self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags) - for i in self.__class__.buffers] + self.buffers = [ + BufferDescription(i.name, i.size * self.size, i.dtype, i.flags) + for i in self.__class__.buffers + ] self.allocate_buffers(use_array=True) self.compile_kernels() @@ -117,43 +137,54 @@ class Statistics(OpenclProcessing): def set_kernel_arguments(self): """Parametrize all kernel arguments""" for val in self.mapping.values(): - self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("raw", "converted"))) + self.cl_kernel_args[val] = dict( + ((i, self.cl_mem[i]) for i in ("raw", "converted")) + ) def compile_kernels(self): """Compile the kernel""" - OpenclProcessing.compile_kernels(self, - self.kernel_files, - "-D NIMAGE=%i" % self.size) + OpenclProcessing.compile_kernels( + self, self.kernel_files, "-D NIMAGE=%i" % self.size + ) compiler_options = self.get_compiler_options(x87_volatile=True) src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl")) - self.reduction_comp = ReductionKernel(self.ctx, - dtype_out=float8, - neutral=zero8, - map_expr="map_statistics(data, i)", - reduce_expr="reduce_statistics(a,b)", - arguments="__global float *data", - preamble=src, - options=compiler_options) - self.reduction_simple = ReductionKernel(self.ctx, - dtype_out=float8, - neutral=zero8, - map_expr="map_statistics(data, i)", - reduce_expr="reduce_statistics_simple(a,b)", - arguments="__global float *data", - preamble=src, - options=compiler_options) + self.reduction_comp = ReductionKernel( + self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options, + ) + self.reduction_simple = ReductionKernel( + self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics_simple(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options, + ) if "cl_khr_fp64" in self.device.extensions: - self.reduction_double = ReductionKernel(self.ctx, - dtype_out=float8, - neutral=zero8, - map_expr="map_statistics(data, i)", - reduce_expr="reduce_statistics_double(a,b)", - arguments="__global float *data", - preamble=src, - options=compiler_options) + self.reduction_double = ReductionKernel( + self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics_double(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options, + ) else: - logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device) + logger.info( + "Device %s does not support double-precision arithmetics, fall-back on compensated one", + self.device, + ) self.reduction_double = self.reduction_comp def send_buffer(self, data, dest): @@ -168,23 +199,27 @@ class Statistics(OpenclProcessing): dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0]) events = [] if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize): - copy_image = pyopencl.enqueue_copy(self.queue, - self.cl_mem[dest].data, - numpy.ascontiguousarray(data, dest_type)) + copy_image = pyopencl.enqueue_copy( + self.queue, + self.cl_mem[dest].data, + numpy.ascontiguousarray(data, dest_type), + ) events.append(EventDescription("copy H->D %s" % dest, copy_image)) else: - copy_image = pyopencl.enqueue_copy(self.queue, - self.cl_mem["raw"].data, - numpy.ascontiguousarray(data)) + copy_image = pyopencl.enqueue_copy( + self.queue, self.cl_mem["raw"].data, numpy.ascontiguousarray(data) + ) kernel = getattr(self.program, self.mapping[data.dtype.type]) - cast_to_float = kernel(self.queue, - (self.size,), - None, - self.cl_mem["raw"].data, - self.cl_mem[dest].data) + cast_to_float = kernel( + self.queue, + (self.size,), + None, + self.cl_mem["raw"].data, + self.cl_mem[dest].data, + ) events += [ EventDescription("copy H->D raw", copy_image), - EventDescription(f"cast to float {dest}", cast_to_float) + EventDescription(f"cast to float {dest}", cast_to_float), ] if self.profile: self.events += events @@ -194,7 +229,7 @@ class Statistics(OpenclProcessing): """Actually calculate the statics on the data :param numpy.ndarray data: numpy array with the image - :param comp: use Kahan compensated arithmetics for the calculation + :param comp: use Kahan compensated arithmetics for the calculation :return: Statistics named tuple :rtype: StatResults """ @@ -217,9 +252,11 @@ class Statistics(OpenclProcessing): reduction = self.reduction_double else: reduction = self.reduction_comp - res_d, evt = reduction(self.cl_mem["converted"][:self.size], - queue=self.queue, - return_event=True) + res_d, evt = reduction( + self.cl_mem["converted"][: self.size], + queue=self.queue, + return_event=True, + ) events.append(EventDescription(f"statistical reduction {comp}", evt)) if self.profile: self.events += events @@ -230,13 +267,7 @@ class Statistics(OpenclProcessing): sum_ = 1.0 * res_h["s4"] + res_h["s5"] m2 = 1.0 * res_h["s6"] + res_h["s7"] var = m2 / (count - 1.0) - res = StatResults(min_, - max_, - count, - sum_, - sum_ / count, - var, - sqrt(var)) + res = StatResults(min_, max_, count, sum_, sum_ / count, var, sqrt(var)) return res __call__ = process diff --git a/src/silx/opencl/test/__init__.py b/src/silx/opencl/test/__init__.py index 92cda4a..b1ecf1b 100644 --- a/src/silx/opencl/test/__init__.py +++ b/src/silx/opencl/test/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Project: silx # https://github.com/silx-kit/silx diff --git a/src/silx/opencl/test/test_addition.py b/src/silx/opencl/test/test_addition.py index 3b668bf..98beab4 100644 --- a/src/silx/opencl/test/test_addition.py +++ b/src/silx/opencl/test/test_addition.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: Sift implementation in Python + OpenCL # https://github.com/silx-kit/silx @@ -41,16 +40,17 @@ import pytest import unittest from ..common import ocl, _measure_workgroup_size, query_kernel_info + if ocl: import pyopencl import pyopencl.array from ..utils import get_opencl_code + logger = logging.getLogger(__name__) @unittest.skipUnless(ocl, "PyOpenCl is missing") class TestAddition(unittest.TestCase): - @classmethod def setUpClass(cls): super(TestAddition, cls).setUpClass() @@ -59,8 +59,9 @@ class TestAddition(unittest.TestCase): if logger.getEffectiveLevel() <= logging.INFO: cls.PROFILE = True cls.queue = pyopencl.CommandQueue( - cls.ctx, - properties=pyopencl.command_queue_properties.PROFILING_ENABLE) + cls.ctx, + properties=pyopencl.command_queue_properties.PROFILING_ENABLE, + ) else: cls.PROFILE = False cls.queue = pyopencl.CommandQueue(cls.ctx) @@ -69,7 +70,10 @@ class TestAddition(unittest.TestCase): @classmethod def tearDownClass(cls): super(TestAddition, cls).tearDownClass() - print("Maximum valid workgroup size %s on device %s" % (cls.max_valid_wg, cls.ctx.devices[0])) + print( + "Maximum valid workgroup size %s on device %s" + % (cls.max_valid_wg, cls.ctx.devices[0]) + ) cls.ctx = None cls.queue = None @@ -96,11 +100,20 @@ class TestAddition(unittest.TestCase): d_array_result = pyopencl.array.empty_like(self.d_array_img) wg = 1 << i try: - evt = self.program.addition(self.queue, (self.shape,), (wg,), - self.d_array_img.data, self.d_array_5.data, d_array_result.data, numpy.int32(self.shape)) + evt = self.program.addition( + self.queue, + (self.shape,), + (wg,), + self.d_array_img.data, + self.d_array_5.data, + d_array_result.data, + numpy.int32(self.shape), + ) evt.wait() except Exception as error: - max_valid_wg = self.program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0]) + max_valid_wg = self.program.addition.get_work_group_info( + pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0] + ) msg = "Error %s on WG=%s: %s" % (error, wg, max_valid_wg) self.assertLess(max_valid_wg, wg, msg) break @@ -118,23 +131,39 @@ class TestAddition(unittest.TestCase): for platform in ocl.platforms: for did, device in enumerate(platform.devices): meas = _measure_workgroup_size((platform.id, device.id)) - self.assertEqual(meas, device.max_work_group_size, - "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size)) + self.assertEqual( + meas, + device.max_work_group_size, + "Workgroup size for %s/%s: %s == %s" + % (platform, device, meas, device.max_work_group_size), + ) def test_query(self): """ tests that all devices are working properly ... lengthy and error prone """ - for what in ("COMPILE_WORK_GROUP_SIZE", - "LOCAL_MEM_SIZE", - "PREFERRED_WORK_GROUP_SIZE_MULTIPLE", - "PRIVATE_MEM_SIZE", - "WORK_GROUP_SIZE"): - logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what)) - - # Not all ICD work properly .... - #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel") - - min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE") - max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE") + for what in ( + "COMPILE_WORK_GROUP_SIZE", + "LOCAL_MEM_SIZE", + "PREFERRED_WORK_GROUP_SIZE_MULTIPLE", + "PRIVATE_MEM_SIZE", + "WORK_GROUP_SIZE", + ): + logger.info( + "%s: %s", + what, + query_kernel_info(program=self.program, kernel="addition", what=what), + ) + + # Not all ICD work properly .... + # self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel") + + min_wg = query_kernel_info( + program=self.program, + kernel="addition", + what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE", + ) + max_wg = query_kernel_info( + program=self.program, kernel="addition", what="WORK_GROUP_SIZE" + ) self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg") diff --git a/src/silx/opencl/test/test_array_utils.py b/src/silx/opencl/test/test_array_utils.py index 325a6c3..98f8bf3 100644 --- a/src/silx/opencl/test/test_array_utils.py +++ b/src/silx/opencl/test/test_array_utils.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # -# Copyright (c) 2016 European Synchrotron Radiation Facility +# Copyright (c) 2016-2022 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,42 +24,32 @@ # ###########################################################################*/ """Test of the OpenCL array_utils""" -from __future__ import division, print_function - __authors__ = ["Pierre paleo"] __license__ = "MIT" __copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" __date__ = "14/06/2017" -import time import logging import numpy as np import unittest + try: import mako except ImportError: mako = None from ..common import ocl + if ocl: import pyopencl as cl import pyopencl.array as parray - from .. import linalg from ..utils import get_opencl_code -from silx.test.utils import utilstest logger = logging.getLogger(__name__) -try: - from scipy.ndimage.filters import laplace - _has_scipy = True -except ImportError: - _has_scipy = False - @unittest.skipUnless(ocl and mako, "PyOpenCl is missing") class TestCpy2d(unittest.TestCase): - def setUp(self): if ocl is None: return @@ -68,8 +57,8 @@ class TestCpy2d(unittest.TestCase): if logger.getEffectiveLevel() <= logging.INFO: self.PROFILE = True self.queue = cl.CommandQueue( - self.ctx, - properties=cl.command_queue_properties.PROFILING_ENABLE) + self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE + ) else: self.PROFILE = False self.queue = cl.CommandQueue(self.ctx) @@ -96,8 +85,12 @@ class TestCpy2d(unittest.TestCase): self.offset1 = (offset1_y, offset1_x) self.offset2 = (offset2_y, offset2_x) # Compute the size of the rectangle to transfer - size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1) - size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1) + size_y = np.random.randint( + 2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1 + ) + size_x = np.random.randint( + 2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1 + ) self.transfer_shape = (size_y, size_x) def tearDown(self): @@ -113,7 +106,9 @@ class TestCpy2d(unittest.TestCase): def compare(self, result, reference): errmax = np.max(np.abs(result - reference)) logger.info("Max error = %e" % (errmax)) - self.assertTrue(errmax == 0, str("Max error is too high"))#. PRNG state was %s" % str(self.prng_state))) + self.assertTrue( + errmax == 0, str("Max error is too high") + ) # . PRNG state was %s" % str(self.prng_state))) @unittest.skipUnless(ocl and mako, "pyopencl is missing") def test_cpy2d(self): @@ -124,18 +119,26 @@ class TestCpy2d(unittest.TestCase): o1 = self.offset1 o2 = self.offset2 T = self.transfer_shape - logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s, + logger.info( + """Testing D->D rectangular copy with (N1_y, N1_x) = %s, (N2_y, N2_x) = %s: - array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" % - ( - str(self.shape1), str(self.shape2), - o2[0], o2[0] + T[0], - o2[1], o2[1] + T[1], - o1[0], o1[0] + T[0], - o1[1], o1[1] + T[1] - ) - ) - self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]] + array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" + % ( + str(self.shape1), + str(self.shape2), + o2[0], + o2[0] + T[0], + o2[1], + o2[1] + T[1], + o1[0], + o1[0] + T[0], + o1[1], + o1[1] + T[1], + ) + ) + self.array2[o2[0] : o2[0] + T[0], o2[1] : o2[1] + T[1]] = self.array1[ + o1[0] : o1[0] + T[0], o1[1] : o1[1] + T[1] + ] kernel_args = ( self.d_array2.data, self.d_array1.data, @@ -143,7 +146,7 @@ class TestCpy2d(unittest.TestCase): np.int32(self.shape1[1]), np.int32(self.offset2[::-1]), np.int32(self.offset1[::-1]), - np.int32(self.transfer_shape[::-1]) + np.int32(self.transfer_shape[::-1]), ) wg = None ndrange = self.transfer_shape[::-1] diff --git a/src/silx/opencl/test/test_backprojection.py b/src/silx/opencl/test/test_backprojection.py index 96d56fa..b08c972 100644 --- a/src/silx/opencl/test/test_backprojection.py +++ b/src/silx/opencl/test/test_backprojection.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # # Copyright (c) 2016 European Synchrotron Radiation Facility @@ -25,8 +24,6 @@ # ###########################################################################*/ """Test of the filtered backprojection module""" -from __future__ import division, print_function - __authors__ = ["Pierre paleo"] __license__ = "MIT" __copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" @@ -38,11 +35,13 @@ import logging import numpy as np import unittest from math import pi + try: import mako except ImportError: mako = None from ..common import ocl + if ocl: from .. import backprojection from ...image.tomography import compute_fourier_filter @@ -59,7 +58,7 @@ def generate_coords(img_shp, center=None): l_r, l_c = float(img_shp[0]), float(img_shp[1]) R, C = np.mgrid[:l_r, :l_c] if center is None: - center0, center1 = l_r / 2., l_c / 2. + center0, center1 = l_r / 2.0, l_c / 2.0 else: center0, center1 = center R = R + 0.5 - center0 @@ -75,7 +74,7 @@ def clip_circle(img, center=None, radius=None): M = R * R + C * C res = np.zeros_like(img) if radius is None: - radius = img.shape[0] / 2. - 1 + radius = img.shape[0] / 2.0 - 1 mask = M < radius * radius res[mask] = img[mask] return res @@ -83,20 +82,21 @@ def clip_circle(img, center=None, radius=None): @unittest.skipUnless(ocl and mako, "PyOpenCl is missing") class TestFBP(unittest.TestCase): - def setUp(self): if ocl is None: return self.getfiles() self.fbp = backprojection.Backprojection(self.sino.shape, profile=True) if self.fbp.compiletime_workgroup_size < 16 * 16: - self.skipTest("Current implementation of OpenCL backprojection is " - "not supported on this platform yet") + self.skipTest( + "Current implementation of OpenCL backprojection is " + "not supported on this platform yet" + ) # Astra does not use the same backprojector implementation. # Therefore, we cannot expect results to be the "same" (up to float32 # numerical error) self.tol = 5e-2 - if not(self.fbp._use_textures) or self.fbp.device.type == "CPU": + if not (self.fbp._use_textures) or self.fbp.device.type == "CPU": # Precision is less when using CPU # (either CPU textures or "manual" linear interpolation) self.tol *= 2 @@ -133,8 +133,12 @@ class TestFBP(unittest.TestCase): ref_clipped = clip_circle(self.reference_rec) delta = abs(res_clipped - ref_clipped) bad = delta > 1 - logger.debug("Absolute difference: %s with %s outlier pixels out of %s" - "", delta.max(), bad.sum(), np.prod(bad.shape)) + logger.debug( + "Absolute difference: %s with %s outlier pixels out of %s" "", + delta.max(), + bad.sum(), + np.prod(bad.shape), + ) return delta.max() @unittest.skipUnless(ocl and mako, "pyopencl is missing") @@ -160,17 +164,14 @@ class TestFBP(unittest.TestCase): for i in range(10): res = self.fbp.filtered_backprojection(self.sino) errmax = np.max(np.abs(res - res0)) - self.assertTrue(errmax < 1.e-6, "Max error is too high") + self.assertTrue(errmax < 1.0e-6, "Max error is too high") @unittest.skipUnless(ocl and mako, "pyopencl is missing") def test_fbp_filters(self): """ Test the different available filters of silx FBP. """ - avail_filters = [ - "ramlak", "shepp-logan", "cosine", "hamming", - "hann" - ] + avail_filters = ["ramlak", "shepp-logan", "cosine", "hamming", "hann"] # Create a Dirac delta function at a single angle view. # As the filters are radially invarant: # - backprojection yields an image where each line is a Dirac. @@ -179,7 +180,7 @@ class TestFBP(unittest.TestCase): # test will also ensure that backprojection behaves well. dirac = np.zeros_like(self.sino) na, dw = dirac.shape - dirac[0, dw//2] = na / pi * 2 + dirac[0, dw // 2] = na / pi * 2 for filter_name in avail_filters: B = backprojection.Backprojection(dirac.shape, filter_name=filter_name) @@ -187,17 +188,15 @@ class TestFBP(unittest.TestCase): # Check that radial invariance is kept std0 = np.max(np.abs(np.std(r, axis=0))) self.assertTrue( - std0 < 5.e-6, - "Something wrong with FBP(filter=%s)" % filter_name + std0 < 5.0e-6, "Something wrong with FBP(filter=%s)" % filter_name ) # Check that the filter is retrieved - r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2. # filter factor + r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2.0 # filter factor ref_filter_f = compute_fourier_filter(dw, filter_name) errmax = np.max(np.abs(r_f - ref_filter_f)) logger.info("FBP filter %s: max error=%e" % (filter_name, errmax)) self.assertTrue( - errmax < 1.e-3, - "Something wrong with FBP(filter=%s)" % filter_name + errmax < 1.0e-3, "Something wrong with FBP(filter=%s)" % filter_name ) @unittest.skipUnless(ocl and mako, "pyopencl is missing") @@ -205,13 +204,14 @@ class TestFBP(unittest.TestCase): # Generate a 513-sinogram. # The padded width will be nextpow(513*2). # silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail. - sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge') - B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1) + sino = np.pad(self.sino, ((0, 0), (1, 0)), mode="edge") + B = backprojection.Backprojection( + sino.shape, axis_position=self.fbp.axis_pos + 1 + ) res = B(sino) # Compare with self.reference_rec. Tolerance is high as backprojector # is not fully shift-invariant. errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec))) self.assertLess( - errmax, 1.e-1, - "Something wrong with FBP on odd-sized sinogram" + errmax, 1.0e-1, "Something wrong with FBP on odd-sized sinogram" ) diff --git a/src/silx/opencl/test/test_convolution.py b/src/silx/opencl/test/test_convolution.py index 6a2759d..86716f4 100644 --- a/src/silx/opencl/test/test_convolution.py +++ b/src/silx/opencl/test/test_convolution.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # -# Copyright (c) 2019 European Synchrotron Radiation Facility +# Copyright (c) 2019-2022 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -28,8 +27,6 @@ Test of the Convolution class. """ -from __future__ import division, print_function - __authors__ = ["Pierre Paleo"] __contact__ = "pierre.paleo@esrf.fr" __license__ = "MIT" @@ -44,7 +41,11 @@ from silx.image.utils import gaussian_kernel try: from scipy.ndimage import convolve, convolve1d - from scipy.misc import ascent + + try: + from scipy.misc import ascent + except: + from scipy.datasets import ascent scipy_convolve = convolve scipy_convolve1d = convolve1d @@ -61,7 +62,6 @@ logger = logging.getLogger(__name__) class ConvolutionData: - def __init__(self, param): self.param = param self.mode = param["boundary_handling"] @@ -207,7 +207,7 @@ def convolution_data_params(): ) params = [] for boundary_handling, use_texture, input_dev, output_dev in param_vals: - param={ + param = { "boundary_handling": boundary_handling, "input_on_device": input_dev, "output_on_device": output_dev, @@ -239,26 +239,31 @@ def convolution_data(request): def test_1D(convolution_data): convolution_data.template_test("test_1D") + @pytest.mark.skipif(ocl is None, reason="OpenCL is missing") @pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing") def test_separable_2D(convolution_data): convolution_data.template_test("test_separable_2D") + @pytest.mark.skipif(ocl is None, reason="OpenCL is missing") @pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing") def test_separable_3D(convolution_data): convolution_data.template_test("test_separable_3D") + @pytest.mark.skipif(ocl is None, reason="OpenCL is missing") @pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing") def test_nonseparable_2D(convolution_data): convolution_data.template_test("test_nonseparable_2D") + @pytest.mark.skipif(ocl is None, reason="OpenCL is missing") @pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing") def test_nonseparable_3D(convolution_data): convolution_data.template_test("test_nonseparable_3D") + @pytest.mark.skipif(ocl is None, reason="OpenCL is missing") @pytest.mark.skipif(scipy_convolve is None, reason="scipy is missing") def test_batched_2D(convolution_data): diff --git a/src/silx/opencl/test/test_doubleword.py b/src/silx/opencl/test/test_doubleword.py index a33cf5a..493d8c8 100644 --- a/src/silx/opencl/test/test_doubleword.py +++ b/src/silx/opencl/test/test_doubleword.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # # Project: The silx project # https://github.com/silx-kit/silx @@ -47,6 +46,7 @@ except ImportError as error: pyopencl = None from .. import ocl + if ocl is not None: from ..utils import read_cl_file from .. import pyopencl @@ -66,14 +66,21 @@ class TestDoubleWord(unittest.TestCase): @classmethod def setUpClass(cls): if pyopencl is None or ocl is None: - raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available") + raise unittest.SkipTest( + "OpenCL module (pyopencl) is not present or no device available" + ) cls.ctx = ocl.create_context(devicetype="GPU") - cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE) + cls.queue = pyopencl.CommandQueue( + cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE + ) # this is running 32 bits OpenCL woth POCL - if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and - cls.ctx.devices[0].platform.name == 'Portable Computing Language'): + if ( + platform.machine() in ("i386", "i686", "x86_64") + and (tuple.__itemsize__ == 4) + and cls.ctx.devices[0].platform.name == "Portable Computing Language" + ): cls.args = "-DX87_VOLATILE=volatile" else: cls.args = "" @@ -95,38 +102,68 @@ class TestDoubleWord(unittest.TestCase): cls.doubleword = None def test_fast_sum2(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *a, float *b, float *res_h, float *res_l", - "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *a, float *b, float *res_h, float *res_l", + "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1", + preamble=self.doubleword, + ) a_g = pyopencl.array.to_device(self.queue, self.ah) b_g = pyopencl.array.to_device(self.queue, self.bl) res_l = pyopencl.array.empty_like(a_g) res_h = pyopencl.array.empty_like(a_g) test_kernel(a_g, b_g, res_h, res_l) self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches") - self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches") - self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches") + self.assertGreater( + abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), + 0, + "Exact mismatches", + ) + self.assertEqual( + abs( + self.ah.astype(numpy.float64) + + self.bl + - (res_h.get().astype(numpy.float64) + res_l.get()) + ).max(), + 0, + "Exact matches", + ) def test_sum2(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *a, float *b, float *res_h, float *res_l", - "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *a, float *b, float *res_h, float *res_l", + "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) a_g = pyopencl.array.to_device(self.queue, self.ah) b_g = pyopencl.array.to_device(self.queue, self.bh) res_l = pyopencl.array.empty_like(a_g) res_h = pyopencl.array.empty_like(a_g) test_kernel(a_g, b_g, res_h, res_l) self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches") - self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches") - self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches") + self.assertGreater( + abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), + 0, + "Exact mismatches", + ) + self.assertEqual( + abs( + self.ah.astype(numpy.float64) + + self.bh + - (res_h.get().astype(numpy.float64) + res_l.get()) + ).max(), + 0, + "Exact matches", + ) def test_prod2(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *a, float *b, float *res_h, float *res_l", - "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *a, float *b, float *res_h, float *res_l", + "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) a_g = pyopencl.array.to_device(self.queue, self.ah) b_g = pyopencl.array.to_device(self.queue, self.bh) res_l = pyopencl.array.empty_like(a_g) @@ -135,14 +172,22 @@ class TestDoubleWord(unittest.TestCase): res_m = res_h.get() res = res_h.get().astype(numpy.float64) + res_l.get() self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches") - self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches") - self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches") + self.assertGreater( + abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), + 0, + "Exact mismatches", + ) + self.assertEqual( + abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches" + ) def test_dw_plus_fp(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *ah, float *al, float *b, float *res_h, float *res_l", - "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *ah, float *al, float *b, float *res_h, float *res_l", + "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) ah_g = pyopencl.array.to_device(self.queue, self.ah) al_g = pyopencl.array.to_device(self.queue, self.al) b_g = pyopencl.array.to_device(self.queue, self.bh) @@ -152,14 +197,22 @@ class TestDoubleWord(unittest.TestCase): res_m = res_h.get() res = res_h.get().astype(numpy.float64) + res_l.get() self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches") - self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches") - self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches") + self.assertGreater( + abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches" + ) + self.assertLess( + abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), + 2 * EPS32**2, + "Exact matches", + ) def test_dw_plus_dw(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l", - "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l", + "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) ah_g = pyopencl.array.to_device(self.queue, self.ah) al_g = pyopencl.array.to_device(self.queue, self.al) bh_g = pyopencl.array.to_device(self.queue, self.bh) @@ -170,14 +223,20 @@ class TestDoubleWord(unittest.TestCase): res_m = res_h.get() res = res_h.get().astype(numpy.float64) + res_l.get() self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches") - self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches") - self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches") + self.assertGreater( + abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches" + ) + self.assertLess( + abs(self.a + self.b - res).max(), 3 * EPS32**2, "Exact matches" + ) def test_dw_times_fp(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *ah, float *al, float *b, float *res_h, float *res_l", - "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *ah, float *al, float *b, float *res_h, float *res_l", + "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) ah_g = pyopencl.array.to_device(self.queue, self.ah) al_g = pyopencl.array.to_device(self.queue, self.al) b_g = pyopencl.array.to_device(self.queue, self.bh) @@ -187,14 +246,20 @@ class TestDoubleWord(unittest.TestCase): res_m = res_h.get() res = res_h.get().astype(numpy.float64) + res_l.get() self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches") - self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches") - self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches") + self.assertGreater( + abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches" + ) + self.assertLess( + abs(self.a * self.bh - res).max(), 2 * EPS32**2, "Exact matches" + ) def test_dw_times_dw(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l", - "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l", + "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) ah_g = pyopencl.array.to_device(self.queue, self.ah) al_g = pyopencl.array.to_device(self.queue, self.al) bh_g = pyopencl.array.to_device(self.queue, self.bh) @@ -205,14 +270,20 @@ class TestDoubleWord(unittest.TestCase): res_m = res_h.get() res = res_h.get().astype(numpy.float64) + res_l.get() self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches") - self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches") - self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches") + self.assertGreater( + abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches" + ) + self.assertLess( + abs(self.a * self.b - res).max(), 5 * EPS32**2, "Exact matches" + ) def test_dw_div_fp(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *ah, float *al, float *b, float *res_h, float *res_l", - "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *ah, float *al, float *b, float *res_h, float *res_l", + "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) ah_g = pyopencl.array.to_device(self.queue, self.ah) al_g = pyopencl.array.to_device(self.queue, self.al) b_g = pyopencl.array.to_device(self.queue, self.bh) @@ -222,14 +293,20 @@ class TestDoubleWord(unittest.TestCase): res_m = res_h.get() res = res_h.get().astype(numpy.float64) + res_l.get() self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches") - self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches") - self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches") + self.assertGreater( + abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches" + ) + self.assertLess( + abs(self.a / self.bh - res).max(), 3 * EPS32**2, "Exact matches" + ) def test_dw_div_dw(self): - test_kernel = ElementwiseKernel(self.ctx, - "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l", - "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", - preamble=self.doubleword) + test_kernel = ElementwiseKernel( + self.ctx, + "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l", + "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;", + preamble=self.doubleword, + ) ah_g = pyopencl.array.to_device(self.queue, self.ah) al_g = pyopencl.array.to_device(self.queue, self.al) bh_g = pyopencl.array.to_device(self.queue, self.bh) @@ -240,5 +317,9 @@ class TestDoubleWord(unittest.TestCase): res_m = res_h.get() res = res_h.get().astype(numpy.float64) + res_l.get() self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches") - self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches") - self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches") + self.assertGreater( + abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches" + ) + self.assertLess( + abs(self.a / self.b - res).max(), 6 * EPS32**2, "Exact matches" + ) diff --git a/src/silx/opencl/test/test_image.py b/src/silx/opencl/test/test_image.py index 73c771b..691ea82 100644 --- a/src/silx/opencl/test/test_image.py +++ b/src/silx/opencl/test/test_image.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: image manipulation in OpenCL # https://github.com/silx-kit/silx @@ -29,8 +28,6 @@ Simple test of image manipulation """ -from __future__ import division, print_function - __authors__ = ["Jérôme Kieffer"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" @@ -42,11 +39,13 @@ import numpy import unittest from ..common import ocl, _measure_workgroup_size + if ocl: import pyopencl import pyopencl.array from ...test.utils import utilstest from ..image import ImageProcessing + logger = logging.getLogger(__name__) try: from PIL import Image @@ -56,7 +55,6 @@ except ImportError: @unittest.skipUnless(ocl and Image, "PyOpenCl/Image is missing") class TestImage(unittest.TestCase): - @classmethod def setUpClass(cls): super(TestImage, cls).setUpClass() @@ -102,7 +100,9 @@ class TestImage(unittest.TestCase): tmp = pyopencl.array.empty(self.ip.ctx, self.data.shape, "float32") res = self.ip.to_float(self.data, out=tmp) res2 = self.ip.normalize(tmp, -100, 100, copy=False) - norm = (self.data.astype(numpy.float32) - self.data.min()) / (self.data.max() - self.data.min()) + norm = (self.data.astype(numpy.float32) - self.data.min()) / ( + self.data.max() - self.data.min() + ) ref2 = 200 * norm - 100 self.assertLess(abs(res2 - ref2).max(), 3e-5, "content") @@ -111,15 +111,21 @@ class TestImage(unittest.TestCase): """ Test on a greyscaled image ... of Lena :) """ - lena_bw = (0.2126 * self.data[:, :, 0] + - 0.7152 * self.data[:, :, 1] + - 0.0722 * self.data[:, :, 2]).astype("int32") + lena_bw = ( + 0.2126 * self.data[:, :, 0] + + 0.7152 * self.data[:, :, 1] + + 0.0722 * self.data[:, :, 2] + ).astype("int32") ref = numpy.histogram(lena_bw, 255) ip = ImageProcessing(ctx=self.ctx, template=lena_bw, profile=True) res = ip.histogram(lena_bw, 255) ip.log_profile() - delta = (ref[0] - res[0]) - deltap = (ref[1] - res[1]) + delta = ref[0] - res[0] + deltap = ref[1] - res[1] self.assertEqual(delta.sum(), 0, "errors are self-compensated") self.assertLessEqual(abs(delta).max(), 1, "errors are small") - self.assertLessEqual(abs(deltap).max(), 3e-5, "errors on position are small: %s" % (abs(deltap).max())) + self.assertLessEqual( + abs(deltap).max(), + 3e-5, + "errors on position are small: %s" % (abs(deltap).max()), + ) diff --git a/src/silx/opencl/test/test_kahan.py b/src/silx/opencl/test/test_kahan.py index 9e4a1e3..069d7de 100644 --- a/src/silx/opencl/test/test_kahan.py +++ b/src/silx/opencl/test/test_kahan.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # # Project: OpenCL numerical library # https://github.com/silx-kit/silx @@ -48,6 +47,7 @@ except ImportError as error: pyopencl = None from .. import ocl + if ocl is not None: from ..utils import read_cl_file from .. import pyopencl @@ -62,14 +62,21 @@ class TestKahan(unittest.TestCase): @classmethod def setUpClass(cls): if pyopencl is None or ocl is None: - raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available") + raise unittest.SkipTest( + "OpenCL module (pyopencl) is not present or no device available" + ) cls.ctx = ocl.create_context(devicetype="GPU") - cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE) + cls.queue = pyopencl.CommandQueue( + cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE + ) # this is running 32 bits OpenCL woth POCL - if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and - cls.ctx.devices[0].platform.name == 'Portable Computing Language'): + if ( + platform.machine() in ("i386", "i686", "x86_64") + and (tuple.__itemsize__ == 4) + and cls.ctx.devices[0].platform.name == "Portable Computing Language" + ): cls.args = "-DX87_VOLATILE=volatile" else: cls.args = "" @@ -81,7 +88,7 @@ class TestKahan(unittest.TestCase): @staticmethod def dummy_sum(ary, dtype=None): - "perform the actual sum in a dummy way " + "perform the actual sum in a dummy way" if dtype is None: dtype = ary.dtype.type sum_ = dtype(0) @@ -96,8 +103,10 @@ class TestKahan(unittest.TestCase): ref64 = numpy.sum(data, dtype=numpy.float64) ref32 = self.dummy_sum(data) - if (ref64 == ref32): - logger.warning("Kahan: invalid tests as float32 provides the same result as float64") + if ref64 == ref32: + logger.warning( + "Kahan: invalid tests as float32 provides the same result as float64" + ) # Dummy kernel to evaluate src = """ kernel void summation(global float* data, @@ -113,11 +122,15 @@ class TestKahan(unittest.TestCase): result[1] = acc.s1; } """ - prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args) + prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build( + self.args + ) ones_d = pyopencl.array.to_device(self.queue, data) res_d = pyopencl.array.empty(self.queue, 2, numpy.float32) res_d.fill(0) - evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt = prg.summation( + self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data + ) evt.wait() res = res_d.get().sum(dtype=numpy.float64) self.assertEqual(ref64, res, "test_kahan") @@ -129,8 +142,10 @@ class TestKahan(unittest.TestCase): ref64 = numpy.dot(data.astype(numpy.float64), data.astype(numpy.float64)) ref32 = numpy.dot(data, data) - if (ref64 == ref32): - logger.warning("dot16: invalid tests as float32 provides the same result as float64") + if ref64 == ref32: + logger.warning( + "dot16: invalid tests as float32 provides the same result as float64" + ) # Dummy kernel to evaluate src = """ kernel void test_dot16(global float* data, @@ -196,11 +211,15 @@ class TestKahan(unittest.TestCase): """ - prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args) + prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build( + self.args + ) ones_d = pyopencl.array.to_device(self.queue, data) res_d = pyopencl.array.empty(self.queue, 2, numpy.float32) res_d.fill(0) - evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt = prg.test_dot16( + self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data + ) evt.wait() res = res_d.get().sum(dtype="float64") self.assertEqual(ref64, res, "test_dot16") @@ -210,9 +229,13 @@ class TestKahan(unittest.TestCase): data1 = data[1::2] ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) ref32 = numpy.dot(data0, data1) - if (ref64 == ref32): - logger.warning("dot8: invalid tests as float32 provides the same result as float64") - evt = prg.test_dot8(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + if ref64 == ref32: + logger.warning( + "dot8: invalid tests as float32 provides the same result as float64" + ) + evt = prg.test_dot8( + self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data + ) evt.wait() res = res_d.get().sum(dtype="float64") self.assertEqual(ref64, res, "test_dot8") @@ -222,9 +245,13 @@ class TestKahan(unittest.TestCase): data1 = data[3::4] ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) ref32 = numpy.dot(data0, data1) - if (ref64 == ref32): - logger.warning("dot4: invalid tests as float32 provides the same result as float64") - evt = prg.test_dot4(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + if ref64 == ref32: + logger.warning( + "dot4: invalid tests as float32 provides the same result as float64" + ) + evt = prg.test_dot4( + self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data + ) evt.wait() res = res_d.get().sum(dtype="float64") self.assertEqual(ref64, res, "test_dot4") @@ -234,9 +261,13 @@ class TestKahan(unittest.TestCase): data1 = numpy.array([data[3], data[11], data[15]]) ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) ref32 = numpy.dot(data0, data1) - if (ref64 == ref32): - logger.warning("dot3: invalid tests as float32 provides the same result as float64") - evt = prg.test_dot3(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + if ref64 == ref32: + logger.warning( + "dot3: invalid tests as float32 provides the same result as float64" + ) + evt = prg.test_dot3( + self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data + ) evt.wait() res = res_d.get().sum(dtype="float64") self.assertEqual(ref64, res, "test_dot3") @@ -246,9 +277,13 @@ class TestKahan(unittest.TestCase): data1 = numpy.array([data[1], data[15]]) ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) ref32 = numpy.dot(data0, data1) - if (ref64 == ref32): - logger.warning("dot2: invalid tests as float32 provides the same result as float64") - evt = prg.test_dot2(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + if ref64 == ref32: + logger.warning( + "dot2: invalid tests as float32 provides the same result as float64" + ) + evt = prg.test_dot2( + self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data + ) evt.wait() res = res_d.get().sum(dtype="float64") self.assertEqual(ref64, res, "test_dot2") diff --git a/src/silx/opencl/test/test_linalg.py b/src/silx/opencl/test/test_linalg.py index a997a36..0b0a443 100644 --- a/src/silx/opencl/test/test_linalg.py +++ b/src/silx/opencl/test/test_linalg.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # -# Copyright (c) 2016 European Synchrotron Radiation Facility +# Copyright (c) 2016-2022 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,23 +24,22 @@ # ###########################################################################*/ """Test of the linalg module""" -from __future__ import division, print_function - __authors__ = ["Pierre paleo"] __license__ = "MIT" __copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" __date__ = "01/08/2019" -import time import logging import numpy as np import unittest + try: import mako except ImportError: mako = None from ..common import ocl + if ocl: import pyopencl as cl import pyopencl.array as parray @@ -50,7 +48,8 @@ from silx.test.utils import utilstest logger = logging.getLogger(__name__) try: - from scipy.ndimage.filters import laplace + from scipy.ndimage import laplace + _has_scipy = True except ImportError: _has_scipy = False @@ -58,13 +57,18 @@ except ImportError: # TODO move this function in math or image ? def gradient(img): - ''' + """ Compute the gradient of an image as a numpy array Code from https://github.com/emmanuelle/tomo-tv/ - ''' - shape = [img.ndim, ] + list(img.shape) + """ + shape = [ + img.ndim, + ] + list(img.shape) gradient = np.zeros(shape, dtype=img.dtype) - slice_all = [0, slice(None, -1),] + slice_all = [ + 0, + slice(None, -1), + ] for d in range(img.ndim): gradient[tuple(slice_all)] = np.diff(img, axis=d) slice_all[0] = d + 1 @@ -74,10 +78,10 @@ def gradient(img): # TODO move this function in math or image ? def divergence(grad): - ''' + """ Compute the divergence of a gradient Code from https://github.com/emmanuelle/tomo-tv/ - ''' + """ res = np.zeros(grad.shape[1:]) for d in range(grad.shape[0]): this_grad = np.rollaxis(grad[d], d) @@ -90,7 +94,6 @@ def divergence(grad): @unittest.skipUnless(ocl and mako, "PyOpenCl is missing") class TestLinAlg(unittest.TestCase): - def setUp(self): if ocl is None: return @@ -109,7 +112,9 @@ class TestLinAlg(unittest.TestCase): self.div_ref = divergence(self.grad_ref) self.image2 = np.zeros_like(self.image) # Device images - self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64) + self.gradient_parray = parray.empty( + self.la.queue, self.image.shape, np.complex64 + ) self.gradient_parray.fill(0) # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2), # but platforms not suporting openCL 1.2 have a problem with enqueue_fill_buffer, @@ -156,46 +161,78 @@ class TestLinAlg(unittest.TestCase): arrays = { "numpy.ndarray": self.image, "buffer": self.image_buffer, - "parray": self.image_parray + "parray": self.image_parray, } for desc, image in arrays.items(): # Test with dst on host (numpy.ndarray) res = self.la.gradient(image, return_to_host=True) - self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc)) + self.compare( + res, + self.grad_ref, + 1e-6, + str("gradient[src=%s, dst=numpy.ndarray]" % desc), + ) # Test with dst on device (pyopencl.Buffer) self.la.gradient(image, dst=self.gradient_buffer) cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer) self.grad2[0] = self.grad.real self.grad2[1] = self.grad.imag - self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc)) + self.compare( + self.grad2, + self.grad_ref, + 1e-6, + str("gradient[src=%s, dst=buffer]" % desc), + ) # Test with dst on device (pyopencl.Array) self.la.gradient(image, dst=self.gradient_parray) self.grad = self.gradient_parray.get() self.grad2[0] = self.grad.real self.grad2[1] = self.grad.imag - self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc)) + self.compare( + self.grad2, + self.grad_ref, + 1e-6, + str("gradient[src=%s, dst=parray]" % desc), + ) @unittest.skipUnless(ocl and mako, "pyopencl is missing") def test_divergence(self): arrays = { "numpy.ndarray": self.grad_ref, "buffer": self.grad_ref_buffer, - "parray": self.grad_ref_parray + "parray": self.grad_ref_parray, } for desc, grad in arrays.items(): # Test with dst on host (numpy.ndarray) res = self.la.divergence(grad, return_to_host=True) - self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc)) + self.compare( + res, + self.div_ref, + 1e-6, + str("divergence[src=%s, dst=numpy.ndarray]" % desc), + ) # Test with dst on device (pyopencl.Buffer) self.la.divergence(grad, dst=self.image_buffer) cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer) - self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc)) + self.compare( + self.image2, + self.div_ref, + 1e-6, + str("divergence[src=%s, dst=buffer]" % desc), + ) # Test with dst on device (pyopencl.Array) self.la.divergence(grad, dst=self.image_parray) self.image2 = self.image_parray.get() - self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc)) - - @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing") + self.compare( + self.image2, + self.div_ref, + 1e-6, + str("divergence[src=%s, dst=parray]" % desc), + ) + + @unittest.skipUnless( + ocl and mako and _has_scipy, "pyopencl and/or scipy is missing" + ) def test_laplacian(self): laplacian_ref = laplace(self.image) # Laplacian = div(grad) diff --git a/src/silx/opencl/test/test_medfilt.py b/src/silx/opencl/test/test_medfilt.py index 339e0f2..15cd749 100644 --- a/src/silx/opencl/test/test_medfilt.py +++ b/src/silx/opencl/test/test_medfilt.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: Median filter of images + OpenCL # https://github.com/silx-kit/silx @@ -29,13 +28,11 @@ Simple test of the median filter """ -from __future__ import division, print_function - __authors__ = ["Jérôme Kieffer"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" -__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "05/07/2018" +__copyright__ = "2013-2022 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "09/05/2023" import sys @@ -43,12 +40,15 @@ import time import logging import numpy import unittest + from collections import namedtuple + try: import mako except ImportError: mako = None from ..common import ocl + if ocl: import pyopencl import pyopencl.array @@ -61,12 +61,18 @@ Result = namedtuple("Result", ["size", "error", "sp_time", "oc_time"]) try: from scipy.misc import ascent except: - def ascent(): - """Dummy image from random data""" - return numpy.random.random((512, 512)) + try: + from scipy.datasets import ascent + except: + + def ascent(): + """Dummy image from random data""" + return numpy.random.random((512, 512)) + + try: - from scipy.ndimage import filters - median_filter = filters.median_filter + from scipy.ndimage import median_filter + HAS_SCIPY = True except: HAS_SCIPY = False @@ -74,7 +80,6 @@ except: @unittest.skipUnless(ocl and mako, "PyOpenCl is missing") class TestMedianFilter(unittest.TestCase): - def setUp(self): if ocl is None: return @@ -111,8 +116,15 @@ class TestMedianFilter(unittest.TestCase): if r is None: logger.info("test_medfilt: size: %s: skipped") else: - logger.info("test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r) - self.assertEqual(r.error, 0, 'Results are correct') + logger.info( + "test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r + ) + if ( + self.medianfilter.device.platform.name.lower() + != "portable computing language" + ): + # Known broken + self.assertEqual(r.error, 0, "Results are correct") def benchmark(self, limit=36): "Run some benchmarking" diff --git a/src/silx/opencl/test/test_projection.py b/src/silx/opencl/test/test_projection.py index 13db5f4..550a2f6 100644 --- a/src/silx/opencl/test/test_projection.py +++ b/src/silx/opencl/test/test_projection.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # # Copyright (c) 2016 European Synchrotron Radiation Facility @@ -25,8 +24,6 @@ # ###########################################################################*/ """Test of the forward projection module""" -from __future__ import division, print_function - __authors__ = ["Pierre paleo"] __license__ = "MIT" __copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" @@ -37,11 +34,13 @@ import time import logging import numpy as np import unittest + try: import mako except ImportError: mako = None from ..common import ocl + if ocl: from .. import projection from silx.test.utils import utilstest @@ -51,17 +50,18 @@ logger = logging.getLogger(__name__) @unittest.skipUnless(ocl and mako, "PyOpenCl is missing") class TestProj(unittest.TestCase): - def setUp(self): if ocl is None: return # ~ if sys.platform.startswith('darwin'): - # ~ self.skipTest("Projection is not implemented on CPU for OS X yet") + # ~ self.skipTest("Projection is not implemented on CPU for OS X yet") self.getfiles() n_angles = self.sino.shape[0] self.proj = projection.Projection(self.phantom.shape, n_angles) if self.proj.compiletime_workgroup_size < 16 * 16: - self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet") + self.skipTest( + "Current implementation of OpenCL projection is not supported on this platform yet" + ) def tearDown(self): self.phantom = None @@ -111,11 +111,11 @@ class TestProj(unittest.TestCase): msg = str("Max error = %e" % err) logger.info(msg) # Interpolation differs at some lines, giving relative error of 10/50000 - self.assertTrue(err < 20., "Max error is too high") + self.assertTrue(err < 20.0, "Max error is too high") # Test multiple reconstructions # ----------------------------- res0 = np.copy(res) for i in range(10): res = self.proj.projection(self.phantom) errmax = np.max(np.abs(res - res0)) - self.assertTrue(errmax < 1.e-6, "Max error is too high") + self.assertTrue(errmax < 1.0e-6, "Max error is too high") diff --git a/src/silx/opencl/test/test_sparse.py b/src/silx/opencl/test/test_sparse.py index 1d26b36..db58220 100644 --- a/src/silx/opencl/test/test_sparse.py +++ b/src/silx/opencl/test/test_sparse.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# coding: utf-8 # /*########################################################################## # # Copyright (c) 2018-2019 European Synchrotron Radiation Facility @@ -30,6 +29,7 @@ import unittest import logging from itertools import product from ..common import ocl + if ocl: import pyopencl.array as parray from silx.opencl.sparse import CSR @@ -40,13 +40,14 @@ except ImportError: logger = logging.getLogger(__name__) - def generate_sparse_random_data( shape=(1000,), - data_min=0, data_max=100, + data_min=0, + data_max=100, density=0.1, use_only_integers=True, - dtype="f"): + dtype="f", +): """ Generate random sparse data where. @@ -76,7 +77,6 @@ def generate_sparse_random_data( return (d * mask).astype(dtype) - @unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing") class TestCSR(unittest.TestCase): """Test CSR format""" @@ -88,20 +88,19 @@ class TestCSR(unittest.TestCase): dtypes = [np.float32, np.int32, np.uint16] self._test_configs = list(product(input_on_device, output_on_device, dtypes)) - def compute_ref_sparsification(self, array): ref_sparse = sp.csr_matrix(array) return ref_sparse - def test_sparsification(self): for input_on_device, output_on_device, dtype in self._test_configs: self._test_sparsification(input_on_device, output_on_device, dtype) - def _test_sparsification(self, input_on_device, output_on_device, dtype): current_config = "input on device: %s, output on device: %s, dtype: %s" % ( - str(input_on_device), str(output_on_device), str(dtype) + str(input_on_device), + str(output_on_device), + str(dtype), ) logger.debug("CSR: %s" % current_config) # Generate data and reference CSR @@ -133,29 +132,27 @@ class TestCSR(unittest.TestCase): nnz = ref_sparse.nnz self.assertTrue( np.allclose(data[:nnz], ref_sparse.data), - "something wrong with sparsified data (%s)" - % current_config + "something wrong with sparsified data (%s)" % current_config, ) self.assertTrue( np.allclose(indices[:nnz], ref_sparse.indices), - "something wrong with sparsified indices (%s)" - % current_config + "something wrong with sparsified indices (%s)" % current_config, ) self.assertTrue( np.allclose(indptr, ref_sparse.indptr), "something wrong with sparsified indices pointers (indptr) (%s)" - % current_config + % current_config, ) - def test_desparsification(self): for input_on_device, output_on_device, dtype in self._test_configs: self._test_desparsification(input_on_device, output_on_device, dtype) - def _test_desparsification(self, input_on_device, output_on_device, dtype): current_config = "input on device: %s, output on device: %s, dtype: %s" % ( - str(input_on_device), str(output_on_device), str(dtype) + str(input_on_device), + str(output_on_device), + str(dtype), ) logger.debug("CSR: %s" % current_config) # Generate data and reference CSR @@ -183,6 +180,5 @@ class TestCSR(unittest.TestCase): # Compare self.assertTrue( np.allclose(arr.reshape(array.shape), array), - "something wrong with densified data (%s)" - % current_config + "something wrong with densified data (%s)" % current_config, ) diff --git a/src/silx/opencl/test/test_stats.py b/src/silx/opencl/test/test_stats.py index 859271d..7637211 100644 --- a/src/silx/opencl/test/test_stats.py +++ b/src/silx/opencl/test/test_stats.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # Project: Sift implementation in Python + OpenCL # https://github.com/silx-kit/silx @@ -40,17 +39,18 @@ import numpy import unittest from ..common import ocl + if ocl: import pyopencl import pyopencl.array from ..statistics import StatResults, Statistics from ..utils import get_opencl_code + logger = logging.getLogger(__name__) @unittest.skipUnless(ocl, "PyOpenCl is missing") class TestStatistics(unittest.TestCase): - @classmethod def setUpClass(cls): cls.size = 1 << 20 # 1 million elements @@ -58,9 +58,15 @@ class TestStatistics(unittest.TestCase): fdata = cls.data.astype("float64") t0 = time.perf_counter() std = fdata.std() - cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size), - fdata.sum(), fdata.mean(), std ** 2, - std) + cls.ref = StatResults( + fdata.min(), + fdata.max(), + float(fdata.size), + fdata.sum(), + fdata.mean(), + std**2, + std, + ) t1 = time.perf_counter() cls.ref_time = t1 - t0 @@ -71,11 +77,12 @@ class TestStatistics(unittest.TestCase): @classmethod def validate(cls, res): return ( - (res.min == cls.ref.min) and - (res.max == cls.ref.max) and - (res.cnt == cls.ref.cnt) and - abs(res.mean - cls.ref.mean) < 0.01 and - abs(res.std - cls.ref.std) < 0.1) + (res.min == cls.ref.min) + and (res.max == cls.ref.max) + and (res.cnt == cls.ref.cnt) + and abs(res.mean - cls.ref.mean) < 0.01 + and abs(res.std - cls.ref.std) < 0.1 + ) def test_measurement(self): """ @@ -96,11 +103,26 @@ class TestStatistics(unittest.TestCase): t0 = time.perf_counter() res = s(self.data, comp=comp) t1 = time.perf_counter() - logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0)) + logger.info( + "Runtime on %s/%s : %.3fms x%.1f", + platform, + device, + 1000 * (t1 - t0), + self.ref_time / (t1 - t0), + ) if failed_init or not self.validate(res): - logger.error("failed_init %s; Computation modes %s", failed_init, comp) - logger.error("Failed on platform %s device %s", platform, device) + logger.error( + "failed_init %s; Computation modes %s", + failed_init, + comp, + ) + logger.error( + "Failed on platform %s device %s", platform, device + ) logger.error("Reference results: %s", self.ref) logger.error("Faulty results: %s", res) - self.assertTrue(False, f"Stat calculation failed on {platform},{device} in mode {comp}") + self.assertTrue( + False, + f"Stat calculation failed on {platform},{device} in mode {comp}", + ) diff --git a/src/silx/opencl/utils.py b/src/silx/opencl/utils.py index 575e018..c332402 100644 --- a/src/silx/opencl/utils.py +++ b/src/silx/opencl/utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # /*########################################################################## # Copyright (C) 2017 European Synchrotron Radiation Facility # @@ -26,8 +25,6 @@ Project: Sift implementation in Python + OpenCL https://github.com/silx-kit/silx """ -from __future__ import division - __authors__ = ["Jérôme Kieffer", "Pierre Paleo"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" @@ -46,9 +43,13 @@ def calc_size(shape, blocksize): Calculate the optimal size for a kernel according to the workgroup size """ if "__len__" in dir(blocksize): - return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize)) + return tuple( + (int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize) + ) else: - return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape) + return tuple( + (int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape + ) def nextpower(n): @@ -91,8 +92,7 @@ def get_cl_file(resource): """ if not resource.endswith(".cl"): resource += ".cl" - return resources._resource_filename(resource, - default_directory="opencl") + return resources._resource_filename(resource, default_directory="opencl") def read_cl_file(filename): @@ -121,8 +121,6 @@ def concatenate_cl_kernel(filenames): return os.linesep.join(read_cl_file(fn) for fn in filenames) - - class ConvolutionInfos(object): allowed_axes = { "1D": [None], @@ -135,10 +133,10 @@ class ConvolutionInfos(object): (2, 0, 1), (2, 1, 0), (1, 0, 2), - (0, 2, 1) + (0, 2, 1), ], "batched_1D_3D": [(0,), (1,), (2,)], - "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?) + "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?) "2D": [None], "batched_2D_3D": [(0,), (1,), (2,)], "separable_3D_2D_3D": [ @@ -205,10 +203,3 @@ class ConvolutionInfos(object): }, }, } - - - - - - - |