Diffstat (limited to 'silx/opencl')
-rw-r--r--  silx/opencl/__init__.py  52
-rw-r--r--  silx/opencl/backprojection.py  397
-rw-r--r--  silx/opencl/codec/__init__.py  0
-rw-r--r--  silx/opencl/codec/byte_offset.py  439
-rw-r--r--  silx/opencl/codec/setup.py  43
-rw-r--r--  silx/opencl/codec/test/__init__.py  37
-rw-r--r--  silx/opencl/codec/test/test_byte_offset.py  315
-rw-r--r--  silx/opencl/common.py  691
-rw-r--r--  silx/opencl/convolution.py  442
-rw-r--r--  silx/opencl/image.py  387
-rw-r--r--  silx/opencl/linalg.py  220
-rw-r--r--  silx/opencl/medfilt.py  269
-rw-r--r--  silx/opencl/processing.py  447
-rw-r--r--  silx/opencl/projection.py  428
-rw-r--r--  silx/opencl/reconstruction.py  388
-rw-r--r--  silx/opencl/setup.py  48
-rw-r--r--  silx/opencl/sinofilter.py  435
-rw-r--r--  silx/opencl/sparse.py  377
-rw-r--r--  silx/opencl/statistics.py  242
-rw-r--r--  silx/opencl/test/__init__.py  68
-rw-r--r--  silx/opencl/test/test_addition.py  154
-rw-r--r--  silx/opencl/test/test_array_utils.py  161
-rw-r--r--  silx/opencl/test/test_backprojection.py  231
-rw-r--r--  silx/opencl/test/test_convolution.py  265
-rw-r--r--  silx/opencl/test/test_doubleword.py  258
-rw-r--r--  silx/opencl/test/test_image.py  137
-rw-r--r--  silx/opencl/test/test_kahan.py  269
-rw-r--r--  silx/opencl/test/test_linalg.py  216
-rw-r--r--  silx/opencl/test/test_medfilt.py  175
-rw-r--r--  silx/opencl/test/test_projection.py  131
-rw-r--r--  silx/opencl/test/test_sparse.py  203
-rw-r--r--  silx/opencl/test/test_stats.py  116
-rw-r--r--  silx/opencl/utils.py  214
33 files changed, 0 insertions, 8255 deletions
diff --git a/silx/opencl/__init__.py b/silx/opencl/__init__.py
deleted file mode 100644
index fbd1f88..0000000
--- a/silx/opencl/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: S I L X project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-"""This package provides OpenCl-based optimized processing functions.
-
-For more processing functions, see the silx.math and silx.image packages.
-
-See silx documentation: http://www.silx.org/doc/silx/latest/
-"""
-
-__author__ = "Jerome Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "15/03/2017"
-__status__ = "stable"
-
-import logging
-
-
-logger = logging.getLogger(__name__)
-
-
-from .common import *
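
For reference, the wildcard import removed here re-exported the OpenCL device inventory from silx.opencl.common (see the __all__ list in the common.py hunk further down). A minimal probe of that API, assuming the pre-removal layout in which ocl is None when pyopencl is missing or no usable platform is found:

    # Minimal sketch, assuming the pre-removal silx.opencl API.
    from silx.opencl import ocl, pyopencl

    if ocl is None or pyopencl is None:
        print("pyopencl missing or no usable OpenCL platform")
    else:
        print(ocl)                        # one line per platform with its devices
        print(ocl.select_device("GPU"))   # (platformid, deviceid) or None
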
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py
deleted file mode 100644
index 65a9836..0000000
--- a/silx/opencl/backprojection.py
+++ /dev/null
@@ -1,397 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for (filtered) backprojection on the GPU"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["A. Mirone, P. Paleo"]
-__license__ = "MIT"
-__date__ = "25/01/2019"
-
-import logging
-import numpy as np
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-from .sinofilter import SinoFilter
-from .sinofilter import fourier_filter as fourier_filter_
-from ..utils.deprecation import deprecated
-
-if pyopencl:
- mf = pyopencl.mem_flags
- import pyopencl.array as parray
-else:
- raise ImportError("Please install pyopencl in order to use opencl backprojection")
-logger = logging.getLogger(__name__)
-
-
-def _sizeof(Type):
- """
- return the size (in bytes) of a scalar type, like the C behavior
- """
- return np.dtype(Type).itemsize
-
-
-def _idivup(a, b):
- """
- return the integer division, plus one if `a` is not a multiple of `b`
- """
- return (a + (b - 1)) // b
-
-
-class Backprojection(OpenclProcessing):
- """A class for performing the backprojection using OpenCL"""
- kernel_files = ["backproj.cl", "array_utils.cl"]
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None,
- angles=None, filter_name=None, ctx=None, devicetype="all",
- platformid=None, deviceid=None, profile=False,
- extra_options=None):
- """Constructor of the OpenCL (filtered) backprojection
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins
- and n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice. By
- default, it is a square slice where the dimension
- is the "x dimension" of the sinogram (number of
- bins).
- :param axis_position: Optional, axis position. Default is
- `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param filter_name: Optional, name of the filter for FBP. Default is
- the Ram-Lak filter.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- :param extra_options: Advanced extra options in the form of a dict.
- Current options are: cutoff, use_numpy_fft
- """
- # OS X enforces a workgroup size of 1 when the kernel has
- # synchronization barriers if sys.platform.startswith('darwin'):
- # assuming no discrete GPU
- # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
-
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self._init_geometry(sino_shape, slice_shape, angles, axis_position,
- extra_options)
- self._allocate_memory()
- self._compute_angles()
- self._init_kernels()
- self._init_filter(filter_name)
-
- def _init_geometry(self, sino_shape, slice_shape, angles, axis_position,
- extra_options):
- """Geometry Initialization
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins
- and n_a is the number of angles.
- :param slice_shape: shape of the reconstructed slice. By
- default, it is a square slice where the dimension
- is the "x dimension" of the sinogram (number of
- bins).
- :param angles: list of projection angles in radian.
- :param axis_position: axis position
- :param dict extra_options: Advanced extra options
- """
- self.shape = sino_shape
- self.num_bins = np.int32(sino_shape[1])
- self.num_projs = np.int32(sino_shape[0])
- self.angles = angles
- if slice_shape is None:
- self.slice_shape = (self.num_bins, self.num_bins)
- else:
- self.slice_shape = slice_shape
- self.dimrec_shape = (
- _idivup(self.slice_shape[0], 32) * 32,
- _idivup(self.slice_shape[1], 32) * 32
- )
- if axis_position:
- self.axis_pos = np.float32(axis_position)
- else:
- self.axis_pos = np.float32((sino_shape[1] - 1.) / 2)
- self.axis_array = None # TODO: add axis correction front-end
- self._init_extra_options(extra_options)
-
- def _init_extra_options(self, extra_options):
- """Backprojection extra option initialization
-
- :param dict extra_options: Advanced extra options
- """
- self.extra_options = {
- "cutoff": 1.,
- "use_numpy_fft": False,
- # It is axis_pos - (num_bins-1)/2 in PyHST
- "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2.,
- "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2.
- }
- if extra_options is not None:
- self.extra_options.update(extra_options)
-
- def _allocate_memory(self):
- # Host memory
- self.slice = np.zeros(self.dimrec_shape, dtype=np.float32)
- self._use_textures = self.check_textures_availability()
-
- # Device memory
- self.buffers = [
- BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE),
- BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available)
- BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY),
- BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY),
- BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY),
- ]
- self.allocate_buffers(use_array=True)
- self.d_sino = self.cl_mem["d_sino"] # shorthand
-
- # Texture memory (if relevant)
- if self._use_textures:
- self._allocate_textures()
-
- # Local memory
- self.local_mem = 256 * 3 * _sizeof(np.float32) # constant for all image sizes
-
- def _compute_angles(self):
- if self.angles is None:
- self.angles = np.linspace(0, np.pi, self.num_projs, False)
- h_cos = np.cos(self.angles).astype(np.float32)
- h_sin = np.sin(self.angles).astype(np.float32)
- self.cl_mem["d_cos"][:] = h_cos[:]
- self.cl_mem["d_sin"][:] = h_sin[:]
- if self.axis_array:
- self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:]
- else:
- self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos
-
- def _init_kernels(self):
- compile_options = None
- if not(self._use_textures):
- compile_options = "-DDONT_USE_TEXTURES"
- OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=compile_options
- )
- # check that workgroup can actually be (16, 16)
- self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel")
- # Workgroup and ndrange sizes are always the same
- self.wg = (16, 16)
- self.ndrange = (
- _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0],
- _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1]
- )
- # Prepare arguments for the kernel call
- if not(self._use_textures):
- d_sino_ref = self.d_sino.data
- else:
- d_sino_ref = self.d_sino_tex
- self._backproj_kernel_args = (
- # num of projections (int32)
- self.num_projs,
- # num of bins (int32)
- self.num_bins,
- # axis position (float32)
- self.axis_pos,
- # d_slice (__global float32*)
- self.cl_mem["_d_slice"].data,
- # d_sino (__read_only image2d_t or float*)
- d_sino_ref,
- # gpu_offset_x (float32) 
- np.float32(self.extra_options["gpu_offset_x"]),
- # gpu_offset_y (float32)
- np.float32(self.extra_options["gpu_offset_y"]),
- # d_cos (__global float32*)
- self.cl_mem["d_cos"].data,
- # d_sin (__global float32*)
- self.cl_mem["d_sin"].data,
- # d_axis (__global float32*)
- self.cl_mem["d_axes"].data,
- # shared mem (__local float32*)
- self._get_local_mem()
- )
-
- def _allocate_textures(self):
- """
- Allocate the texture for the sinogram.
- """
- self.d_sino_tex = self.allocate_texture(self.shape)
-
- def _init_filter(self, filter_name):
- """Filter initialization
-
- :param str filter_name: filter name
- """
- self.filter_name = filter_name or "ram-lak"
- self.sino_filter = SinoFilter(
- self.shape,
- ctx=self.ctx,
- filter_name=self.filter_name,
- extra_options=self.extra_options,
- )
-
- def _get_local_mem(self):
- return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
-
- def _cpy2d_to_slice(self, dst):
- ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0]))
- slice_shape_ocl = np.int32(ndrange)
- wg = None
- kernel_args = (
- dst.data,
- self.cl_mem["_d_slice"].data,
- np.int32(self.slice_shape[1]),
- np.int32(self.dimrec_shape[1]),
- np.int32((0, 0)),
- np.int32((0, 0)),
- slice_shape_ocl
- )
- return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
-
- def _transfer_to_texture(self, sino):
- if isinstance(sino, parray.Array):
- return self._transfer_device_to_texture(sino)
- sino2 = sino
- if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
- sino2 = np.ascontiguousarray(sino, dtype=np.float32)
- if not(self._use_textures):
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino.data,
- sino2
- )
- what = "transfer filtered sino H->D buffer"
- ev.wait()
- else:
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino_tex,
- sino2,
- origin=(0, 0),
- region=self.shape[::-1]
- )
- what = "transfer filtered sino H->D texture"
- return EventDescription(what, ev)
-
- def _transfer_device_to_texture(self, d_sino):
- if not(self._use_textures):
- if id(self.d_sino) == id(d_sino):
- return
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino.data,
- d_sino
- )
- what = "transfer filtered sino D->D buffer"
- ev.wait()
- else:
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino_tex,
- d_sino.data,
- offset=0,
- origin=(0, 0),
- region=self.shape[::-1]
- )
- what = "transfer filtered sino D->D texture"
- return EventDescription(what, ev)
-
- def backprojection(self, sino, output=None):
- """Perform the backprojection on an input sinogram
-
- :param sino: sinogram.
- :param output: optional, output slice.
- If provided, the result will be written in this array.
- :return: backprojection of sinogram
- """
- events = []
- with self.sem:
- events.append(self._transfer_to_texture(sino))
- # Call the backprojection kernel
- if not(self._use_textures):
- kernel_to_call = self.kernels.backproj_cpu_kernel
- else:
- kernel_to_call = self.kernels.backproj_kernel
- kernel_to_call(
- self.queue,
- self.ndrange,
- self.wg,
- *self._backproj_kernel_args
- )
- # Return
- if output is None:
- res = self.cl_mem["_d_slice"].get()
- res = res[:self.slice_shape[0], :self.slice_shape[1]]
- else:
- res = output
- self._cpy2d_to_slice(output)
-
- # /with self.sem
- if self.profile:
- self.events += events
-
- return res
-
- def filtered_backprojection(self, sino, output=None):
- """
- Compute the filtered backprojection (FBP) on a sinogram.
-
- :param sino: sinogram (`np.ndarray` or `pyopencl.array.Array`)
- with the shape (n_projections, n_bins)
- :param output: output (`np.ndarray` or `pyopencl.array.Array`).
- If nothing is provided, a new numpy array is returned.
- """
- # Filter
- self.sino_filter(sino, output=self.d_sino)
- # Backproject
- res = self.backprojection(self.d_sino, output=output)
- return res
-
- __call__ = filtered_backprojection
-
-
- # -------------------
- # - Compatibility -
- # -------------------
-
- @deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
- def filter_projections(self, sino, rescale=True):
- self.sino_filter(sino, output=self.d_sino)
-
-
-
-def fourier_filter(sino, filter_=None, fft_size=None):
- return fourier_filter_(sino, filter_=filter_, fft_size=fft_size)
-
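
For reference, a minimal usage sketch of the Backprojection class removed above (not part of the diff; it assumes the pre-removal silx.opencl API documented in the docstrings, a sinogram of shape (n_projections, n_bins), and a working pyopencl installation):

    # Minimal sketch, assuming the pre-removal silx.opencl API and pyopencl.
    import numpy as np
    from silx.opencl.backprojection import Backprojection

    n_projections, n_bins = 500, 512
    sino = np.random.rand(n_projections, n_bins).astype(np.float32)

    fbp = Backprojection(sino.shape, filter_name="ram-lak")
    rec = fbp.filtered_backprojection(sino)   # equivalently: fbp(sino)
    # rec is a numpy array of shape (n_bins, n_bins) unless slice_shape was given
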
diff --git a/silx/opencl/codec/__init__.py b/silx/opencl/codec/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/silx/opencl/codec/__init__.py
+++ /dev/null
diff --git a/silx/opencl/codec/byte_offset.py b/silx/opencl/codec/byte_offset.py
deleted file mode 100644
index 9a52427..0000000
--- a/silx/opencl/codec/byte_offset.py
+++ /dev/null
@@ -1,439 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Sift implementation in Python + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2013-2020 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-This module provides a class for CBF byte offset compression/decompression.
-"""
-
-from __future__ import division, print_function, with_statement
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "11/10/2018"
-__status__ = "production"
-
-
-import functools
-import os
-import numpy
-from ..common import ocl, pyopencl
-from ..processing import BufferDescription, EventDescription, OpenclProcessing
-
-import logging
-logger = logging.getLogger(__name__)
-
-if pyopencl:
- import pyopencl.version
- if pyopencl.version.VERSION < (2016, 0):
- from pyopencl.scan import GenericScanKernel, GenericDebugScanKernel
- else:
- from pyopencl.algorithm import GenericScanKernel
- from pyopencl.scan import GenericDebugScanKernel
-else:
- logger.warning("No PyOpenCL, no byte-offset, please see fabio")
-
-
-class ByteOffset(OpenclProcessing):
- """Perform the byte offset compression/decompression on the GPU
-
- See :class:`OpenclProcessing` for optional arguments description.
-
- :param int raw_size:
- Size of the raw stream for decompression.
- It can be (slightly) larger than the array.
- :param int dec_size:
- Size of the decompression output array
- (mandatory for decompression)
- """
-
- def __init__(self, raw_size=None, dec_size=None,
- ctx=None, devicetype="all",
- platformid=None, deviceid=None,
- block_size=None, profile=False):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
- if self.block_size is None:
- self.block_size = self.device.max_work_group_size
- wg = self.block_size
-
- buffers = [BufferDescription("counter", 1, numpy.int32, None)]
-
- if raw_size is None:
- self.raw_size = -1
- self.padded_raw_size = -1
- else:
- self.raw_size = int(raw_size)
- self.padded_raw_size = int((self.raw_size + wg - 1) & ~(wg - 1))
- buffers += [
- BufferDescription("raw", self.padded_raw_size, numpy.int8, None),
- BufferDescription("mask", self.padded_raw_size, numpy.int32, None),
- BufferDescription("values", self.padded_raw_size, numpy.int32, None),
- BufferDescription("exceptions", self.padded_raw_size, numpy.int32, None)
- ]
-
- if dec_size is None:
- self.dec_size = None
- else:
- self.dec_size = numpy.int32(dec_size)
- buffers += [
- BufferDescription("data_float", self.dec_size, numpy.float32, None),
- BufferDescription("data_int", self.dec_size, numpy.int32, None)
- ]
-
- self.allocate_buffers(buffers, use_array=True)
-
- self.compile_kernels([os.path.join("codec", "byte_offset")])
- self.kernels.__setattr__("scan", self._init_double_scan())
- self.kernels.__setattr__("compression_scan",
- self._init_compression_scan())
-
- def _init_double_scan(self):
- """"generates a double scan on indexes and values in one operation"""
- arguments = "__global int *value", "__global int *index"
- int2 = pyopencl.tools.get_or_register_dtype("int2")
- input_expr = "index[i]>0 ? (int2)(0, 0) : (int2)(value[i], 1)"
- scan_expr = "a+b"
- neutral = "(int2)(0,0)"
- output_statement = "value[i] = item.s0; index[i+1] = item.s1;"
-
- if self.block_size > 256:
- knl = GenericScanKernel(self.ctx,
- dtype=int2,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- else: # MacOS on CPU
- knl = GenericDebugScanKernel(self.ctx,
- dtype=int2,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- return knl
-
- def decode(self, raw, as_float=False, out=None):
- """This function actually performs the decompression by calling the kernels
-
- :param numpy.ndarray raw: The compressed data as a 1D numpy array of char.
- :param bool as_float: True to decompress as float32,
- False (default) to decompress as int32
- :param pyopencl.array out: pyopencl array in which to place the result.
- :return: The decompressed image as an pyopencl array.
- :rtype: pyopencl.array
- """
- assert self.dec_size is not None, \
- "dec_size is a mandatory ByteOffset init argument for decompression"
-
- events = []
- with self.sem:
- len_raw = numpy.int32(len(raw))
- if len_raw > self.padded_raw_size:
- wg = self.block_size
- self.raw_size = int(len(raw))
- self.padded_raw_size = (self.raw_size + wg - 1) & ~(wg - 1)
- logger.info("increase raw buffer size to %s", self.padded_raw_size)
- buffers = {
- "raw": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int8),
- "mask": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- "exceptions": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- "values": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- }
- self.cl_mem.update(buffers)
- else:
- wg = self.block_size
-
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data,
- raw,
- is_blocking=False)
- events.append(EventDescription("copy raw H -> D", evt))
- evt = self.kernels.fill_int_mem(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["mask"].data,
- numpy.int32(self.padded_raw_size),
- numpy.int32(0),
- numpy.int32(0))
- events.append(EventDescription("memset mask", evt))
- evt = self.kernels.fill_int_mem(self.queue, (1,), (1,),
- self.cl_mem["counter"].data,
- numpy.int32(1),
- numpy.int32(0),
- numpy.int32(0))
- events.append(EventDescription("memset counter", evt))
- evt = self.kernels.mark_exceptions(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["raw"].data,
- len_raw,
- numpy.int32(self.raw_size),
- self.cl_mem["mask"].data,
- self.cl_mem["values"].data,
- self.cl_mem["counter"].data,
- self.cl_mem["exceptions"].data)
- events.append(EventDescription("mark exceptions", evt))
- nb_exceptions = numpy.empty(1, dtype=numpy.int32)
- evt = pyopencl.enqueue_copy(self.queue, nb_exceptions, self.cl_mem["counter"].data,
- is_blocking=False)
- events.append(EventDescription("copy counter D -> H", evt))
- evt.wait()
- nbexc = int(nb_exceptions[0])
- if nbexc == 0:
- logger.info("nbexc %i", nbexc)
- else:
- evt = self.kernels.treat_exceptions(self.queue, (nbexc,), (1,),
- self.cl_mem["raw"].data,
- len_raw,
- self.cl_mem["mask"].data,
- self.cl_mem["exceptions"].data,
- self.cl_mem["values"].data
- )
- events.append(EventDescription("treat_exceptions", evt))
-
- #self.cl_mem["copy_values"] = self.cl_mem["values"].copy()
- #self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy()
- evt = self.kernels.scan(self.cl_mem["values"],
- self.cl_mem["mask"],
- queue=self.queue,
- size=int(len_raw),
- wait_for=(evt,))
- events.append(EventDescription("double scan", evt))
- #evt.wait()
- if out is not None:
- if out.dtype == numpy.float32:
- copy_results = self.kernels.copy_result_float
- else:
- copy_results = self.kernels.copy_result_int
- else:
- if as_float:
- out = self.cl_mem["data_float"]
- copy_results = self.kernels.copy_result_float
- else:
- out = self.cl_mem["data_int"]
- copy_results = self.kernels.copy_result_int
- evt = copy_results(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["values"].data,
- self.cl_mem["mask"].data,
- len_raw,
- self.dec_size,
- out.data
- )
- events.append(EventDescription("copy_results", evt))
- #evt.wait()
- if self.profile:
- self.events += events
- return out
-
- __call__ = decode
-
- def _init_compression_scan(self):
- """Initialize CBF compression scan kernels"""
- preamble = """
- int compressed_size(int diff) {
- int abs_diff = abs(diff);
-
- if (abs_diff < 128) {
- return 1;
- }
- else if (abs_diff < 32768) {
- return 3;
- }
- else {
- return 7;
- }
- }
-
- void write(const int index,
- const int diff,
- global char *output) {
- int abs_diff = abs(diff);
-
- if (abs_diff < 128) {
- output[index] = (char) diff;
- }
- else if (abs_diff < 32768) {
- output[index] = -128;
- output[index + 1] = (char) (diff >> 0);
- output[index + 2] = (char) (diff >> 8);
- }
- else {
- output[index] = -128;
- output[index + 1] = 0;
- output[index + 2] = -128;
- output[index + 3] = (char) (diff >> 0);
- output[index + 4] = (char) (diff >> 8);
- output[index + 5] = (char) (diff >> 16);
- output[index + 6] = (char) (diff >> 24);
- }
- }
- """
- arguments = "__global const int *data, __global char *compressed, __global int *size"
- input_expr = "compressed_size((i == 0) ? data[0] : (data[i] - data[i - 1]))"
- scan_expr = "a+b"
- neutral = "0"
- output_statement = """
- if (prev_item == 0) { // 1st thread store compressed data size
- size[0] = last_item;
- }
- write(prev_item, (i == 0) ? data[0] : (data[i] - data[i - 1]), compressed);
- """
-
- if self.block_size >= 64:
- knl = GenericScanKernel(self.ctx,
- dtype=numpy.int32,
- preamble=preamble,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- else: # MacOS on CPU
- knl = GenericDebugScanKernel(self.ctx,
- dtype=numpy.int32,
- preamble=preamble,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- return knl
-
- def encode(self, data, out=None):
- """Compress data to CBF.
-
- :param data: The data to compress as a numpy array
- (or a pyopencl Array) of int32.
- :type data: Union[numpy.ndarray, pyopencl.array.Array]
- :param pyopencl.array out:
- pyopencl array of int8 in which to store the result.
- The array should be large enough to store the compressed data.
- :return: The compressed data as a pyopencl array.
- If out is provided, this array shares the backing buffer,
- but has the exact size of the compressed data and the queue
- of the ByteOffset instance.
- :rtype: pyopencl.array
- :raises ValueError: if out array is not large enough
- """
-
- events = []
- with self.sem:
- if isinstance(data, pyopencl.array.Array):
- d_data = data # Uses provided array
-
- else: # Copy data to device
- data = numpy.ascontiguousarray(data, dtype=numpy.int32).ravel()
-
- # Make sure data array exists and is large enough
- if ("data_input" not in self.cl_mem or
- self.cl_mem["data_input"].size < data.size):
- logger.info("increase data input buffer size to %s", data.size)
- self.cl_mem.update({
- "data_input": pyopencl.array.empty(self.queue,
- data.size,
- dtype=numpy.int32)})
- d_data = self.cl_mem["data_input"]
-
- evt = pyopencl.enqueue_copy(
- self.queue, d_data.data, data, is_blocking=False)
- events.append(EventDescription("copy data H -> D", evt))
-
- # Make sure compressed array exists and is large enough
- compressed_size = d_data.size * 7
- if ("compressed" not in self.cl_mem or
- self.cl_mem["compressed"].size < compressed_size):
- logger.info("increase compressed buffer size to %s", compressed_size)
- self.cl_mem.update({
- "compressed": pyopencl.array.empty(self.queue,
- compressed_size,
- dtype=numpy.int8)})
- d_compressed = self.cl_mem["compressed"]
- d_size = self.cl_mem["counter"] # Shared with decompression
-
- evt = self.kernels.compression_scan(d_data, d_compressed, d_size)
- events.append(EventDescription("compression scan", evt))
- byte_count = int(d_size.get()[0])
-
- if out is None:
- # Create out array from a sub-region of the compressed buffer
- out = pyopencl.array.Array(
- self.queue,
- shape=(byte_count,),
- dtype=numpy.int8,
- allocator=functools.partial(
- d_compressed.base_data.get_sub_region,
- d_compressed.offset))
-
- elif out.size < byte_count:
- raise ValueError(
- "Provided output buffer is not large enough: "
- "requires %d bytes, got %d" % (byte_count, out.size))
-
- else: # out.size >= byte_count
- # Create an array with a sub-region of out and this class queue
- out = pyopencl.array.Array(
- self.queue,
- shape=(byte_count,),
- dtype=numpy.int8,
- allocator=functools.partial(out.base_data.get_sub_region,
- out.offset))
-
- evt = pyopencl.enqueue_copy(self.queue, out.data, d_compressed.data,
- byte_count=byte_count)
- events.append(
- EventDescription("copy D -> D: internal -> out", evt))
-
- if self.profile:
- self.events += events
-
- return out
-
- def encode_to_bytes(self, data):
- """Compresses data to CBF and returns compressed data as bytes.
-
- Usage:
-
- Provided an image (`image`) stored as a numpy array of int32,
- first, create a byte offset compression/decompression object:
-
- >>> from silx.opencl.codec.byte_offset import ByteOffset
- >>> byte_offset_codec = ByteOffset()
-
- Then, compress an image into bytes:
-
- >>> compressed = byte_offset_codec.encode_to_bytes(image)
-
- :param data: The data to compress as a numpy array
- (or a pyopencl Array) of int32.
- :type data: Union[numpy.ndarray, pyopencl.array.Array]
- :return: The compressed data as bytes.
- :rtype: bytes
- """
- compressed_array = self.encode(data)
- return compressed_array.get().tobytes()
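
For reference, a round-trip sketch of the ByteOffset codec removed above (not part of the diff; it assumes the pre-removal silx.opencl.codec API documented in the docstrings, with dec_size mandatory for decoding and device buffers grown on demand for encoding):

    # Minimal sketch, assuming the pre-removal silx.opencl.codec API and pyopencl.
    import numpy
    from silx.opencl.codec.byte_offset import ByteOffset

    image = numpy.random.poisson(200, size=(2048, 2048)).astype(numpy.int32)

    codec = ByteOffset(dec_size=image.size)     # dec_size is required by decode()
    stream = codec.encode_to_bytes(image)       # CBF byte-offset stream as bytes
    restored = codec.decode(stream)             # flat pyopencl array of int32
    assert numpy.array_equal(restored.get().reshape(image.shape), image)
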
diff --git a/silx/opencl/codec/setup.py b/silx/opencl/codec/setup.py
deleted file mode 100644
index 4a5c1e5..0000000
--- a/silx/opencl/codec/setup.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# coding: utf-8
-#
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-
-from __future__ import division
-
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__authors__ = ["J. Kieffer"]
-__date__ = "13/10/2017"
-
-from numpy.distutils.misc_util import Configuration
-
-
-def configuration(parent_package='', top_path=None):
- config = Configuration('codec', parent_package, top_path)
- config.add_subpackage('test')
- return config
-
-
-if __name__ == "__main__":
- from numpy.distutils.core import setup
- setup(configuration=configuration)
diff --git a/silx/opencl/codec/test/__init__.py b/silx/opencl/codec/test/__init__.py
deleted file mode 100644
index ec76dd3..0000000
--- a/silx/opencl/codec/test/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: silx
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2013-2017 European Synchrotron Radiation Facility, Grenoble, France
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-__authors__ = ["J. Kieffer"]
-__license__ = "MIT"
-__date__ = "13/10/2017"
-
-import unittest
-from . import test_byte_offset
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(test_byte_offset.suite())
-
- return testSuite
diff --git a/silx/opencl/codec/test/test_byte_offset.py b/silx/opencl/codec/test/test_byte_offset.py
deleted file mode 100644
index d1482ce..0000000
--- a/silx/opencl/codec/test/test_byte_offset.py
+++ /dev/null
@@ -1,315 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Byte-offset decompression in OpenCL
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2013-2020 European Synchrotron Radiation Facility,
-# Grenoble, France
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Test suite for byte-offset decompression
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "02/03/2021"
-
-import sys
-import time
-import logging
-import numpy
-from silx.opencl.common import ocl, pyopencl
-from silx.opencl.codec import byte_offset
-import fabio
-import unittest
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl and pyopencl,
- "PyOpenCl is missing")
-class TestByteOffset(unittest.TestCase):
-
- @staticmethod
- def _create_test_data(shape, nexcept, lam=200):
- """Create test (image, compressed stream) pair.
-
- :param shape: Shape of test image
- :param int nexcept: Number of exceptions in the image
- :param lam: Expectation of interval argument for numpy.random.poisson
- :return: (reference image array, compressed stream)
- """
- size = numpy.prod(shape)
- ref = numpy.random.poisson(lam, numpy.prod(shape))
- exception_loc = numpy.random.randint(0, size, size=nexcept)
- exception_value = numpy.random.randint(0, 1000000, size=nexcept)
- ref[exception_loc] = exception_value
- ref.shape = shape
-
- raw = fabio.compression.compByteOffset(ref)
- return ref, raw
-
- def test_decompress(self):
- """
- tests the byte offset decompression on GPU
- """
- ref, raw = self._create_test_data(shape=(91, 97), nexcept=229)
- # ref, raw = self._create_test_data(shape=(7, 9), nexcept=0)
-
- size = numpy.prod(ref.shape)
-
- try:
- bo = byte_offset.ByteOffset(raw_size=len(raw), dec_size=size, profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- if sys.platform == "darwin":
- raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
- else:
- raise err
- print(bo.block_size)
-
- t0 = time.time()
- res_cy = fabio.compression.decByteOffset(raw)
- t1 = time.time()
- res_cl = bo.decode(raw)
- t2 = time.time()
- delta_cy = abs(ref.ravel() - res_cy).max()
- delta_cl = abs(ref.ravel() - res_cl.get()).max()
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile()
- # print(ref)
- # print(res_cl.get())
- self.assertEqual(delta_cy, 0, "Checks fabio works")
- self.assertEqual(delta_cl, 0, "Checks opencl works")
-
- def test_many_decompress(self, ntest=10):
- """
- tests the byte offset decompression on GPU, many images to ensure there
- is not leaking in memory
- """
- shape = (991, 997)
- size = numpy.prod(shape)
- ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
-
- try:
- bo = byte_offset.ByteOffset(len(raw), size, profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- if sys.platform == "darwin":
- raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
- else:
- raise err
- t0 = time.time()
- res_cy = fabio.compression.decByteOffset(raw)
- t1 = time.time()
- res_cl = bo(raw)
- t2 = time.time()
- delta_cy = abs(ref.ravel() - res_cy).max()
- delta_cl = abs(ref.ravel() - res_cl.get()).max()
- self.assertEqual(delta_cy, 0, "Checks fabio works")
- self.assertEqual(delta_cl, 0, "Checks opencl works")
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
-
- for i in range(ntest):
- ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
-
- t0 = time.time()
- res_cy = fabio.compression.decByteOffset(raw)
- t1 = time.time()
- res_cl = bo(raw)
- t2 = time.time()
- delta_cy = abs(ref.ravel() - res_cy).max()
- delta_cl = abs(ref.ravel() - res_cl.get()).max()
- self.assertEqual(delta_cy, 0, "Checks fabio works #%i" % i)
- self.assertEqual(delta_cl, 0, "Checks opencl works #%i" % i)
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile(stats=True)
-
- def test_encode(self):
- """Test byte offset compression"""
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(len(raw), ref.size, profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- t0 = time.time()
- compressed_array = bo.encode(ref)
- t1 = time.time()
-
- compressed_stream = compressed_array.get().tobytes()
- self.assertEqual(raw, compressed_stream)
-
- logger.debug("Global execution time: OpenCL: %.3fms.",
- 1000.0 * (t1 - t0))
- bo.log_profile()
-
- def test_encode_to_array(self):
- """Test byte offset compression while providing an out array"""
-
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
- # Test with out buffer too small
- out = pyopencl.array.empty(bo.queue, (10,), numpy.int8)
- with self.assertRaises(ValueError):
- bo.encode(ref, out)
-
- # Test with out buffer too big
- out = pyopencl.array.empty(bo.queue, (len(raw) + 10,), numpy.int8)
-
- compressed_array = bo.encode(ref, out)
-
- # Get size from returned array
- compressed_size = compressed_array.size
- self.assertEqual(compressed_size, len(raw))
-
- # Get data from out array, read it from bo object queue
- out_bo_queue = out.with_queue(bo.queue)
- compressed_stream = out_bo_queue.get().tobytes()[:compressed_size]
- self.assertEqual(raw, compressed_stream)
-
- def test_encode_to_bytes(self):
- """Test byte offset compression to bytes"""
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(ref)
- t2 = time.time()
-
- self.assertEqual(raw, compressed_stream)
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile()
-
- def test_encode_to_bytes_from_array(self):
- """Test byte offset compression to bytes from a pyopencl array.
- """
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- d_ref = pyopencl.array.to_device(
- bo.queue, ref.astype(numpy.int32).ravel())
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(d_ref)
- t2 = time.time()
-
- self.assertEqual(raw, compressed_stream)
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile()
-
- def test_many_encode(self, ntest=10):
- """Test byte offset compression with many image"""
- shape = (991, 997)
- ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
-
- try:
- bo = byte_offset.ByteOffset(profile=False)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- bo_durations = []
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(ref)
- t2 = time.time()
- bo_durations.append(1000.0 * (t2 - t1))
-
- self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
-
- for i in range(ntest):
- ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(ref)
- t2 = time.time()
- bo_durations.append(1000.0 * (t2 - t1))
-
- self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
-
- logger.debug("OpenCL execution time: Mean: %fms, Min: %fms, Max: %fms",
- numpy.mean(bo_durations),
- numpy.min(bo_durations),
- numpy.max(bo_durations))
-
-
-def suite():
- test_suite = unittest.TestSuite()
- test_suite.addTest(TestByteOffset("test_decompress"))
- test_suite.addTest(TestByteOffset("test_many_decompress"))
- test_suite.addTest(TestByteOffset("test_encode"))
- test_suite.addTest(TestByteOffset("test_encode_to_array"))
- test_suite.addTest(TestByteOffset("test_encode_to_bytes"))
- test_suite.addTest(TestByteOffset("test_encode_to_bytes_from_array"))
- test_suite.addTest(TestByteOffset("test_many_encode"))
- return test_suite
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
deleted file mode 100644
index da966f6..0000000
--- a/silx/opencl/common.py
+++ /dev/null
@@ -1,691 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: S I L X project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-
-__author__ = "Jerome Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "30/11/2020"
-__status__ = "stable"
-__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
- "measure_workgroup_size", "kernel_workgroup_size"]
-
-import os
-import logging
-
-import numpy
-
-from .utils import get_opencl_code
-
-logger = logging.getLogger(__name__)
-
-if os.environ.get("SILX_OPENCL") in ["0", "False"]:
- logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
- pyopencl = None
-else:
- try:
- import pyopencl
- except ImportError:
- logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl")
- pyopencl = None
- else:
- try:
- pyopencl.get_platforms()
- except pyopencl.LogicError:
- logger.warning("The module pyOpenCL has been imported but can't be used here")
- pyopencl = None
- else:
- import pyopencl.array as array
- mf = pyopencl.mem_flags
-
-if pyopencl is None:
-
- # Define default mem flags
- class mf(object):
- WRITE_ONLY = 1
- READ_ONLY = 1
- READ_WRITE = 1
-else:
- mf = pyopencl.mem_flags
-
-FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
- "CPU": 4, # CPU, at least intel's have 4 operation per cycle
- "ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
-
-# Sources : https://en.wikipedia.org/wiki/CUDA
-NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
- (1, 1): 24, # Measured on G98 [Quadro NVS 295]
- (1, 2): 24, # Guessed !
- (1, 3): 24, # measured on a GT285 (GT200)
- (2, 0): 64, # Measured on a 580 (GF110)
- (2, 1): 96, # Measured on Quadro2000 GF106GL
- (3, 0): 384, # Guessed!
- (3, 5): 384, # Measured on K20
- (3, 7): 384, # K80: Guessed!
- (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU
- (5, 2): 256, # Titan-X
- (5, 3): 256, # TX1
- (6, 0): 128, # GP100
- (6, 1): 128, # GP104
- (6, 2): 128, # ?
- (7, 0): 128, # Volta # measured on Telsa V100
- (7, 1): 128, # Volta ?
- }
-
-AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
-
-
-class Device(object):
- """
- Simple class that contains the structure of an OpenCL device
- """
-
- def __init__(self, name="None", dtype=None, version=None, driver_version=None,
- extensions="", memory=None, available=None,
- cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
- """
- Simple container with some important data for the OpenCL device description.
-
- :param name: name of the device
- :param dtype: device type: CPU/GPU/ACC...
- :param version: driver version
- :param driver_version:
- :param extensions: List of opencl extensions
- :param memory: maximum memory available on the device
- :param available: is the device deactivated or not
- :param cores: number of SM/cores
- :param frequency: frequency of the device
- :param flop_core: Flopating Point operation per core per cycle
- :param idx: index of the device within the platform
- :param workgroup: max workgroup size
- """
- self.name = name.strip()
- self.type = dtype
- self.version = version
- self.driver_version = driver_version
- self.extensions = extensions.split()
- self.memory = memory
- self.available = available
- self.cores = cores
- self.frequency = frequency
- self.id = idx
- self.max_work_group_size = workgroup
- if not flop_core:
- flop_core = FLOP_PER_CORE.get(dtype, 1)
- if cores and frequency:
- self.flops = cores * frequency * flop_core
- else:
- self.flops = flop_core
-
- def __repr__(self):
- return "%s" % self.name
-
- def pretty_print(self):
- """
- Complete device description
-
- :return: string
- """
- lst = ["Name\t\t:\t%s" % self.name,
- "Type\t\t:\t%s" % self.type,
- "Memory\t\t:\t%.3f MB" % (self.memory / 2.0 ** 20),
- "Cores\t\t:\t%s CU" % self.cores,
- "Frequency\t:\t%s MHz" % self.frequency,
- "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.),
- "Version\t\t:\t%s" % self.version,
- "Available\t:\t%s" % self.available]
- return os.linesep.join(lst)
-
- def set_unavailable(self):
- """Use this method to flag a faulty device
- """
- self.available = False
-
-
-class Platform(object):
- """
- Simple class that contains the structure of an OpenCL platform
- """
-
- def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
- """
- Class containing all descriptions of a platform and all devices description within that platform.
-
- :param name: platform name
- :param vendor: name of the brand/vendor
- :param version:
- :param extensions: list of the extension provided by the platform to all of its devices
- :param idx: index of the platform
- """
- self.name = name.strip()
- self.vendor = vendor.strip()
- self.version = version
- self.extensions = extensions.split()
- self.devices = []
- self.id = idx
-
- def __repr__(self):
- return "%s" % self.name
-
- def add_device(self, device):
- """
- Add new device to the platform
-
- :param device: Device instance
- """
- self.devices.append(device)
-
- def get_device(self, key):
- """
- Return a device according to key
-
- :param key: identifier for a device, either it's id (int) or it's name
- :type key: int or str
- """
- out = None
- try:
- devid = int(key)
- except ValueError:
- for a_dev in self.devices:
- if a_dev.name == key:
- out = a_dev
- else:
- if len(self.devices) > devid > 0:
- out = self.devices[devid]
- return out
-
-
-def _measure_workgroup_size(device_or_context, fast=False):
- """Mesure the maximal work group size of the given device
-
- DEPRECATED since not perfectly correct !
-
- :param device_or_context: instance of pyopencl.Device or pyopencl.Context
- or 2-tuple (platformid,deviceid)
- :param fast: ask the kernel the valid value, don't probe it
- :return: maximum size for the workgroup
- """
- if isinstance(device_or_context, pyopencl.Device):
- try:
- ctx = pyopencl.Context(devices=[device_or_context])
- except pyopencl._cl.LogicError as error:
- platform = device_or_context.platform
- platformid = pyopencl.get_platforms().index(platform)
- deviceid = platform.get_devices().index(device_or_context)
- ocl.platforms[platformid].devices[deviceid].set_unavailable()
- raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error))
- else:
- device = device_or_context
- elif isinstance(device_or_context, pyopencl.Context):
- ctx = device_or_context
- device = device_or_context.devices[0]
- elif isinstance(device_or_context, (tuple, list)) and len(device_or_context) == 2:
- ctx = ocl.create_context(platformid=device_or_context[0],
- deviceid=device_or_context[1])
- device = ctx.devices[0]
- else:
- raise RuntimeError("""given parameter device_or_context is not an
- instanciation of a device or a context""")
- shape = device.max_work_group_size
- # get the context
-
- assert ctx is not None
- queue = pyopencl.CommandQueue(ctx)
-
- max_valid_wg = 1
- data = numpy.random.random(shape).astype(numpy.float32)
- d_data = pyopencl.array.to_device(queue, data)
- d_data_1 = pyopencl.array.empty_like(d_data)
- d_data_1.fill(numpy.float32(1.0))
-
- program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
- if fast:
- max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device)
- else:
- maxi = int(round(numpy.log2(shape)))
- for i in range(maxi + 1):
- d_res = pyopencl.array.empty_like(d_data)
- wg = 1 << i
- try:
- evt = program.addition(
- queue, (shape,), (wg,),
- d_data.data, d_data_1.data, d_res.data, numpy.int32(shape))
- evt.wait()
- except Exception as error:
- logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape)
- program = queue = d_res = d_data_1 = d_data = None
- break
- else:
- res = d_res.get()
- good = numpy.allclose(res, data + 1)
- if good:
- if wg > max_valid_wg:
- max_valid_wg = wg
- else:
- logger.warning("ArithmeticError on %s for WG=%s/%s", wg, device.name, shape)
-
- return max_valid_wg
-
-
-def _is_nvidia_gpu(vendor, devtype):
- return (vendor == "NVIDIA Corporation") and (devtype == "GPU")
-
-
-class OpenCL(object):
- """
- Simple class that wraps the structure ocl_tools_extended.h
-
- This is a static class.
- ocl should be the only instance and shared among all python modules.
- """
-
- platforms = []
- nb_devices = 0
- context_cache = {} # key: 2-tuple of int, value: context
- if pyopencl:
- platform = device = pypl = devtype = extensions = pydev = None
- for idx, platform in enumerate(pyopencl.get_platforms()):
- pypl = Platform(platform.name, platform.vendor, platform.version, platform.extensions, idx)
- for idd, device in enumerate(platform.get_devices()):
- ####################################################
- # Nvidia does not report int64 atomics (we are using) ...
- # this is a hack around as any nvidia GPU with double-precision supports int64 atomics
- ####################################################
- extensions = device.extensions
- if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
- extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
- try:
- devtype = pyopencl.device_type.to_string(device.type).upper()
- except ValueError:
- # pocl does not describe itself as a CPU !
- devtype = "CPU"
- if len(devtype) > 3:
- if "GPU" in devtype:
- devtype = "GPU"
- elif "ACC" in devtype:
- devtype = "ACC"
- elif "CPU" in devtype:
- devtype = "CPU"
- else:
- devtype = devtype[:3]
- if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
- try:
- comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
- except pyopencl.LogicError:
- flop_core = FLOP_PER_CORE["GPU"]
- else:
- flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
- elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
- flop_core = AMD_FLOP_PER_CORE
- elif devtype == "CPU":
- flop_core = FLOP_PER_CORE.get(devtype, 1)
- else:
- flop_core = 1
- workgroup = device.max_work_group_size
- if (devtype == "CPU") and (pypl.vendor == "Apple"):
- logger.info("For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size.")
- workgroup = _measure_workgroup_size(device, fast=True)
- if (devtype == "GPU") and os.environ.get("GPU") == "False":
- # Environment variable to disable GPU devices
- continue
- pydev = Device(device.name, devtype, device.version, device.driver_version, extensions,
- device.global_mem_size, bool(device.available), device.max_compute_units,
- device.max_clock_frequency, flop_core, idd, workgroup)
- pypl.add_device(pydev)
- nb_devices += 1
- platforms.append(pypl)
- del platform, device, pypl, devtype, extensions, pydev
-
- def __repr__(self):
- out = ["OpenCL devices:"]
- for platformid, platform in enumerate(self.platforms):
- deviceids = ["(%s,%s) %s" % (platformid, deviceid, dev.name)
- for deviceid, dev in enumerate(platform.devices)]
- out.append("[%s] %s: " % (platformid, platform.name) + ", ".join(deviceids))
- return os.linesep.join(out)
-
- def get_platform(self, key):
- """
- Return the platform matching the given identifier.
-
- :param key: identifier for a platform, either an id (int) or its name
- :type key: int or str
- """
- out = None
- try:
- platid = int(key)
- except ValueError:
- for a_plat in self.platforms:
- if a_plat.name == key:
- out = a_plat
- else:
- if len(self.platforms) > platid >= 0:
- out = self.platforms[platid]
- return out
-
- def select_device(self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs):
- """
- Select a device based on a few parameters (by default, keep the one with the most flops)
-
- :param dtype: device type: "gpu", "cpu", "acc" or "all"
- :param memory: minimum amount of memory (int)
- :param extensions: list of extensions to be present
- :param best: shall we keep the device with the most flops?
- :returns: a tuple (platform ID, device ID), or None if nothing was
- found
- """
- if extensions is None:
- extensions = []
- if "type" in kwargs:
- dtype = kwargs["type"].upper()
- else:
- dtype = dtype.upper()
- if len(dtype) > 3:
- dtype = dtype[:3]
- best_found = None
- for platformid, platform in enumerate(self.platforms):
- for deviceid, device in enumerate(platform.devices):
- if not device.available:
- continue
- if (dtype in ["ALL", "DEF"]) or (device.type == dtype):
- if (memory is None) or (memory <= device.memory):
- found = True
- for ext in extensions:
- if ext not in device.extensions:
- found = False
- if found:
- if not best:
- return platformid, deviceid
- else:
- if not best_found:
- best_found = platformid, deviceid, device.flops
- elif best_found[2] < device.flops:
- best_found = platformid, deviceid, device.flops
- if best_found:
- return best_found[0], best_found[1]
-
- # Nothing found
- return None
-
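A minimal usage sketch for select_device, assuming silx and pyopencl are installed and at least one OpenCL device is visible (the module-level singleton ocl, defined at the end of this file, is None otherwise):

from silx.opencl.common import ocl

ids = ocl.select_device(dtype="GPU", extensions=["cl_khr_fp64"])
if ids is not None:
    platformid, deviceid = ids
    device = ocl.platforms[platformid].devices[deviceid]
    print(device.name, device.memory, device.flops)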
- def create_context(self, devicetype="ALL", useFp64=False, platformid=None,
- deviceid=None, cached=True, memory=None, extensions=None):
- """
- Choose a device and initiate a context.
-
- Device types can be GPU, gpu, CPU, cpu, DEF, ACC or ALL; GPU and CPU are the most common.
- For each setting to work, a matching OpenCL device and driver must be properly installed.
- E.g. if the Nvidia driver is installed, GPU will succeed but CPU will fail.
- The AMD SDK is required for CPU support via OpenCL.
-
- :param devicetype: string in ["cpu", "gpu", "all", "acc"]
- :param useFp64: boolean specifying if double precision will be used; deprecated, use extensions=["cl_khr_fp64"] instead
- :param platformid: integer
- :param deviceid: integer
- :param cached: True if we want to cache the context
- :param memory: minimum amount of memory of the device
- :param extensions: list of extensions to be present
- :return: OpenCL context on the selected device
- """
- if extensions is None:
- extensions = []
- if useFp64:
- logger.warning("Deprecation: please select your device using the extension name!, i.e. extensions=['cl_khr_fp64']")
- extensions.append('cl_khr_fp64')
-
- if (platformid is not None) and (deviceid is not None):
- platformid = int(platformid)
- deviceid = int(deviceid)
- elif "PYOPENCL_CTX" in os.environ:
- pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")]
- pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0
- platformid, deviceid = pyopencl_ctx
- else:
- ids = ocl.select_device(type=devicetype, extensions=extensions)
- if ids:
- platformid, deviceid = ids
- ctx = None
- if (platformid is not None) and (deviceid is not None):
- if (platformid, deviceid) in self.context_cache:
- ctx = self.context_cache[(platformid, deviceid)]
- else:
- try:
- ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
- except pyopencl._cl.LogicError as error:
- self.platforms[platformid].devices[deviceid].set_unavailable()
- logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error)
- ctx = None
- else:
- if cached:
- self.context_cache[(platformid, deviceid)] = ctx
- if ctx is None:
- logger.warning("Last chance to get an OpenCL device ... probably not the one requested")
- ctx = pyopencl.create_some_context(interactive=False)
- return ctx
-
- def device_from_context(self, context):
- """
- Retrieves the Device from the context
-
- :param context: OpenCL context
- :return: instance of Device
- """
- odevice = context.devices[0]
- oplat = odevice.platform
- device_id = oplat.get_devices().index(odevice)
- platform_id = pyopencl.get_platforms().index(oplat)
- return self.platforms[platform_id].devices[device_id]
-
-
-if pyopencl:
- ocl = OpenCL()
- if ocl.nb_devices == 0:
- ocl = None
-else:
- ocl = None
-
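A minimal sketch of context creation with the singleton defined just above, assuming a working OpenCL runtime; the resulting pyopencl context can be mapped back to silx's lightweight Device description:

import pyopencl
from silx.opencl.common import ocl

ctx = ocl.create_context(devicetype="GPU", extensions=["cl_khr_fp64"])
queue = pyopencl.CommandQueue(ctx)
device = ocl.device_from_context(ctx)  # silx Device matching the context
print(device.name, device.type)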
-
-def release_cl_buffers(cl_buffers):
- """
- :param cl_buffers: the buffers you want to release
- :type cl_buffers: dict(str, pyopencl.Buffer)
-
- This method releases the memory of the buffers stored in the dict
- """
- for key, buffer_ in cl_buffers.items():
- if buffer_ is not None:
- if isinstance(buffer_, pyopencl.array.Array):
- try:
- buffer_.data.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- else:
- try:
- buffer_.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- cl_buffers[key] = None
- return cl_buffers
-
-
-def allocate_cl_buffers(buffers, device=None, context=None):
- """
- :param buffers: the buffer descriptions used to create the pyopencl.Buffer
- :type buffers: list(str, flag, numpy.dtype, int)
- :param device: one of the context devices
- :param context: OpenCL context
- :return: a dict containing the instantiated pyopencl.Buffer
- :rtype: dict(str, pyopencl.Buffer)
-
- This method instantiates the pyopencl.Buffer from the buffer
- descriptions.
- """
- mem = {}
- if device is None:
- device = ocl.device_from_context(context)
-
- # check if enough memory is available on the device
- ualloc = 0
- for _, _, dtype, size in buffers:
- ualloc += numpy.dtype(dtype).itemsize * size
- memory = device.memory
- logger.info("%.3fMB are needed on device which has %.3fMB",
- ualloc / 1.0e6, memory / 1.0e6)
- if ualloc >= memory:
- memError = "Fatal error in allocate_buffers."
- memError += "Not enough device memory for buffers"
- memError += "(%lu requested, %lu available)" % (ualloc, memory)
- raise MemoryError(memError) # noqa
-
- # do the allocation
- try:
- for name, flag, dtype, size in buffers:
- mem[name] = pyopencl.Buffer(context, flag,
- numpy.dtype(dtype).itemsize * size)
- except pyopencl.MemoryError as error:
- release_cl_buffers(mem)
- raise MemoryError(error)
-
- return mem
-
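A minimal sketch of the two buffer helpers above, assuming a working OpenCL context; the buffer names and sizes are arbitrary examples:

import numpy
import pyopencl
from silx.opencl.common import ocl, allocate_cl_buffers, release_cl_buffers

ctx = ocl.create_context(devicetype="ALL")
mf = pyopencl.mem_flags
descriptions = [("input", mf.READ_ONLY, numpy.float32, 1024),
                ("output", mf.WRITE_ONLY, numpy.float32, 1024)]
cl_mem = allocate_cl_buffers(descriptions, context=ctx)
# ... enqueue kernels reading cl_mem["input"] and writing cl_mem["output"] ...
release_cl_buffers(cl_mem)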
-
-def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
- """
- Allocate an OpenCL image ("texture").
-
- :param ctx: OpenCL context
- :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
- do not support 1D images, so 1D images are handled as 2D with one row
- :param support_1D: force the image to be 1D if the shape has only one dim
- """
- if len(shape) == 1 and not(support_1D):
- shape = (1,) + shape
- return pyopencl.Image(
- ctx,
- pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ),
- hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
- )
-
-
-def check_textures_availability(ctx):
- """
- Check whether textures are supported on the current OpenCL context.
-
- :param ctx: OpenCL context
- """
- try:
- dummy_texture = allocate_texture(ctx, (16, 16))
- # Need to further access some attributes (pocl)
- dummy_height = dummy_texture.height
- textures_available = True
- del dummy_texture, dummy_height
- except (pyopencl.RuntimeError, pyopencl.LogicError):
- textures_available = False
- # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
- # There is no way to detect this until a kernel is compiled
- try:
- cc = ctx.devices[0].compute_capability_major_nv
- textures_available &= (cc >= 3)
- except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU
- pass
- #
- return textures_available
-
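A minimal sketch combining the two helpers above, assuming a device that supports images; the 512x512 shape is an arbitrary example:

from silx.opencl.common import ocl, allocate_texture, check_textures_availability

ctx = ocl.create_context()
if check_textures_availability(ctx):
    texture = allocate_texture(ctx, (512, 512))  # 512x512 float32 image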
-
-def measure_workgroup_size(device):
- """Measure the actual size of the workgroup
-
- :param device: device or context or 2-tuple with indexes
- :return: the actual measured workgroup size
-
- if device is "all", returns a dict with all devices with their ids as keys.
- """
- if (ocl is None) or (device is None):
- return None
-
- if isinstance(device, tuple) and (len(device) == 2):
- # this is probably a tuple (platformid, deviceid)
- device = ocl.create_context(platformid=device[0], deviceid=device[1])
-
- if device == "all":
- res = {}
- for pid, platform in enumerate(ocl.platforms):
- for did, _devices in enumerate(platform.devices):
- tup = (pid, did)
- res[tup] = measure_workgroup_size(tup)
- else:
- res = _measure_workgroup_size(device)
- return res
-
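A minimal sketch, assuming at least one OpenCL device is detected; the (0, 0) indexes designate the first device of the first platform:

from silx.opencl.common import measure_workgroup_size

print(measure_workgroup_size((0, 0)))   # single device, by (platformid, deviceid)
print(measure_workgroup_size("all"))    # dict {(platformid, deviceid): measured workgroup size}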
-
-def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
- """Extract the compile time information from a kernel
-
- :param program: OpenCL program
- :param kernel: kernel or name of the kernel
- :param what: what is the query about ?
- :return: int or 3-int for the workgroup size.
-
- Possible information available are:
- * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
- * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1!
- * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel
- * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
- * 'PRIVATE_MEM_SIZE' Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel
- * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
-
- Further information on:
- https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
-
- """
- assert isinstance(program, pyopencl.Program)
- if not isinstance(kernel, pyopencl.Kernel):
- kernel_name = kernel
- assert kernel in (k.function_name for k in program.all_kernels()), "the kernel must exist in the program"
- kernel = getattr(program, kernel_name)
-
- device = program.devices[0]
- query_wg = getattr(pyopencl.kernel_work_group_info, what)
- return kernel.get_work_group_info(query_wg, device)
-
-
-def kernel_workgroup_size(program, kernel):
- """Extract the compile time maximum workgroup size
-
- :param program: OpenCL program
- :param kernel: kernel or name of the kernel
- :return: the maximum acceptable workgroup size for the given kernel
- """
- return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE")
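A minimal sketch of querying compile-time kernel information; the trivial kernel source below is only an illustration:

import pyopencl
from silx.opencl.common import ocl, kernel_workgroup_size, query_kernel_info

src = "kernel void add_one(global float *data) { data[get_global_id(0)] += 1.0f; }"
ctx = ocl.create_context()
program = pyopencl.Program(ctx, src).build()
print(kernel_workgroup_size(program, "add_one"))
print(query_kernel_info(program, "add_one", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE"))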
diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py
deleted file mode 100644
index 15ef931..0000000
--- a/silx/opencl/convolution.py
+++ /dev/null
@@ -1,442 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for convolution on CPU/GPU."""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import numpy as np
-from copy import copy # python2
-from .common import pyopencl as cl
-import pyopencl.array as parray
-from .processing import OpenclProcessing, EventDescription
-from .utils import ConvolutionInfos
-
-class Convolution(OpenclProcessing):
- """
- A class for performing convolution on CPU/GPU with OpenCL.
- """
-
- def __init__(self, shape, kernel, axes=None, mode=None, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False, extra_options=None):
- """Constructor of OpenCL Convolution.
-
- :param shape: shape of the array.
- :param kernel: convolution kernel (1D, 2D or 3D).
- :param axes: axes along which the convolution is performed,
- for batched convolutions.
- :param mode: Boundary handling mode. Available modes are:
- "reflect": cba|abcd|dcb
- "nearest": aaa|abcd|ddd
- "wrap": bcd|abcd|abc
- "constant": 000|abcd|000
- Default is "reflect".
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- :param extra_options: Advanced options (dict). Current options are:
- "allocate_input_array": True,
- "allocate_output_array": True,
- "allocate_tmp_array": True,
- "dont_use_textures": False,
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self._configure_extra_options(extra_options)
- self._determine_use_case(shape, kernel, axes)
- self._allocate_memory(mode)
- self._init_kernels()
-
- def _configure_extra_options(self, extra_options):
- self.extra_options = {
- "allocate_input_array": True,
- "allocate_output_array": True,
- "allocate_tmp_array": True,
- "dont_use_textures": False,
- }
- extra_opts = extra_options or {}
- self.extra_options.update(extra_opts)
- self.use_textures = not(self.extra_options["dont_use_textures"])
- self.use_textures &= self.check_textures_availability()
-
- def _get_dimensions(self, shape, kernel):
- self.shape = shape
- self.data_ndim = self._check_dimensions(shape=shape, name="Data")
- self.kernel_ndim = self._check_dimensions(arr=kernel, name="Kernel")
- Nx = shape[-1]
- if self.data_ndim >= 2:
- Ny = shape[-2]
- else:
- Ny = 1
- if self.data_ndim >= 3:
- Nz = shape[-3]
- else:
- Nz = 1
- self.Nx = np.int32(Nx)
- self.Ny = np.int32(Ny)
- self.Nz = np.int32(Nz)
-
- def _determine_use_case(self, shape, kernel, axes):
- """
- Determine the convolution use case from the input/kernel shape, and axes.
- """
- self._get_dimensions(shape, kernel)
- if self.kernel_ndim > self.data_ndim:
- raise ValueError("Kernel dimensions cannot exceed data dimensions")
- data_ndim = self.data_ndim
- kernel_ndim = self.kernel_ndim
- self.kernel = kernel.astype("f")
-
- convol_infos = ConvolutionInfos()
- k = (data_ndim, kernel_ndim)
- if k not in convol_infos.use_cases:
- raise ValueError(
- "Cannot find a use case for data ndim = %d and kernel ndim = %d"
- % (data_ndim, kernel_ndim)
- )
- possible_use_cases = convol_infos.use_cases[k]
-
- self.use_case_name = None
- for uc_name, uc_params in possible_use_cases.items():
- if axes in convol_infos.allowed_axes[uc_name]:
- self.use_case_name = uc_name
- self.use_case_desc = uc_params["name"]
- #~ self.use_case_kernels = uc_params["kernels"].copy()
- self.use_case_kernels = copy(uc_params["kernels"]) # TODO use the above line once we get rid of python2
- if self.use_case_name is None:
- raise ValueError(
- "Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s"
- % (data_ndim, kernel_ndim, str(axes))
- )
- # TODO implement this use case
- if self.use_case_name == "batched_separable_2D_1D_3D":
- raise NotImplementedError(
- "The use case %s is not implemented"
- % self.use_case_name
- )
- #
- self.axes = axes
- # Replace "axes=None" with an actual value (except for ND-ND)
- allowed_axes = convol_infos.allowed_axes[self.use_case_name]
- if len(allowed_axes) > 1:
- # The default choice might impact perfs
- self.axes = allowed_axes[0] or allowed_axes[1]
- self.separable = self.use_case_name.startswith("separable")
- self.batched = self.use_case_name.startswith("batched")
- # Update kernel names when using textures
- if self.use_textures:
- for i, kern_name in enumerate(self.use_case_kernels):
- self.use_case_kernels[i] = kern_name + "_tex"
-
- def _allocate_memory(self, mode):
- self.mode = mode or "reflect"
- option_array_names = {
- "allocate_input_array": "data_in",
- "allocate_output_array": "data_out",
- "allocate_tmp_array": "data_tmp",
- }
- # Nonseparable transforms do not need tmp array
- if not(self.separable):
- self.extra_options["allocate_tmp_array"] = False
- # Allocate arrays
- for option_name, array_name in option_array_names.items():
- if self.extra_options[option_name]:
- value = parray.empty(self.queue, self.shape, np.float32)
- value.fill(np.float32(0.0))
- else:
- value = None
- setattr(self, array_name, value)
-
- if isinstance(self.kernel, np.ndarray):
- self.d_kernel = parray.to_device(self.queue, self.kernel)
- else:
- if not(isinstance(self.kernel, parray.Array)):
- raise ValueError("kernel must be either numpy array or pyopencl array")
- self.d_kernel = self.kernel
- self._old_input_ref = None
- self._old_output_ref = None
- if self.use_textures:
- self._allocate_textures()
- self._c_modes_mapping = {
- "periodic": 2,
- "wrap": 2,
- "nearest": 1,
- "replicate": 1,
- "reflect": 0,
- "constant": 3,
- }
- mp = self._c_modes_mapping
- if self.mode.lower() not in mp:
- raise ValueError(
- """
- Mode %s is not available for textures. Available modes are:
- %s
- """
- % (self.mode, str(mp.keys()))
- )
- # TODO
- if not(self.use_textures) and self.mode.lower() == "constant":
- raise NotImplementedError(
- "mode='constant' is not implemented without textures yet"
- )
- #
- self._c_conv_mode = mp[self.mode]
-
- def _allocate_textures(self):
- self.data_in_tex = self.allocate_texture(self.shape)
- self.d_kernel_tex = self.allocate_texture(self.kernel.shape)
- self.transfer_to_texture(self.d_kernel, self.d_kernel_tex)
-
- def _init_kernels(self):
- if self.kernel_ndim > 1:
- if np.abs(np.diff(self.kernel.shape)).max() > 0:
- raise NotImplementedError(
- "Non-separable convolution with non-square kernels is not implemented yet"
- )
- compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)]
- if self.use_textures:
- kernel_files = ["convolution_textures.cl"]
- compile_options.extend([
- str("-DIMAGE_DIMS=%d" % self.data_ndim),
- str("-DFILTER_DIMS=%d" % self.kernel_ndim),
- ])
- d_kernel_ref = self.d_kernel_tex
- else:
- kernel_files = ["convolution.cl"]
- d_kernel_ref = self.d_kernel.data
- self.compile_kernels(
- kernel_files=kernel_files,
- compile_options=compile_options
- )
- self.ndrange = self.shape[::-1]
- self.wg = None
- kernel_args = [
- self.queue,
- self.ndrange, self.wg,
- None,
- None,
- d_kernel_ref,
- np.int32(self.kernel.shape[0]),
- self.Nx, self.Ny, self.Nz
- ]
- if self.kernel_ndim == 2:
- kernel_args.insert(6, np.int32(self.kernel.shape[1]))
- if self.kernel_ndim == 3:
- kernel_args.insert(6, np.int32(self.kernel.shape[2]))
- kernel_args.insert(7, np.int32(self.kernel.shape[1]))
- self.kernel_args = tuple(kernel_args)
- # If self.data_tmp is allocated, separable transforms can be performed
- # by a series of batched transforms, without any copy, by swapping refs.
- self.swap_pattern = None
- if self.separable:
- if self.data_tmp is not None:
- self.swap_pattern = {
- 2: [
- ("data_in", "data_tmp"),
- ("data_tmp", "data_out")
- ],
- 3: [
- ("data_in", "data_out"),
- ("data_out", "data_tmp"),
- ("data_tmp", "data_out"),
- ],
- }
- else:
- # TODO
- raise NotImplementedError("For now, data_tmp has to be allocated")
-
- def _get_swapped_arrays(self, i):
- """
- Get the input and output arrays to use when using a "swap pattern".
- Swapping refs enables to avoid copies between temp. array and output.
- For example, a separable 2D->1D convolution on 2D data reads:
- data_tmp = convol(data_input, kernel, axis=1) # step i=0
- data_out = convol(data_tmp, kernel, axis=0) # step i=1
-
- :param i: current step number of the separable convolution
- """
- if self.use_textures:
- # copy is needed when using texture, as data_out is a Buffer
- if i > 0:
- self.transfer_to_texture(self.data_out, self.data_in_tex)
- return self.data_in_tex, self.data_out
- n_batchs = len(self.axes)
- in_ref, out_ref = self.swap_pattern[n_batchs][i]
- d_in = getattr(self, in_ref)
- d_out = getattr(self, out_ref)
- return d_in, d_out
-
- def _configure_kernel_args(self, opencl_kernel_args, input_ref, output_ref):
- # TODO more elegant
- if isinstance(input_ref, parray.Array):
- input_ref = input_ref.data
- if isinstance(output_ref, parray.Array):
- output_ref = output_ref.data
- if input_ref is not None or output_ref is not None:
- opencl_kernel_args = list(opencl_kernel_args)
- if input_ref is not None:
- opencl_kernel_args[3] = input_ref
- if output_ref is not None:
- opencl_kernel_args[4] = output_ref
- opencl_kernel_args = tuple(opencl_kernel_args)
- return opencl_kernel_args
-
- @staticmethod
- def _check_dimensions(arr=None, shape=None, name="", dim_min=1, dim_max=3):
- if shape is not None:
- ndim = len(shape)
- elif arr is not None:
- ndim = arr.ndim
- else:
- raise ValueError("Please provide either arr= or shape=")
- if ndim < dim_min or ndim > dim_max:
- raise ValueError("%s dimensions should be between %d and %d"
- % (name, dim_min, dim_max)
- )
- return ndim
-
- def _check_array(self, arr):
- # TODO allow cl.Buffer
- if not(isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)):
- raise TypeError("Expected either pyopencl.array.Array or numpy.ndarray")
- # TODO composition with ImageProcessing/cast
- if arr.dtype != np.float32:
- raise TypeError("Data must be float32")
- if arr.shape != self.shape:
- raise ValueError("Expected data shape = %s" % str(self.shape))
-
- def _set_arrays(self, array, output=None):
- # When using textures: copy
- if self.use_textures:
- self.transfer_to_texture(array, self.data_in_tex)
- data_in_ref = self.data_in_tex
- else:
- # Otherwise: copy H->D or update references.
- if isinstance(array, np.ndarray):
- self.data_in[:] = array[:]
- else:
- self._old_input_ref = self.data_in
- self.data_in = array
- data_in_ref = self.data_in
- if output is not None:
- if not(isinstance(output, np.ndarray)):
- self._old_output_ref = self.data_out
- self.data_out = output
- # Update OpenCL kernel arguments with new array references
- self.kernel_args = self._configure_kernel_args(
- self.kernel_args,
- data_in_ref,
- self.data_out
- )
-
- def _separable_convolution(self):
- assert len(self.axes) == len(self.use_case_kernels)
- # Separable: one kernel call per data dimension
- for i, axis in enumerate(self.axes):
- in_ref, out_ref = self._get_swapped_arrays(i)
- self._batched_convolution(axis, input_ref=in_ref, output_ref=out_ref)
-
- def _batched_convolution(self, axis, input_ref=None, output_ref=None):
- # Batched: one kernel call in total
- opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[axis])
- opencl_kernel_args = self._configure_kernel_args(
- self.kernel_args,
- input_ref,
- output_ref
- )
- ev = opencl_kernel(*opencl_kernel_args)
- if self.profile:
- self.events.append(EventDescription("batched convolution", ev))
-
- def _nd_convolution(self):
- assert len(self.use_case_kernels) == 1
- opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[0])
- ev = opencl_kernel(*self.kernel_args)
- if self.profile:
- self.events.append(EventDescription("ND convolution", ev))
-
- def _recover_arrays_references(self):
- if self._old_input_ref is not None:
- self.data_in = self._old_input_ref
- self._old_input_ref = None
- if self._old_output_ref is not None:
- self.data_out = self._old_output_ref
- self._old_output_ref = None
- self.kernel_args = self._configure_kernel_args(
- self.kernel_args,
- self.data_in,
- self.data_out
- )
-
- def _get_output(self, output):
- if output is None:
- res = self.data_out.get()
- else:
- res = output
- if isinstance(output, np.ndarray):
- output[:] = self.data_out[:]
- self._recover_arrays_references()
- return res
-
- def convolve(self, array, output=None):
- """
- Convolve an array with the class kernel.
-
- :param array: Input array. Can be numpy.ndarray or pyopencl.array.Array.
- :param output: Output array. Can be numpy.ndarray or pyopencl.array.Array.
- """
- self._check_array(array)
- self._set_arrays(array, output=output)
- if self.axes is not None:
- if self.separable:
- self._separable_convolution()
- elif self.batched:
- assert len(self.axes) == 1
- self._batched_convolution(self.axes[0])
- # else: ND-ND convol
- else:
- # ND-ND convol
- self._nd_convolution()
-
- res = self._get_output(output)
- return res
-
-
- __call__ = convolve
-
-
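A minimal usage sketch for the Convolution class, assuming a working OpenCL device; the image size and Gaussian kernel are arbitrary examples:

import numpy as np
from silx.opencl.convolution import Convolution

image = np.random.rand(512, 512).astype(np.float32)
sigma = 1.5
x = np.arange(-4, 5)
gaussian = np.exp(-x ** 2 / (2 * sigma ** 2)).astype(np.float32)
gaussian /= gaussian.sum()

# 1D kernel on 2D data: separable convolution applied along both axes
conv = Convolution(image.shape, gaussian, mode="reflect")
result = conv(image)  # equivalent to conv.convolve(image)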
diff --git a/silx/opencl/image.py b/silx/opencl/image.py
deleted file mode 100644
index 65e2d5e..0000000
--- a/silx/opencl/image.py
+++ /dev/null
@@ -1,387 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: silx
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-# .
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-# .
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""A general purpose library for manipulating 2D images in 1 or 3 colors
-
-"""
-from __future__ import absolute_import, print_function, with_statement, division
-
-
-__author__ = "Jerome Kieffer"
-__license__ = "MIT"
-__date__ = "12/02/2018"
-__copyright__ = "2012-2017, ESRF, Grenoble"
-__contact__ = "jerome.kieffer@esrf.fr"
-
-import os
-import logging
-import numpy
-from collections import OrderedDict
-from math import floor, ceil, sqrt, log
-
-from .common import pyopencl, kernel_workgroup_size
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-
-if pyopencl:
- mf = pyopencl.mem_flags
-logger = logging.getLogger(__name__)
-
-
-class ImageProcessing(OpenclProcessing):
-
- kernel_files = ["cast", "map", "max_min", "histogram"]
-
- converter = {numpy.dtype(numpy.uint8): "u8_to_float",
- numpy.dtype(numpy.int8): "s8_to_float",
- numpy.dtype(numpy.uint16): "u16_to_float",
- numpy.dtype(numpy.int16): "s16_to_float",
- numpy.dtype(numpy.uint32): "u32_to_float",
- numpy.dtype(numpy.int32): "s32_to_float",
- }
-
- def __init__(self, shape=None, ncolors=1, template=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
- """Constructor of the ImageProcessing class
-
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param block_size: preferred workgroup size, may vary depending on the
- outcome of the compilation
- :param memory: minimum memory available on device
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly slower)
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, memory=memory, profile=profile)
- if template is not None:
- shape = template.shape
- if len(shape) > 2:
- self.ncolors = shape[2]
- self.shape = shape[:2]
- else:
- self.ncolors = 1
- self.shape = shape
- else:
- self.ncolors = ncolors
- self.shape = shape
- assert shape is not None
- self.buffer_shape = self.shape if self.ncolors == 1 else self.shape + (self.ncolors,)
- kernel_files = [os.path.join("image", i) for i in self.kernel_files]
- self.compile_kernels(kernel_files,
- compile_options="-DNB_COLOR=%i" % self.ncolors)
- if self.ncolors == 1:
- img_shape = self.shape
- else:
- img_shape = self.shape + (self.ncolors,)
-
- buffers = [BufferDescription("image0_d", img_shape, numpy.float32, None),
- BufferDescription("image1_d", img_shape, numpy.float32, None),
- BufferDescription("image2_d", img_shape, numpy.float32, None),
- BufferDescription("max_min_d", 2, numpy.float32, None),
- BufferDescription("cnt_d", 1, numpy.int32, None), ]
- # Temporary buffer for max-min reduction
- self.wg_red = kernel_workgroup_size(self.program, self.kernels.max_min_reduction_stage1)
- if self.wg_red > 1:
- self.wg_red = min(self.wg_red,
- numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2)))))
- tmp = BufferDescription("tmp_max_min_d", 2 * self.wg_red, numpy.float32, None)
- buffers.append(tmp)
- self.allocate_buffers(buffers, use_array=True)
- self.cl_mem["cnt_d"].fill(0)
-
- def __repr__(self):
- return "ImageProcessing for shape=%s, %i colors initalized on %s" % \
- (self.shape, self.ncolors, self.ctx.devices[0].name)
-
- def _get_in_out_buffers(self, img=None, copy=True, out=None,
- out_dtype=None, out_size=None):
- """Internal method used to select the proper buffers before processing.
-
- :param img: expects a numpy array or a pyopencl.array of dim 2 or 3
- :param copy: set to False to directly re-use a pyopencl array
- :param out: provide an output buffer to store the result
- :param out_dtype: enforce the type of the output buffer (optional)
- :param out_size: enforce the size of the output buffer (optional)
- :return: input_buffer, output_buffer
-
- Nota: this is not locked.
- """
- events = []
- if out is not None and isinstance(out, pyopencl.array.Array):
- if (out_size or out_dtype) is not None:
- if out_size is not None:
- assert out.size > out_size
- if out_dtype is not None:
- assert out_dtype == out.dtype
- else: # assume it is the same size and type as the working buffer
- assert out.shape == self.buffer_shape
- assert out.dtype == numpy.float32
- out.finish()
- output_array = out
- else:
- if out_dtype != numpy.float32 and out_size:
- name = "%s_%s_d" % (numpy.dtype(out_dtype), out_size)
- if name not in self.cl_mem:
- output_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (out_size,), out_dtype)
- else:
- output_array = self.cl_mem[name]
- else:
- output_array = self.cl_mem["image2_d"]
-
- if img is None:
- input_array = self.cl_mem["image1_d"]
- elif isinstance(img, pyopencl.array.Array):
- if copy:
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, img.data)
- input_array = self.cl_mem["image1_d"]
- events.append(EventDescription("copy D->D", evt))
- else:
- img.finish()
- input_array = img
- evt = None
- else:
- # assume this is numpy
- if img.dtype.itemsize > 4:
- logger.warning("Casting to float32 on CPU")
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img, numpy.float32))
- input_array = self.cl_mem["image1_d"]
- events.append(EventDescription("cast+copy H->D", evt))
- else:
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img))
- input_array = self.cl_mem["image1_d"]
- events.append(EventDescription("copy H->D", evt))
- if self.profile:
- self.events += events
- return input_array, output_array
-
- def to_float(self, img, copy=True, out=None):
- """ Takes any array and convert it to a float array for ease of processing.
-
- :param img: expects a numpy array or a pyopencl.array of dim 2 or 3
- :param copy: set to False to directly re-use a pyopencl array
- :param out: provide an output buffer to store the result
- """
- assert img.shape == self.buffer_shape
-
- events = []
- with self.sem:
- input_array, output_array = self._get_in_out_buffers(img, copy, out)
- if (img.dtype.itemsize > 4) or (img.dtype == numpy.float32):
- # copy device -> device, already there as float32
- ev = pyopencl.enqueue_copy(self.queue, output_array.data, input_array.data)
- events.append(EventDescription("copy D->D", ev))
- else:
- # Cast to float:
- name = self.converter[img.dtype]
- kernel = self.kernels.get_kernel(name)
- ev = kernel(self.queue, (self.shape[1], self.shape[0]), None,
- input_array.data, output_array.data,
- numpy.int32(self.shape[1]), numpy.int32(self.shape[0])
- )
- events.append(EventDescription("cast %s" % name, ev))
-
- if self.profile:
- self.events += events
- if out is None:
- res = output_array.get()
- return res
- else:
- output_array.finish()
- return output_array
-
- def normalize(self, img, mini=0.0, maxi=1.0, copy=True, out=None):
- """Scale the intensity of the image so that the minimum is 0 and the
- maximum is 1.0 (or any other value provided).
-
- :param img: numpy array or pyopencl array of dim 2 or 3 and of type float
- :param mini: expected minimum value
- :param maxi: expected maximum value
- :param copy: set to False to use directly the input buffer
- :param out: provides an output buffer. prevents a copy D->H
-
- This uses a min/max reduction in two stages plus a map operation
- """
- assert img.shape == self.buffer_shape
- events = []
- with self.sem:
- input_array, output_array = self._get_in_out_buffers(img, copy, out)
- size = numpy.int32(numpy.prod(self.shape))
- if self.wg_red == 1:
- # Probably on MacOS CPU WG==1 --> serial code.
- kernel = self.kernels.get_kernel("max_min_serial")
- evt = kernel(self.queue, (1,), (1,),
- input_array.data,
- size,
- self.cl_mem["max_min_d"].data)
- ed = EventDescription("max_min_serial", evt)
- events.append(ed)
- else:
- stage1 = self.kernels.max_min_reduction_stage1
- stage2 = self.kernels.max_min_reduction_stage2
- local_mem = pyopencl.LocalMemory(int(self.wg_red * 8))
- k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
- input_array.data,
- self.cl_mem["tmp_max_min_d"].data,
- size,
- local_mem)
- k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
- self.cl_mem["tmp_max_min_d"].data,
- self.cl_mem["max_min_d"].data,
- local_mem)
-
- events += [EventDescription("max_min_stage1", k1),
- EventDescription("max_min_stage2", k2)]
-
- evt = self.kernels.normalize_image(self.queue, (self.shape[1], self.shape[0]), None,
- input_array.data, output_array.data,
- numpy.int32(self.shape[1]), numpy.int32(self.shape[0]),
- self.cl_mem["max_min_d"].data,
- numpy.float32(mini), numpy.float32(maxi))
- events.append(EventDescription("normalize", evt))
- if self.profile:
- self.events += events
-
- if out is None:
- res = output_array.get()
- return res
- else:
- output_array.finish()
- return output_array
-
- def histogram(self, img=None, nbins=255, range=None,
- log_scale=False, copy=True, out=None):
- """Compute the histogram of a set of data.
-
- :param img: input image. If None, use the one already on the device
- :param nbins: number of bins
- :param range: the lower and upper range of the bins. If not provided,
- range is simply ``(a.min(), a.max())``. Values outside the
- range are ignored. The first element of the range must be
- less than or equal to the second.
- :param log_scale: perform the binning in logarithmic scale.
- Open to extension
- :param copy: unset to directly use the input buffer without copy
- :param out: use a provided array to store the result
- :return: histogram (size=nbins), edges (size=nbins+1)
- API similar to numpy
- """
- assert img.shape == self.buffer_shape
-
- input_array = self.to_float(img, copy=copy, out=self.cl_mem["image0_d"])
- events = []
- with self.sem:
- input_array, output_array = self._get_in_out_buffers(input_array, copy=False,
- out=out,
- out_dtype=numpy.int32,
- out_size=nbins)
-
- if range is None:
- # measure actually the bounds
- size = numpy.int32(numpy.prod(self.shape))
- if self.wg_red == 1:
- # Probably on MacOS CPU WG==1 --> serial code.
- kernel = self.kernels.get_kernel("max_min_serial")
-
- evt = kernel(self.queue, (1,), (1,),
- input_array.data,
- size,
- self.cl_mem["max_min_d"].data)
- events.append(EventDescription("max_min_serial", evt))
- else:
- stage1 = self.kernels.max_min_reduction_stage1
- stage2 = self.kernels.max_min_reduction_stage2
- local_mem = pyopencl.LocalMemory(int(self.wg_red * 2 * numpy.dtype("float32").itemsize))
- k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
- input_array.data,
- self.cl_mem["tmp_max_min_d"].data,
- size,
- local_mem)
- k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
- self.cl_mem["tmp_max_min_d"].data,
- self.cl_mem["max_min_d"].data,
- local_mem)
-
- events += [EventDescription("max_min_stage1", k1),
- EventDescription("max_min_stage2", k2)]
- maxi, mini = self.cl_mem["max_min_d"].get()
- else:
- mini = numpy.float32(min(range))
- maxi = numpy.float32(max(range))
- device = self.ctx.devices[0]
- nb_engines = device.max_compute_units
- tmp_size = nb_engines * nbins
- name = "tmp_int32_%s_d" % (tmp_size)
- if name not in self.cl_mem:
- tmp_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (tmp_size,), numpy.int32)
- else:
- tmp_array = self.cl_mem[name]
-
- edge_name = "tmp_float32_%s_d" % (nbins + 1)
- if edge_name not in self.cl_mem:
- edges_array = self.cl_mem[edge_name] = pyopencl.array.empty(self.queue, (nbins + 1,), numpy.float32)
- else:
- edges_array = self.cl_mem[edge_name]
-
- shared = pyopencl.LocalMemory(numpy.dtype(numpy.int32).itemsize * nbins)
-
- # Handle log-scale
- if log_scale:
- map_operation = numpy.int32(1)
- else:
- map_operation = numpy.int32(0)
- kernel = self.kernels.get_kernel("histogram")
- wg = min(device.max_work_group_size,
- 1 << (int(ceil(log(nbins, 2)))),
- self.kernels.max_workgroup_size(kernel))
- evt = kernel(self.queue, (wg * nb_engines,), (wg,),
- input_array.data,
- numpy.int32(input_array.size),
- mini,
- maxi,
- map_operation,
- output_array.data,
- edges_array.data,
- numpy.int32(nbins),
- tmp_array.data,
- self.cl_mem["cnt_d"].data,
- shared)
- events.append(EventDescription("histogram", evt))
-
- if self.profile:
- self.events += events
-
- if out is None:
- res = output_array.get()
- return res, edges_array.get()
- else:
- output_array.finish()
- return output_array, edges_array
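A minimal usage sketch for ImageProcessing, assuming a working OpenCL device; the uint16 test image is arbitrary:

import numpy
from silx.opencl.image import ImageProcessing

img = (numpy.random.rand(256, 256) * 65535).astype(numpy.uint16)
ip = ImageProcessing(template=img)
as_float = ip.to_float(img)                       # cast performed on the device
normalized = ip.normalize(as_float, 0.0, 1.0)     # two-stage min/max reduction + map
histo, edges = ip.histogram(as_float, nbins=128)  # histogram and bin edges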
diff --git a/silx/opencl/linalg.py b/silx/opencl/linalg.py
deleted file mode 100644
index a64122a..0000000
--- a/silx/opencl/linalg.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for basic linear algebra in OpenCL"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import numpy as np
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing
-
-import pyopencl.array as parray
-cl = pyopencl
-
-
-class LinAlg(OpenclProcessing):
-
- kernel_files = ["linalg.cl"]
-
- def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False):
- """
- Create a "Linear Algebra" plan for a given image shape.
-
- :param shape: shape of the image (num_rows, num_columns)
- :param do_checks: (optional) if True, memory and data type checks are performed when possible.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
-
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self.d_gradient = parray.empty(self.queue, shape, np.complex64)
- self.d_gradient.fill(np.complex64(0.0))
- self.d_image = parray.empty(self.queue, shape, np.float32)
- self.d_image.fill(np.float32(0.0))
- self.add_to_cl_mem({
- "d_gradient": self.d_gradient,
- "d_image": self.d_image
- })
-
- self.wg2D = None
- self.shape = shape
- self.ndrange2D = (
- int(self.shape[1]),
- int(self.shape[0])
- )
- self.do_checks = bool(do_checks)
- OpenclProcessing.compile_kernels(self, self.kernel_files)
-
- @staticmethod
- def check_array(array, dtype, shape, arg_name):
- if array.shape != shape or array.dtype != dtype:
- raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype)))
-
- def get_data_references(self, src, dst, default_src_ref, default_dst_ref):
- """
- From various types of src and dst arrays,
- returns the references to the underlying data (Buffer) that will be used by the OpenCL kernels.
- # TODO documentation
-
- This function will make a copy host->device if the input is on host (eg. numpy array)
- """
- if dst is not None:
- if isinstance(dst, cl.array.Array):
- dst_ref = dst.data
- elif isinstance(dst, cl.Buffer):
- dst_ref = dst
- else:
- raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer")
- else:
- dst_ref = default_dst_ref
-
- if isinstance(src, cl.array.Array):
- src_ref = src.data
- elif isinstance(src, cl.Buffer):
- src_ref = src
- else: # assuming numpy.ndarray
- evt = cl.enqueue_copy(self.queue, default_src_ref, src)
- self.events.append(EventDescription("copy H->D", evt))
- src_ref = default_src_ref
- return src_ref, dst_ref
-
- def gradient(self, image, dst=None, return_to_host=False):
- """
- Compute the spatial gradient of an image.
- The gradient is computed with first-order difference (not central difference).
-
- :param image: image to compute the gradient from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
- :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type.
- :param return_to_host: optional, set to True if you want the result to be transferred back to host.
-
- if dst is provided, it should be of type numpy.complex64 !
- """
- n_y, n_x = np.int32(self.shape)
- if self.do_checks:
- self.check_array(image, np.float32, self.shape, "image")
- if dst is not None:
- self.check_array(dst, np.complex64, self.shape, "dst")
- img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data)
-
- # Prepare the kernel call
- kernel_args = [
- img_ref,
- grad_ref,
- n_x,
- n_y
- ]
- # Call the gradient kernel
- evt = self.kernels.kern_gradient2D(
- self.queue,
- self.ndrange2D,
- self.wg2D,
- *kernel_args
- )
- self.events.append(EventDescription("gradient2D", evt))
- # TODO: should the wait be done in any case ?
- # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
- if dst is None:
- evt.wait()
-
- if return_to_host:
- if dst is not None:
- res_tmp = self.d_gradient.get()
- else:
- res_tmp = np.zeros(self.shape, dtype=np.complex64)
- cl.enqueue_copy(self.queue, res_tmp, grad_ref)
- res = np.zeros((2,) + self.shape, dtype=np.float32)
- res[0] = np.copy(res_tmp.real)
- res[1] = np.copy(res_tmp.imag)
- return res
- else:
- return dst
-
- def divergence(self, gradient, dst=None, return_to_host=False):
- """
- Compute the spatial divergence of a gradient-like array.
- The divergence is designed to be the (negative) adjoint of the gradient.
-
- :param gradient: gradient-like array to compute the divergence from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
- :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type.
- :param return_to_host: optional, set to True if you want the result to be transferred back to host.
-
- if dst is provided, it should be of type numpy.float32!
- """
- n_y, n_x = np.int32(self.shape)
- # numpy.ndarray gradients are expected to be (2, n_y, n_x)
- if isinstance(gradient, np.ndarray):
- gradient2 = np.zeros(self.shape, dtype=np.complex64)
- gradient2.real = np.copy(gradient[0])
- gradient2.imag = np.copy(gradient[1])
- gradient = gradient2
- elif self.do_checks:
- self.check_array(gradient, np.complex64, self.shape, "gradient")
- if dst is not None:
- self.check_array(dst, np.float32, self.shape, "dst")
- grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data)
-
- # Prepare the kernel call
- kernel_args = [
- grad_ref,
- img_ref,
- n_x,
- n_y
- ]
- # Call the gradient kernel
- evt = self.kernels.kern_divergence2D(
- self.queue,
- self.ndrange2D,
- self.wg2D,
- *kernel_args
- )
- self.events.append(EventDescription("divergence2D", evt))
- # TODO: should the wait be done in any case ?
- # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
- if dst is None:
- evt.wait()
-
- if return_to_host:
- if dst is not None:
- res = self.d_image.get()
- else:
- res = np.zeros(self.shape, dtype=np.float32)
- cl.enqueue_copy(self.queue, res, img_ref)
- return res
- else:
- return dst
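A minimal usage sketch of the gradient/divergence pair defined above, assuming a working OpenCL device:

import numpy as np
from silx.opencl.linalg import LinAlg

image = np.random.rand(256, 256).astype(np.float32)
la = LinAlg(image.shape)
grad = la.gradient(image, return_to_host=True)    # shape (2, 256, 256), float32
div = la.divergence(grad, return_to_host=True)    # shape (256, 256), float32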
diff --git a/silx/opencl/medfilt.py b/silx/opencl/medfilt.py
deleted file mode 100644
index d4e425b..0000000
--- a/silx/opencl/medfilt.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: Azimuthal integration
-# https://github.com/silx-kit/pyFAI
-#
-# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-# .
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-# .
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""A module for performing the 1d, 2d and 3d median filter ...
-
- The target is to mimic the signature of scipy.signal.medfilt and scipy.signal.medfilt2d
-
-The first implementation targets 2D implementation where this operation is costly (~10s/2kx2k image)
-"""
-from __future__ import absolute_import, print_function, with_statement, division
-
-
-__author__ = "Jerome Kieffer"
-__license__ = "MIT"
-__date__ = "12/09/2017"
-__copyright__ = "2012-2017, ESRF, Grenoble"
-__contact__ = "jerome.kieffer@esrf.fr"
-
-import logging
-import numpy
-from collections import OrderedDict
-
-from .common import pyopencl, kernel_workgroup_size
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-
-if pyopencl:
- mf = pyopencl.mem_flags
-else:
- raise ImportError("pyopencl is not installed")
-logger = logging.getLogger(__name__)
-
-
-class MedianFilter2D(OpenclProcessing):
- """A class for doing median filtering using OpenCL"""
- buffers = [
- BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY),
- BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
- BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
- ]
- kernel_files = ["preprocess.cl", "bitonic.cl", "medfilt.cl"]
- mapping = {numpy.int8: "s8_to_float",
- numpy.uint8: "u8_to_float",
- numpy.int16: "s16_to_float",
- numpy.uint16: "u16_to_float",
- numpy.uint32: "u32_to_float",
- numpy.int32: "s32_to_float"}
-
- def __init__(self, shape, kernel_size=(3, 3),
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, profile=False
- ):
- """Constructor of the OpenCL 2D median filtering class
-
- :param shape: shape of the images to treat
- :param kernel_size: 2-tuple of odd values
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param block_size: preferred workgroup size, may vary depending on the outcome of the compilation
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
- self.shape = shape
- self.size = self.shape[0] * self.shape[1]
- self.kernel_size = self.calc_kernel_size(kernel_size)
- self.workgroup_size = (self.calc_wg(self.kernel_size), 1) # 3D kernel
- self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
- for i in self.__class__.buffers]
-
- self.allocate_buffers()
- self.local_mem = self._get_local_mem(self.workgroup_size[0])
- OpenclProcessing.compile_kernels(self, self.kernel_files, "-D NIMAGE=%i" % self.size)
- self.set_kernel_arguments()
-
- def set_kernel_arguments(self):
- """Parametrize all kernel arguments
- """
- for val in self.mapping.values():
- self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("image_raw", "image")))
- self.cl_kernel_args["medfilt2d"] = OrderedDict((("image", self.cl_mem["image"]),
- ("result", self.cl_mem["result"]),
- ("local", self.local_mem),
- ("khs1", numpy.int32(self.kernel_size[0] // 2)), # Kernel half-size along dim1 (lines)
- ("khs2", numpy.int32(self.kernel_size[1] // 2)), # Kernel half-size along dim2 (columns)
- ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines)
- ("width", numpy.int32(self.shape[1]))))
-# ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns))
-
- def _get_local_mem(self, wg):
- return pyopencl.LocalMemory(wg * 32) # 4 bytes per float, 8 elements per thread
-
- def send_buffer(self, data, dest):
- """Send a numpy array to the device, including the cast on the device if possible
-
- :param data: numpy array with data
- :param dest: name of the buffer as registered in the class
- """
-
- dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
- events = []
- if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
- copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type))
- events.append(EventDescription("copy H->D %s" % dest, copy_image))
- else:
- copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data))
- kernel = getattr(self.program, self.mapping[data.dtype.type])
- cast_to_float = kernel(self.queue, (self.size,), None, self.cl_mem["image_raw"], self.cl_mem[dest])
- events += [EventDescription("copy H->D %s" % dest, copy_image), EventDescription("cast to float", cast_to_float)]
- if self.profile:
- self.events += events
-
- def calc_wg(self, kernel_size):
- """calculate and return the optimal workgroup size for the first dimension, taking into account
- the 8-height band
-
- :param kernel_size: 2-tuple of int, shape of the median window
- :return: optimal workgroup size
- """
- needed_threads = ((kernel_size[0] + 7) // 8) * kernel_size[1]
- if needed_threads < 8:
- wg = 8
- elif needed_threads < 32:
- wg = 32
- else:
- wg = 1 << (int(needed_threads).bit_length())
- return wg
-
- def medfilt2d(self, image, kernel_size=None):
- """Actually apply the median filtering on the image
-
- :param image: numpy array with the image
- :param kernel_size: 2-tuple of odd values; if None, use the kernel size set at construction
- :return: median-filtered 2D image
-
-
- Nota: for window size 1x1 -> 7x7 up to 49 / 64 elements in 8 threads, 8elt/th
- 9x9 -> 15x15 up to 225 / 256 elements in 32 threads, 8elt/th
- 17x17 -> 21x21 up to 441 / 512 elements in 64 threads, 8elt/th
-
- TODO: change window size on the fly,
-
-
- """
- events = []
- if kernel_size is None:
- kernel_size = self.kernel_size
- else:
- kernel_size = self.calc_kernel_size(kernel_size)
- kernel_half_size = kernel_size // numpy.int32(2)
- # this is the workgroup size
- wg = self.calc_wg(kernel_size)
-
- # check for valid work group size:
- amws = kernel_workgroup_size(self.program, "medfilt2d")
- logger.warning("max actual workgroup size: %s, expected: %s", amws, wg)
- if wg > amws:
- raise RuntimeError("Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws))
-
- localmem = self._get_local_mem(wg)
-
- assert image.ndim == 2, "Treat only 2D images"
- assert image.shape[0] <= self.shape[0], "height is OK"
- assert image.shape[1] <= self.shape[1], "width is OK"
-
- with self.sem:
- self.send_buffer(image, "image")
-
- kwargs = self.cl_kernel_args["medfilt2d"]
- kwargs["local"] = localmem
- kwargs["khs1"] = kernel_half_size[0]
- kwargs["khs2"] = kernel_half_size[1]
- kwargs["height"] = numpy.int32(image.shape[0])
- kwargs["width"] = numpy.int32(image.shape[1])
-# for k, v in kwargs.items():
-# print("%s: %s (%s)" % (k, v, type(v)))
- mf2d = self.kernels.medfilt2d(self.queue,
- (wg, image.shape[1]),
- (wg, 1), *list(kwargs.values()))
- events.append(EventDescription("median filter 2d", mf2d))
-
- result = numpy.empty(image.shape, numpy.float32)
- ev = pyopencl.enqueue_copy(self.queue, result, self.cl_mem["result"])
- events.append(EventDescription("copy D->H result", ev))
- ev.wait()
- if self.profile:
- self.events += events
- return result
- __call__ = medfilt2d
-
- @staticmethod
- def calc_kernel_size(kernel_size):
- """format the kernel size to be a 2-length numpy array of int32
- """
- kernel_size = numpy.asarray(kernel_size, dtype=numpy.int32)
- if kernel_size.shape == ():
- kernel_size = numpy.repeat(kernel_size.item(), 2).astype(numpy.int32)
- for size in kernel_size:
- if (size % 2) != 1:
- raise ValueError("Each element of kernel_size should be odd.")
- return kernel_size
-
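-    # Sketch (illustrative, not from the original file): calc_kernel_size
-    # normalizes scalars to 2-element arrays and rejects even sizes, e.g.
-    #
-    #     MedianFilter2D.calc_kernel_size(5)       # -> array([5, 5], dtype=int32)
-    #     MedianFilter2D.calc_kernel_size((3, 5))  # -> array([3, 5], dtype=int32)
-    #     MedianFilter2D.calc_kernel_size(4)       # raises ValueError (even size)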
-
-class _MedFilt2d(object):
- median_filter = None
-
- @classmethod
- def medfilt2d(cls, ary, kernel_size=3):
- """Median filter a 2-dimensional array.
-
- Apply a median filter to the `input` array using a local window-size
- given by `kernel_size` (must be odd).
-
- :param ary: A 2-dimensional input array.
- :param kernel_size: A scalar or a list of length 2, giving the size of the
- median filter window in each dimension. Elements of
- `kernel_size` should be odd. If `kernel_size` is a scalar,
- then this scalar is used as the size in each dimension.
- Default is a kernel of size (3, 3).
- :return: An array the same size as input containing the median filtered
- result. The computation is always performed on float32 values.
-
- About the padding:
-
- * The filling mode in scipy.signal.medfilt2d is zero-padding
- * This implementation is equivalent to:
- scipy.ndimage.filters.median_filter(ary, kernel_size, mode="nearest")
-
- """
- image = numpy.atleast_2d(ary)
- shape = numpy.array(image.shape)
- if cls.median_filter is None:
- cls.median_filter = MedianFilter2D(image.shape, kernel_size)
- elif (numpy.array(cls.median_filter.shape) < shape).any():
- # enlarge the buffer size
- new_shape = numpy.maximum(numpy.array(cls.median_filter.shape), shape)
- ctx = cls.median_filter.ctx
- cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx)
- return cls.median_filter.medfilt2d(image, kernel_size=kernel_size)
-
-medfilt2d = _MedFilt2d.medfilt2d
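-
-# Minimal usage sketch (illustrative; assumes a working pyopencl installation
-# with at least one OpenCL device, and that this module is importable as
-# silx.opencl.medfilt):
-#
-#     import numpy
-#     from silx.opencl.medfilt import medfilt2d
-#     img = numpy.random.random((256, 256)).astype(numpy.float32)
-#     filtered = medfilt2d(img, kernel_size=5)  # float32 result, same shape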
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
deleted file mode 100644
index 8b81f7f..0000000
--- a/silx/opencl/processing.py
+++ /dev/null
@@ -1,447 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: S I L X project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-
-"""
-Common OpenCL abstract base class for different processing tasks
-"""
-
-__author__ = "Jerome Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "02/03/2021"
-__status__ = "stable"
-
-import sys
-import os
-import logging
-import gc
-from collections import namedtuple, OrderedDict
-import numpy
-import threading
-from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
-from .utils import concatenate_cl_kernel
-import platform
-
-BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
-EventDescription = namedtuple("EventDescription", ["name", "event"])
-
-logger = logging.getLogger(__name__)
-
-
-class KernelContainer(object):
- """Those object holds a copy of all kernels accessible as attributes"""
-
- def __init__(self, program):
- """Constructor of the class
-
- :param program: the OpenCL program as generated by PyOpenCL
- """
- self._program = program
- for kernel in program.all_kernels():
- self.__setattr__(kernel.function_name, kernel)
-
- def get_kernels(self):
- "return the dictionary with all kernels"
- return dict(item for item in self.__dict__.items()
- if not item[0].startswith("_"))
-
- def get_kernel(self, name):
- "get a kernel from its name"
- logger.debug("KernelContainer.get_kernel(%s)", name)
- return self.__dict__.get(name)
-
- def max_workgroup_size(self, kernel_name):
- "Retrieve the compile time WORK_GROUP_SIZE for a given kernel"
- if isinstance(kernel_name, pyopencl.Kernel):
- kernel = kernel_name
- else:
- kernel = self.get_kernel(kernel_name)
-
- return query_kernel_info(self._program, kernel, "WORK_GROUP_SIZE")
-
- def min_workgroup_size(self, kernel_name):
- "Retrieve the compile time PREFERRED_WORK_GROUP_SIZE_MULTIPLE for a given kernel"
- if isinstance(kernel_name, pyopencl.Kernel):
- kernel = kernel_name
- else:
- kernel = self.get_kernel(kernel_name)
-
- return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
-
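-    # Usage sketch (illustrative; `program` stands for any built
-    # pyopencl.Program and the kernel name is hypothetical):
-    #
-    #     kc = KernelContainer(program)
-    #     kc.get_kernels()                    # {kernel_name: pyopencl.Kernel}
-    #     kc.max_workgroup_size("medfilt2d")  # compile-time WORK_GROUP_SIZE
-    #     kc.min_workgroup_size("medfilt2d")  # preferred workgroup size multiple
-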
-
-class OpenclProcessing(object):
- """Abstract class for different types of OpenCL processing.
-
- This class provides:
- * Generation of the context, queues and profiling mode
- * Additional functions to allocate/free all buffers declared as static attributes of the class
- * Functions to compile kernels, cache them and clean them
- * Helper functions to clone the object
- """
- # Example of how to create an output buffer of 10 floats
- buffers = [BufferDescription("output", 10, numpy.float32, None),
- ]
- # list of kernel source files to be concatenated before compilation of the program
- kernel_files = []
-
- def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
- """Constructor of the abstract OpenCL processing class
-
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param block_size: preferred workgroup size, may vary depending on the
- outcome of the compilation
- :param memory: minimum memory available on device
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly slower)
- """
- self.sem = threading.Semaphore()
- self._X87_VOLATILE = None
- self.profile = None
- self.events = [] # List of EventDescription, kept for profiling
- self.cl_mem = {} # dict with all buffers allocated
- self.cl_program = None # The actual OpenCL program
- self.cl_kernel_args = {} # dict with all kernel arguments
- self.queue = None
- if ctx:
- self.ctx = ctx
- else:
- self.ctx = ocl.create_context(devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- memory=memory)
- device_name = self.ctx.devices[0].name.strip()
- platform_name = self.ctx.devices[0].platform.name.strip()
- platform = ocl.get_platform(platform_name)
- self.device = platform.get_device(device_name)
- self.cl_kernel_args = {} # dict with all kernel arguments
-
- self.set_profiling(profile)
- self.block_size = block_size
- self.program = None
- self.kernels = None
-
- def check_textures_availability(self):
- return check_textures_availability(self.ctx)
-
- def __del__(self):
- """Destructor: release all buffers and programs
- """
- try:
- self.reset_log()
- self.free_kernels()
- self.free_buffers()
- if self.queue is not None:
- self.queue.finish()
- except Exception as err:
- logger.warning("%s: %s", type(err), err)
- self.queue = None
- self.device = None
- self.ctx = None
- gc.collect()
-
- def allocate_buffers(self, buffers=None, use_array=False):
- """
- Allocate OpenCL buffers required for a specific configuration
-
- :param buffers: a list of BufferDescriptions, leave to None for
- parametrized buffers.
- :param use_array: allocate memory as pyopencl.array.Array
- instead of pyopencl.Buffer
-
- Note that an OpenCL context also requires some memory, as well
- as Event and other OpenCL functionalities which cannot and are
- not taken into account here. The memory required by a context
- varies depending on the device. Typical for a GTX580 is 65MB but
- for a 9300m it is ~15MB. In addition, a GPU will always have at
- least 3-5MB of memory in use. Unfortunately, OpenCL does NOT
- have a built-in way to check the actual free memory on a
- device, only the total memory.
- """
- if buffers is None:
- buffers = self.buffers
-
- with self.sem:
- mem = {}
-
- # check if enough memory is available on the device
- ualloc = 0
- for buf in buffers:
- ualloc += numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
- logger.info("%.3fMB are needed on device: %s, which has %.3fMB",
- ualloc / 1.0e6, self.device, self.device.memory / 1.0e6)
-
- if ualloc >= self.device.memory:
- raise MemoryError("Fatal error in allocate_buffers. Not enough "
- " device memory for buffers (%lu requested, %lu available)"
- % (ualloc, self.device.memory))
-
- # do the allocation
- try:
- if use_array:
- for buf in buffers:
- mem[buf.name] = pyopencl.array.empty(self.queue, buf.size, buf.dtype)
- else:
- for buf in buffers:
- size = numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
- mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size))
- except pyopencl.MemoryError as error:
- release_cl_buffers(mem)
- raise MemoryError(error)
-
- self.cl_mem.update(mem)
-
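-    # Sketch of how a concrete subclass typically declares and allocates its
-    # buffers (class and buffer names below are illustrative only):
-    #
-    #     class MyProcessing(OpenclProcessing):
-    #         buffers = [BufferDescription("input", 1024, numpy.float32,
-    #                                      pyopencl.mem_flags.READ_ONLY),
-    #                    BufferDescription("output", 1024, numpy.float32,
-    #                                      pyopencl.mem_flags.WRITE_ONLY)]
-    #
-    #     proc = MyProcessing(devicetype="GPU")
-    #     proc.allocate_buffers()  # fills proc.cl_mem["input"] / ["output"]
-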
- def add_to_cl_mem(self, parrays):
- """
- Add pyopencl arrays, which were allocated by pyopencl, to self.cl_mem.
- This should be used before calling allocate_buffers().
-
- :param parrays: a dictionary of `pyopencl.array.Array` or `pyopencl.Buffer`
- """
- mem = self.cl_mem
- for name, parr in parrays.items():
- mem[name] = parr
- self.cl_mem.update(mem)
-
- def check_workgroup_size(self, kernel_name):
- "Calculate the maximum workgroup size from given kernel after compilation"
- return self.kernels.max_workgroup_size(kernel_name)
-
- def free_buffers(self):
- """free all device.memory allocated on the device
- """
- with self.sem:
- for key, buf in list(self.cl_mem.items()):
- if buf is not None:
- if isinstance(buf, pyopencl.array.Array):
- try:
- buf.data.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- else:
- try:
- buf.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- self.cl_mem[key] = None
-
- def compile_kernels(self, kernel_files=None, compile_options=None):
- """Call the OpenCL compiler
-
- :param kernel_files: list of path to the kernel
- (by default use the one declared in the class)
- :param compile_options: string of compile options
- """
- # concatenate all needed source files into a single openCL module
- kernel_files = kernel_files or self.kernel_files
- kernel_src = concatenate_cl_kernel(kernel_files)
-
- compile_options = compile_options or self.get_compiler_options()
- logger.info("Compiling file %s with options %s", kernel_files, compile_options)
- try:
- self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options)
- except (pyopencl.MemoryError, pyopencl.LogicError) as error:
- raise MemoryError(error)
- else:
- self.kernels = KernelContainer(self.program)
-
- def free_kernels(self):
- """Free all kernels
- """
- for kernel in self.cl_kernel_args:
- self.cl_kernel_args[kernel] = []
- self.kernels = None
- self.program = None
-
- def set_profiling(self, value=True):
- """Switch On/Off the profiling flag of the command queue to allow debugging
-
- :param value: set to True to enable profiling, or to False to disable it.
- Without profiling, the processing is marginally faster
-
- Profiling information can then be retrieved with the 'log_profile' method
- """
- if bool(value) != self.profile:
- with self.sem:
- self.profile = bool(value)
- if self.queue is not None:
- self.queue.finish()
- if self.profile:
- self.queue = pyopencl.CommandQueue(self.ctx,
- properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
- else:
- self.queue = pyopencl.CommandQueue(self.ctx)
-
- def profile_add(self, event, desc):
- """
- Add an OpenCL event to the events lists, if profiling is enabled.
-
- :param event: pyopencl event, as returned by an enqueue call
- :param desc: event description (text shown in the profiling log)
- """
- if self.profile:
- self.events.append(EventDescription(desc, event))
-
- def allocate_texture(self, shape, hostbuf=None, support_1D=False):
- return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
-
- def transfer_to_texture(self, arr, tex_ref):
- """
- Transfer an array to a texture.
-
- :param arr: Input array. Can be a numpy array or a pyopencl array.
- :param tex_ref: texture reference (pyopencl._cl.Image).
- """
- copy_args = [self.queue, tex_ref, arr]
- shp = arr.shape
- ndim = arr.ndim
- if ndim == 1:
- # pyopencl and OpenCL < 1.2 do not support image1d_t
- # force 2D with one row in this case
- # ~ ndim = 2
- shp = (1,) + shp
- copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
- if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
- # D->D copy
- copy_args[2] = arr.data
- copy_kwargs["offset"] = 0
- ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
- self.profile_add(ev, "Transfer to texture")
-
- def log_profile(self, stats=False):
- """If we are in profiling mode, prints out all timing for every single OpenCL call
-
- :param stats: if True, prints the statistics on each kernel instead of all execution timings
- :return: list of lines to print
- """
- total_time = 0.0
- out = [""]
- if stats:
- stats = OrderedDict()
- out.append(f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}")
- out.append(f"{'Kernel name':>50} (count): min median max mean std")
- else:
- stats = None
- out.append(f"Profiling info for OpenCL: {self.__class__.__name__}")
-
- if self.profile:
- for e in self.events:
- if "__len__" in dir(e) and len(e) >= 2:
- name = e[0]
- pr = e[1].profile
- t0 = pr.start
- t1 = pr.end
- et = 1e-6 * (t1 - t0)
- total_time += et
- if stats is None:
- out.append(f"{name:>50} : {et:.3f}ms")
- else:
- if name in stats:
- stats[name].append(et)
- else:
- stats[name] = [et]
- if stats is not None:
- for k, v in stats.items():
- n = numpy.array(v)
- out.append(f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}")
- out.append("_" * 80)
- out.append(f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms")
-
- logger.info(os.linesep.join(out))
- return out
-
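-    # Profiling workflow sketch (illustrative; `MyProcessing` and `process`
-    # are hypothetical names standing for any concrete subclass and method):
-    #
-    #     proc = MyProcessing(devicetype="GPU", profile=True)
-    #     proc.process(data)            # enqueues kernels, records events
-    #     proc.log_profile(stats=True)  # per-kernel min/median/max/mean/std
-    #     proc.reset_log()              # clear timers before the next run
-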
- def reset_log(self):
- """
- Resets the profiling timers
- """
- with self.sem:
- self.events = []
-
- @property
- def x87_volatile_option(self):
- # this is for 32-bit OpenCL running with PoCL
- if self._X87_VOLATILE is None:
- if (platform.machine() in ("i386", "i686", "x86_64", "AMD64") and
- (tuple.__itemsize__ == 4) and
- self.ctx.devices[0].platform.name == 'Portable Computing Language'):
- self._X87_VOLATILE = "-DX87_VOLATILE=volatile"
- else:
- self._X87_VOLATILE = ""
- return self._X87_VOLATILE
-
- def get_compiler_options(self, x87_volatile=False):
- """Provide the default OpenCL compiler options
-
- :param x87_volatile: needed for Kahan summation
- :return: string with compiler option
- """
- option_list = []
- if x87_volatile:
- option_list.append(self.x87_volatile_option)
- return " ".join(i for i in option_list if i)
-
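-    # Sketch (illustrative): on most 64-bit platforms the x87 workaround
-    # resolves to an empty string, so the options are usually empty:
-    #
-    #     proc.get_compiler_options(x87_volatile=True)
-    #     # -> "-DX87_VOLATILE=volatile" on 32-bit PoCL/x86, "" otherwise
-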
-# This should be implemented by concrete class
-# def __copy__(self):
-# """Shallow copy of the object
-#
-# :return: copy of the object
-# """
-# return self.__class__((self._data, self._indices, self._indptr),
-# self.size, block_size=self.BLOCK_SIZE,
-# platformid=self.platform.id,
-# deviceid=self.device.id,
-# checksum=self.on_device.get("data"),
-# profile=self.profile, empty=self.empty)
-#
-# def __deepcopy__(self, memo=None):
-# """deep copy of the object
-#
-# :return: deepcopy of the object
-# """
-# if memo is None:
-# memo = {}
-# new_csr = self._data.copy(), self._indices.copy(), self._indptr.copy()
-# memo[id(self._data)] = new_csr[0]
-# memo[id(self._indices)] = new_csr[1]
-# memo[id(self._indptr)] = new_csr[2]
-# new_obj = self.__class__(new_csr, self.size,
-# block_size=self.BLOCK_SIZE,
-# platformid=self.platform.id,
-# deviceid=self.device.id,
-# checksum=self.on_device.get("data"),
-# profile=self.profile, empty=self.empty)
-# memo[id(self)] = new_obj
-# return new_obj
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
deleted file mode 100644
index c02faf6..0000000
--- a/silx/opencl/projection.py
+++ /dev/null
@@ -1,428 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016-2020 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for tomographic projector on the GPU"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["A. Mirone, P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import logging
-import numpy as np
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-from .backprojection import _sizeof, _idivup
-
-if pyopencl:
- mf = pyopencl.mem_flags
- import pyopencl.array as parray
-else:
- raise ImportError("pyopencl is not installed")
-logger = logging.getLogger(__name__)
-
-
-class Projection(OpenclProcessing):
- """
- A class for performing a tomographic projection (Radon Transform) using
- OpenCL
- """
- kernel_files = ["proj.cl", "array_utils.cl"]
- logger.warning("Forward Projecter is untested and unsuported for now")
-
- def __init__(self, slice_shape, angles, axis_position=None,
- detector_width=None, normalize=False, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- """Constructor of the OpenCL projector.
-
- :param slice_shape: shape of the slice: (num_rows, num_columns).
- :param angles: Either an integer number of angles, or a list of custom
- angles values in radian.
- :param axis_position: Optional, axis position. Default is
- `(shape[1]-1)/2.0`.
- :param detector_width: Optional, detector width in pixels.
- If detector_width > slice_shape[1], the
- projection data will be surrounded with zeros.
- Using detector_width < slice_shape[1] might
- result in a local tomography setup.
- :param normalize: Optional, normalization. If set, the sinograms are
- multiplied by the factor pi/(2*nprojs).
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- """
- # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers
- # if sys.platform.startswith('darwin'): # assuming no discrete GPU
- # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
-
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
- self.shape = slice_shape
- self.axis_pos = axis_position
- self.angles = angles
- self.dwidth = detector_width
- self.normalize = normalize
-
- # Default values
- if self.axis_pos is None:
- self.axis_pos = (self.shape[1] - 1) / 2.
- if self.dwidth is None:
- self.dwidth = self.shape[1]
- if not(np.iterable(self.angles)):
- if self.angles is None:
- self.nprojs = self.shape[0]
- else:
- self.nprojs = self.angles
- self.angles = np.linspace(start=0,
- stop=np.pi,
- num=self.nprojs,
- endpoint=False).astype(dtype=np.float32)
- else:
- self.nprojs = len(self.angles)
- self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos) # TODO: custom
- self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos) # TODO: custom
- # Reset axis_pos once offset are computed
- self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.)
-
- # Workgroup, ndrange and shared size
- self.dimgrid_x = _idivup(self.dwidth, 16)
- self.dimgrid_y = _idivup(self.nprojs, 16)
- self._dimrecx = np.int32(self.dimgrid_x * 16)
- self._dimrecy = np.int32(self.dimgrid_y * 16)
- self.local_mem = 16 * 7 * _sizeof(np.float32)
- self.wg = (16, 16)
- self.ndrange = (
- int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1
- int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1
- )
-
- self._use_textures = self.check_textures_availability()
-
- # Allocate memory
- self.buffers = [
- BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE),
- BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY),
- BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- ]
- d_axis_corrections = parray.empty(self.queue, self.nprojs, np.float32)
- d_axis_corrections.fill(np.float32(0.0))
- self.add_to_cl_mem(
- {
- "d_axis_corrections": d_axis_corrections
- }
- )
- self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2),
- dtype=np.float32)
- if not(self._use_textures):
- self.allocate_slice()
- else:
- self.allocate_textures()
- self.allocate_buffers()
- self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
- dtype=np.float32)
- if not(self._use_textures):
- self.cl_mem["d_slice"].fill(0.)
- # enqueue_fill_buffer has issues if opencl 1.2 is not present
- # ~ pyopencl.enqueue_fill_buffer(
- # ~ self.queue,
- # ~ self.cl_mem["d_slice"],
- # ~ np.float32(0),
- # ~ 0,
- # ~ self._tmp_extended_img.size * _sizeof(np.float32)
- # ~ )
- # Precomputations
- self.compute_angles()
- self.proj_precomputations()
- self.cl_mem["d_axis_corrections"].fill(0.)
- # enqueue_fill_buffer has issues if opencl 1.2 is not present
- # ~ pyopencl.enqueue_fill_buffer(
- # ~ self.queue,
- # ~ self.cl_mem["d_axis_corrections"],
- # ~ np.float32(0),
- # ~ 0,
- # ~ self.nprojs*_sizeof(np.float32)
- # ~ )
- # Shorthands
- self._d_sino = self.cl_mem["_d_sino"]
-
- compile_options = None
- if not(self._use_textures):
- compile_options = "-DDONT_USE_TEXTURES"
- OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=compile_options
- )
- # check that workgroup can actually be (16, 16)
- self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu")
-
- def compute_angles(self):
- angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs
- angles2[:self.nprojs] = np.copy(self.angles)
- angles2[self.nprojs:] = angles2[self.nprojs - 1]
- self.angles2 = angles2
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
-
- def allocate_slice(self):
- ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
- ary.fill(0)
- self.add_to_cl_mem({"d_slice": ary})
-
- def allocate_textures(self):
- self.d_image_tex = pyopencl.Image(
- self.ctx,
- mf.READ_ONLY | mf.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
- )
-
- def transfer_to_texture(self, image):
- image2 = image
- if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
- image2 = np.ascontiguousarray(image)
- if not(self._use_textures):
- # TODO: create NoneEvent
- return self.transfer_to_slice(image2)
- # ~ return pyopencl.enqueue_copy(
- # ~ self.queue,
- # ~ self.cl_mem["d_slice"].data,
- # ~ image2,
- # ~ origin=(1, 1),
- # ~ region=image.shape[::-1]
- # ~ )
- else:
- return pyopencl.enqueue_copy(
- self.queue,
- self.d_image_tex,
- image2,
- origin=(1, 1),
- region=image.shape[::-1]
- )
-
- def transfer_device_to_texture(self, d_image):
- if not(self._use_textures):
- # TODO this copy should not be necessary
- return self.cpy2d_to_slice(d_image)
- else:
- return pyopencl.enqueue_copy(
- self.queue,
- self.d_image_tex,
- d_image,
- offset=0,
- origin=(1, 1),
- region=(int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl <= 2015.2
- )
-
- def transfer_to_slice(self, image):
- image2 = np.zeros((image.shape[0] + 2, image.shape[1] + 2), dtype=np.float32)
- image2[1:-1, 1:-1] = image.astype(np.float32)
- self.cl_mem["d_slice"].set(image2)
-
- def proj_precomputations(self):
- beginPos = np.zeros((2, self._dimrecy), dtype=np.int32)
- strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32)
- strideLine = np.zeros((2, self._dimrecy), dtype=np.int32)
- cos_angles = np.cos(self.angles2)
- sin_angles = np.sin(self.angles2)
- dimslice = self.shape[1]
-
- M1 = np.abs(cos_angles) > 0.70710678
- M1b = np.logical_not(M1)
- M2 = cos_angles > 0
- M2b = np.logical_not(M2)
- M3 = sin_angles > 0
- M3b = np.logical_not(M3)
- case1 = M1 * M2
- case2 = M1 * M2b
- case3 = M1b * M3
- case4 = M1b * M3b
-
- beginPos[0][case1] = 0
- beginPos[1][case1] = 0
- strideJoseph[0][case1] = 1
- strideJoseph[1][case1] = 0
- strideLine[0][case1] = 0
- strideLine[1][case1] = 1
-
- beginPos[0][case2] = dimslice - 1
- beginPos[1][case2] = dimslice - 1
- strideJoseph[0][case2] = -1
- strideJoseph[1][case2] = 0
- strideLine[0][case2] = 0
- strideLine[1][case2] = -1
-
- beginPos[0][case3] = dimslice - 1
- beginPos[1][case3] = 0
- strideJoseph[0][case3] = 0
- strideJoseph[1][case3] = 1
- strideLine[0][case3] = -1
- strideLine[1][case3] = 0
-
- beginPos[0][case4] = 0
- beginPos[1][case4] = dimslice - 1
- strideJoseph[0][case4] = 0
- strideJoseph[1][case4] = -1
- strideLine[0][case4] = 1
- strideLine[1][case4] = 0
-
- # For debug purpose
- # ~ self.beginPos = beginPos
- # ~ self.strideJoseph = strideJoseph
- # ~ self.strideLine = strideLine
- #
-
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos)
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph)
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine)
-
- def _get_local_mem(self):
- return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
-
- def cpy2d_to_sino(self, dst):
- ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2
- sino_shape_ocl = np.int32(ndrange)
- wg = None
- kernel_args = (
- dst.data,
- self._d_sino,
- np.int32(self.dwidth),
- np.int32(self._dimrecx),
- np.int32((0, 0)),
- np.int32((0, 0)),
- sino_shape_ocl
- )
- return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
-
- def cpy2d_to_slice(self, src):
- """
- Copy an Nx * Ny slice to self.d_slice, which is (Nx+2) * (Ny+2)
- """
- ndrange = (int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl < 2015.2
- wg = None
- slice_shape_ocl = np.int32(ndrange)
- kernel_args = (
- self.cl_mem["d_slice"].data,
- src,
- np.int32(self.shape[1] + 2),
- np.int32(self.shape[1]),
- np.int32((1, 1)),
- np.int32((0, 0)),
- slice_shape_ocl
- )
- return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
-
- def projection(self, image=None, dst=None):
- """Perform the projection on an input image
-
- :param image: Image to project
- :return: A sinogram
- """
- events = []
- with self.sem:
- if image is not None:
- assert image.ndim == 2, "Only 2D images are supported"
- assert image.shape[0] == self.shape[0], "image height must match the slice height"
- assert image.shape[1] == self.shape[1], "image width must match the slice width"
- if self._use_textures:
- self.transfer_to_texture(image)
- slice_ref = self.d_image_tex
- else:
- self.transfer_to_slice(image)
- slice_ref = self.cl_mem["d_slice"].data
- else:
- if not(self._use_textures):
- slice_ref = self.cl_mem["d_slice"].data
- else:
- slice_ref = self.d_image_tex
-
- kernel_args = (
- self._d_sino,
- slice_ref,
- np.int32(self.shape[1]),
- np.int32(self.dwidth),
- self.cl_mem["d_angles"],
- np.float32(self.axis_pos0),
- self.cl_mem["d_axis_corrections"].data, # TODO custom
- self.cl_mem["d_beginPos"],
- self.cl_mem["d_strideJoseph"],
- self.cl_mem["d_strideLine"],
- np.int32(self.nprojs),
- self._dimrecx,
- self._dimrecy,
- self.offset_x,
- self.offset_y,
- np.int32(1), # josephnoclip, 1 by default
- np.int32(self.normalize)
- )
-
- # Call the kernel
- if not(self._use_textures):
- event_pj = self.kernels.forward_kernel_cpu(
- self.queue,
- self.ndrange,
- self.wg,
- *kernel_args
- )
- else:
- event_pj = self.kernels.forward_kernel(
- self.queue,
- self.ndrange,
- self.wg,
- *kernel_args
- )
- events.append(EventDescription("projection", event_pj))
- if dst is None:
- self._ex_sino[:] = 0
- ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino)
- events.append(EventDescription("copy D->H result", ev))
- ev.wait()
- res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth])
- else:
- ev = self.cpy2d_to_sino(dst)
- events.append(EventDescription("copy D->D result", ev))
- ev.wait()
- res = dst
- # /with self.sem
- if self.profile:
- self.events += events
- # ~ res = self._ex_sino
- return res
-
- __call__ = projection
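-
-# Minimal usage sketch (illustrative; note the warning above that the forward
-# projector is untested):
-#
-#     import numpy as np
-#     from silx.opencl.projection import Projection
-#     slice_ = np.random.random((256, 256)).astype(np.float32)
-#     proj = Projection(slice_.shape, angles=180)  # 180 angles over [0, pi)
-#     sino = proj(slice_)                          # sinogram of shape (180, 256)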
diff --git a/silx/opencl/reconstruction.py b/silx/opencl/reconstruction.py
deleted file mode 100644
index 2c84aee..0000000
--- a/silx/opencl/reconstruction.py
+++ /dev/null
@@ -1,388 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for tomographic reconstruction algorithms"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import logging
-import numpy as np
-
-from .common import pyopencl
-from .processing import OpenclProcessing
-from .backprojection import Backprojection
-from .projection import Projection
-from .linalg import LinAlg
-
-import pyopencl.array as parray
-from pyopencl.elementwise import ElementwiseKernel
-logger = logging.getLogger(__name__)
-
-cl = pyopencl
-
-
-class ReconstructionAlgorithm(OpenclProcessing):
- """
- A parent class for all iterative tomographic reconstruction algorithms
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins and
- n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice.
- By default, it is a square slice where the dimension
- is the "x dimension" of the sinogram (number of bins).
- :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
- """
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- # Create a backprojector
- self.backprojector = Backprojection(
- sino_shape,
- slice_shape=slice_shape,
- axis_position=axis_position,
- angles=angles,
- ctx=self.ctx,
- profile=profile
- )
- # Create a projector
- self.projector = Projection(
- self.backprojector.slice_shape,
- self.backprojector.angles,
- axis_position=axis_position,
- detector_width=self.backprojector.num_bins,
- normalize=False,
- ctx=self.ctx,
- profile=profile
- )
- self.sino_shape = sino_shape
- self.is_cpu = self.backprojector.is_cpu
- # Arrays
- self.d_data = parray.empty(self.queue, sino_shape, dtype=np.float32)
- self.d_data.fill(0.0)
- self.d_sino = parray.empty_like(self.d_data)
- self.d_sino.fill(0.0)
- self.d_x = parray.empty(self.queue,
- self.backprojector.slice_shape,
- dtype=np.float32)
- self.d_x.fill(0.0)
- self.d_x_old = parray.empty_like(self.d_x)
- self.d_x_old.fill(0.0)
-
- self.add_to_cl_mem({
- "d_data": self.d_data,
- "d_sino": self.d_sino,
- "d_x": self.d_x,
- "d_x_old": self.d_x_old,
- })
-
- def proj(self, d_slice, d_sino):
- """
- Project d_slice to d_sino
- """
- self.projector.transfer_device_to_texture(d_slice.data) #.wait()
- self.projector.projection(dst=d_sino)
-
- def backproj(self, d_sino, d_slice):
- """
- Backproject d_sino to d_slice
- """
- self.backprojector.transfer_device_to_texture(d_sino.data) #.wait()
- self.backprojector.backprojection(dst=d_slice)
-
-
-class SIRT(ReconstructionAlgorithm):
- """
- A class for the SIRT algorithm
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins and
- n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice.
- By default, it is a square slice where the dimension is
- the "x dimension" of the sinogram (number of bins).
- :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
-
- .. warning:: This is a beta version of the SIRT algorithm. Reconstruction
- fails at least on CPU (Xeon E3-1245 v5) when using the AMD OpenCL
- implementation.
- """
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
-
- ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
- axis_position=axis_position, angles=angles,
- ctx=ctx, devicetype=devicetype, platformid=platformid,
- deviceid=deviceid, profile=profile)
- self.compute_preconditioners()
-
- def compute_preconditioners(self):
- """
- Create a diagonal preconditioner for the projection and backprojection
- operator.
- Each term of the diagonal is the sum of the projector/backprojector
- along rows [1], i.e. the projection/backprojection of an array of ones.
-
- [1] Jens Gregor and Thomas Benson,
- Computational Analysis and Improvement of SIRT,
- IEEE transactions on medical imaging, vol. 27, no. 7, 2008
- """
-
- # r_{i,i} = 1/(sum_j a_{i,j})
- slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
- R = 1./self.projector.projection(slice_ones) # could be all done on GPU, but I want extra checks
- R[np.logical_not(np.isfinite(R))] = 1. # In the case where the rotation axis is off-centred
- self.d_R = parray.to_device(self.queue, R)
- # c_{j,j} = 1/(sum_i a_{i,j})
- sino_ones = np.ones(self.sino_shape, dtype=np.float32)
- C = 1./self.backprojector.backprojection(sino_ones)
- C[np.logical_not(np.isfinite(C))] = 1. # In the case where the rotation axis is off-centred
- self.d_C = parray.to_device(self.queue, C)
-
- self.add_to_cl_mem({
- "d_R": self.d_R,
- "d_C": self.d_C
- })
-
- # TODO: compute and possibly return the residual
- def run(self, data, n_it):
- """
- Run n_it iterations of the SIRT algorithm.
- """
- cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
-
- d_x_old = self.d_x_old
- d_x = self.d_x
- d_R = self.d_R
- d_C = self.d_C
- d_sino = self.d_sino
- d_x *= 0
-
- for k in range(n_it):
- d_x_old[:] = d_x[:]
- # x{k+1} = x{k} - C A^T R (A x{k} - b)
- self.proj(d_x, d_sino)
- d_sino -= self.d_data
- d_sino *= d_R
- if self.is_cpu:
- # This sync is necessary when using CPU, while it is not for GPU
- d_sino.finish()
- self.backproj(d_sino, d_x)
- d_x *= -d_C
- d_x += d_x_old
- if self.is_cpu:
- # This sync is necessary when using CPU, while it is not for GPU
- d_x.finish()
-
- return d_x
-
- __call__ = run
-
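-# SIRT usage sketch (illustrative; the sinogram layout follows the class
-# docstring above):
-#
-#     import numpy as np
-#     from silx.opencl.reconstruction import SIRT
-#     sirt = SIRT(sino.shape)   # `sino` is a float32 sinogram
-#     d_rec = sirt(sino, 100)   # 100 iterations, returns a pyopencl array
-#     rec = d_rec.get()         # reconstructed slice as a numpy array
-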
-
-class TV(ReconstructionAlgorithm):
- """
- A class for reconstruction with Total Variation regularization using the
- Chambolle-Pock TV reconstruction algorithm.
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins and
- n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice. By default,
- it is a square slice where the dimension is the
- "x dimension" of the sinogram (number of bins).
- :param axis_position: Optional, axis position. Default is
- `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly slower)
-
- .. warning:: This is a beta version of the Chambolle-Pock TV algorithm.
- Reconstruction fails at least on CPU (Xeon E3-1245 v5) when using
- the AMD OpenCL implementation.
- """
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
- axis_position=axis_position, angles=angles,
- ctx=ctx, devicetype=devicetype, platformid=platformid,
- deviceid=deviceid, profile=profile)
- self.compute_preconditioners()
-
- # Create a LinAlg instance
- self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx)
- # Positivity constraint
- self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a", "a[i] = max(a[i], 0.0f);")
- # Projection onto the L-infinity ball of radius Lambda
- self.elwise_proj_linf = ElementwiseKernel(
- self.ctx,
- "float2* a, float Lambda",
- "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);",
- "elwise_proj_linf"
- )
- # Additional arrays
- self.linalg.gradient(self.d_x)
- self.d_p = parray.empty_like(self.linalg.cl_mem["d_gradient"])
- self.d_q = parray.empty_like(self.d_data)
- self.d_g = self.linalg.d_image
- self.d_tmp = parray.empty_like(self.d_x)
- self.d_p.fill(0)
- self.d_q.fill(0)
- self.d_tmp.fill(0)
- self.add_to_cl_mem({
- "d_p": self.d_p,
- "d_q": self.d_q,
- "d_tmp": self.d_tmp,
- })
-
- self.theta = 1.0
-
- def compute_preconditioners(self):
- """
- Create a diagonal preconditioner for the projection and backprojection
- operator.
- Each term of the diagonal is the sum of the projector/backprojector
- along rows [2],
- i.e. the projection/backprojection of an array of ones.
-
- [2] T. Pock, A. Chambolle,
- Diagonal preconditioning for first order primal-dual algorithms in
- convex optimization,
- International Conference on Computer Vision, 2011
- """
-
- # Compute the diagonal preconditioner "Sigma"
- slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
- Sigma_k = 1./self.projector.projection(slice_ones)
- Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1.
- self.d_Sigma_k = parray.to_device(self.queue, Sigma_k)
- self.d_Sigma_kp1 = self.d_Sigma_k + 1 # TODO: memory vs computation
- self.Sigma_grad = 1/2.0 # For discrete gradient, sum|D_i,j| = 2 along lines or cols
-
- # Compute the diagonal preconditioner "Tau"
- sino_ones = np.ones(self.sino_shape, dtype=np.float32)
- C = self.backprojector.backprojection(sino_ones)
- Tau = 1./(C + 2.)
- self.d_Tau = parray.to_device(self.queue, Tau)
-
- self.add_to_cl_mem({
- "d_Sigma_k": self.d_Sigma_k,
- "d_Sigma_kp1": self.d_Sigma_kp1,
- "d_Tau": self.d_Tau
- })
-
- def run(self, data, n_it, Lambda, pos_constraint=False):
- """
- Run n_it iterations of the TV-regularized reconstruction,
- with the regularization parameter Lambda.
- """
- cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
-
- d_x = self.d_x
- d_x_old = self.d_x_old
- d_tmp = self.d_tmp
- d_sino = self.d_sino
- d_p = self.d_p
- d_q = self.d_q
- d_g = self.d_g
-
- d_x *= 0
- d_p *= 0
- d_q *= 0
-
- for k in range(0, n_it):
- # Update primal variables
- d_x_old[:] = d_x[:]
- #~ x = x + Tau*div(p) - Tau*Kadj(q)
- self.backproj(d_q, d_tmp)
- self.linalg.divergence(d_p)
- # TODO: this in less than three ops (one kernel ?)
- d_g -= d_tmp # d_g -> L.d_image
- d_g *= self.d_Tau
- d_x += d_g
-
- if pos_constraint:
- self.elwise_clamp(d_x)
-
- # Update dual variables
- #~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda)
- d_tmp[:] = d_x[:]
- # FIXME: mul_add is out of place, put an equivalent thing in linalg...
- #~ d_tmp.mul_add(1 + theta, d_x_old, -theta)
- d_tmp *= 1+self.theta
- d_tmp -= self.theta*d_x_old
- self.linalg.gradient(d_tmp)
- # TODO: out of place mul_add
- #~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad)
- self.linalg.cl_mem["d_gradient"] *= self.Sigma_grad
- d_p += self.linalg.cl_mem["d_gradient"]
- self.elwise_proj_linf(d_p, Lambda)
-
- #~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k)
- self.proj(d_tmp, d_sino)
- # TODO: this in less instructions
- d_sino -= self.d_data
- d_sino *= self.d_Sigma_k
- d_q += d_sino
- d_q /= self.d_Sigma_kp1
- return d_x
-
- __call__ = run
diff --git a/silx/opencl/setup.py b/silx/opencl/setup.py
deleted file mode 100644
index 10fb1be..0000000
--- a/silx/opencl/setup.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# coding: utf-8
-#
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-
-from __future__ import division
-
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__authors__ = ["J. Kieffer"]
-__date__ = "16/10/2017"
-
-import os.path
-from numpy.distutils.misc_util import Configuration
-
-
-def configuration(parent_package='', top_path=None):
- config = Configuration('opencl', parent_package, top_path)
- path = os.path.dirname(os.path.abspath(__file__))
- if os.path.exists(os.path.join(path, 'sift')):
- config.add_subpackage('sift')
- config.add_subpackage('codec')
- config.add_subpackage('test')
- return config
-
-
-if __name__ == "__main__":
- from numpy.distutils.core import setup
- setup(configuration=configuration)
diff --git a/silx/opencl/sinofilter.py b/silx/opencl/sinofilter.py
deleted file mode 100644
index d608744..0000000
--- a/silx/opencl/sinofilter.py
+++ /dev/null
@@ -1,435 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016-2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for sinogram filtering on CPU/GPU."""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "07/06/2019"
-
-import numpy as np
-from math import pi
-
-
-import pyopencl.array as parray
-from .common import pyopencl as cl
-from .processing import OpenclProcessing
-from ..math.fft.clfft import CLFFT, __have_clfft__
-from ..math.fft.npfft import NPFFT
-from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter
-from ..utils.deprecation import deprecated
-
-
-
-class SinoFilter(OpenclProcessing):
- """A class for performing sinogram filtering on GPU using OpenCL.
-
- This is a convolution in the Fourier space, along one dimension:
-
- - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x)
- - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x)
- """
- kernel_files = ["array_utils.cl"]
- powers = generate_powers()
-
- def __init__(self, sino_shape, filter_name=None, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False, extra_options=None):
- """Constructor of OpenCL FFT-Convolve.
-
- :param sino_shape: shape of the sinogram.
- :param filter_name: Name of the filter. Default is "ram-lak".
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- :param dict extra_options: Advanced extra options.
- Current options are: cutoff, use_numpy_fft
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self._init_extra_options(extra_options)
- self._calculate_shapes(sino_shape)
- self._init_fft()
- self._allocate_memory()
- self._compute_filter(filter_name)
- self._init_kernels()
-
- def _calculate_shapes(self, sino_shape):
- """
-
- :param sino_shape: shape of the sinogram.
- """
- self.ndim = len(sino_shape)
- if self.ndim == 2:
- n_angles, dwidth = sino_shape
- else:
- raise ValueError("Invalid sinogram number of dimensions: "
- "expected 2 dimensions")
- self.sino_shape = sino_shape
- self.n_angles = n_angles
- self.dwidth = dwidth
- self.dwidth_padded = get_next_power(2 * self.dwidth, powers=self.powers)
- self.sino_padded_shape = (n_angles, self.dwidth_padded)
- sino_f_shape = list(self.sino_padded_shape)
- sino_f_shape[-1] = sino_f_shape[-1] // 2 + 1
- self.sino_f_shape = tuple(sino_f_shape)
-
- def _init_extra_options(self, extra_options):
- """
-
- :param dict extra_options: Advanced extra options.
- Current options are: cutoff, use_numpy_fft
- """
- self.extra_options = {
- "cutoff": 1.,
- "use_numpy_fft": False,
- }
- if extra_options is not None:
- self.extra_options.update(extra_options)
-
- def _init_fft(self):
- if __have_clfft__ and not(self.extra_options["use_numpy_fft"]):
- self.fft_backend = "opencl"
- self.fft = CLFFT(
- self.sino_padded_shape,
- dtype=np.float32,
- axes=(-1,),
- ctx=self.ctx,
- )
- else:
- self.fft_backend = "numpy"
- print("The gpyfft module was not found. The Fourier transforms "
- "will be done on CPU. For more performances, it is advised "
- "to install gpyfft.""")
- self.fft = NPFFT(
- template=np.zeros(self.sino_padded_shape, "f"),
- axes=(-1,),
- )
-
- def _allocate_memory(self):
- self.d_filter_f = parray.zeros(self.queue, (self.sino_f_shape[-1],), np.complex64)
- self.is_cpu = (self.device.type == "CPU")
- # These are already allocated by FFT() if using the opencl backend
- if self.fft_backend == "opencl":
- self.d_sino_padded = self.fft.data_in
- self.d_sino_f = self.fft.data_out
- else:
- # When using the numpy backend, arrays are not pre-allocated
- self.d_sino_padded = np.zeros(self.sino_padded_shape, "f")
- self.d_sino_f = np.zeros(self.sino_f_shape, np.complex64)
- # These are needed for rectangular memcpy in certain cases (see below).
- self.tmp_sino_device = parray.zeros(self.queue, self.sino_shape, "f")
- self.tmp_sino_host = np.zeros(self.sino_shape, "f")
-
- def _compute_filter(self, filter_name):
- """
-
- :param str filter_name: filter name
- """
- self.filter_name = filter_name or "ram-lak"
- filter_f = compute_fourier_filter(
- self.dwidth_padded,
- self.filter_name,
- cutoff=self.extra_options["cutoff"],
- )[:self.dwidth_padded // 2 + 1] # R2C
- self.set_filter(filter_f, normalize=True)
-
- def set_filter(self, h_filt, normalize=True):
- """
- Set a filter for sinogram filtering.
-
- :param h_filt: Filter. Each line of the sinogram will be filtered with
- this filter. It has to be the Real-to-Complex Fourier Transform
- of some real filter, padded to 2*sinogram_width.
- :param normalize: Whether to normalize the filter with pi/num_angles.
- """
- if h_filt.size != self.sino_f_shape[-1]:
- raise ValueError(
- """
- Invalid filter size: expected %d, got %d.
- Please check that the filter is the Fourier R2C transform of
- some real 1D filter.
- """
- % (self.sino_f_shape[-1], h_filt.size)
- )
- if not(np.iscomplexobj(h_filt)):
- print("Warning: expected a complex Fourier filter")
- self.filter_f = h_filt
- if normalize:
- self.filter_f *= pi / self.n_angles
- self.filter_f = self.filter_f.astype(np.complex64)
- self.d_filter_f[:] = self.filter_f[:]
-
- def _init_kernels(self):
- OpenclProcessing.compile_kernels(self, self.kernel_files)
- h, w = self.d_sino_f.shape
- self.mult_kern_args = (self.queue, (int(w), (int(h))), None,
- self.d_sino_f.data,
- self.d_filter_f.data,
- np.int32(w),
- np.int32(h))
-
- def check_array(self, arr):
- if arr.dtype != np.float32:
- raise ValueError("Expected data type = numpy.float32")
- if arr.shape != self.sino_shape:
- raise ValueError("Expected sinogram shape %s, got %s" %
- (self.sino_shape, arr.shape))
- if not(isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)):
- raise ValueError("Expected either numpy.ndarray or "
- "pyopencl.array.Array")
-
- def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0),
- src_offset=(0, 0)):
- """
- Rectangular device-to-device copy of a 2D region.
-
- :param dst: destination pyopencl array
- :param src: source pyopencl array
- :param transfer_shape: 2-tuple, shape (rows, columns) of the region to copy
- :param dst_offset: 2-tuple, (row, column) offset in the destination array
- :param src_offset: 2-tuple, (row, column) offset in the source array
- """
- shape = tuple(int(i) for i in transfer_shape[::-1])
- ev = self.kernels.cpy2d(self.queue, shape, None,
- dst.data,
- src.data,
- np.int32(dst.shape[1]),
- np.int32(src.shape[1]),
- np.int32(dst_offset),
- np.int32(src_offset),
- np.int32(transfer_shape[::-1]))
- ev.wait()
-
- def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0),
- src_offset=(0, 0)):
- """
- Rectangular host-to-host copy of a 2D region (plain numpy slicing).
-
- :param dst: destination numpy array
- :param src: source numpy array
- :param transfer_shape: 2-tuple, shape (rows, columns) of the region to copy
- :param dst_offset: 2-tuple, (row, column) offset in the destination array
- :param src_offset: 2-tuple, (row, column) offset in the source array
- """
- s = transfer_shape
- do = dst_offset
- so = src_offset
- dst[do[0]:do[0] + s[0], do[1]:do[1] + s[1]] = src[so[0]:so[0] + s[0], so[1]:so[1] + s[1]]
-
- def _prepare_input_sino(self, sino):
- """
- :param sino: sinogram
- """
- self.check_array(sino)
- self.d_sino_padded.fill(0)
- if self.fft_backend == "opencl":
- # OpenCL backend: FFT/mult/IFFT are done on device.
- if isinstance(sino, np.ndarray):
- # OpenCL backend + numpy input: copy H->D.
- # As pyopencl does not support rectangular copies, we have to
- # do a copy H->D in a temporary device buffer, and then call a
- # kernel doing the rectangular D-D copy.
- self.tmp_sino_device[:] = sino[:]
- if self.is_cpu:
- self.tmp_sino_device.finish()
- d_sino_ref = self.tmp_sino_device
- else:
- d_sino_ref = sino
- # Rectangular copy D->D
- self.copy2d(self.d_sino_padded, d_sino_ref, self.sino_shape)
- if self.is_cpu:
- self.d_sino_padded.finish() # should not be required here
- else:
- # Numpy backend: FFT/mult/IFFT are done on host.
- if not(isinstance(sino, np.ndarray)):
- # Numpy backend + pyopencl input: need to copy D->H
- self.tmp_sino_host[:] = sino[:]
- h_sino_ref = self.tmp_sino_host
- else:
- h_sino_ref = sino
- # Rectangular copy H->H
- self.copy2d_host(self.d_sino_padded, h_sino_ref, self.sino_shape)
-
- def _get_output_sino(self, output):
- """
- :param output: optional output sinogram (numpy array or pyopencl array).
- :return: sinogram
- """
- if output is None:
- res = np.zeros(self.sino_shape, dtype=np.float32)
- else:
- res = output
- if self.fft_backend == "opencl":
- if isinstance(res, np.ndarray):
- # OpenCL backend + numpy output: copy D->H
- # As pyopencl does not support rectangular copies, we first have
- # to call a kernel doing rectangular copy D->D, then do a copy
- # D->H.
- self.copy2d(dst=self.tmp_sino_device,
- src=self.d_sino_padded,
- transfer_shape=self.sino_shape)
- if self.is_cpu:
- self.tmp_sino_device.finish() # should not be required here
- res[:] = self.tmp_sino_device.get()[:]
- else:
- if self.is_cpu:
- self.d_sino_padded.finish()
- self.copy2d(res, self.d_sino_padded, self.sino_shape)
- if self.is_cpu:
- res.finish() # should not be required here
- else:
- if not(isinstance(res, np.ndarray)):
- # Numpy backend + pyopencl output: rect copy H->H + copy H->D
- self.copy2d_host(dst=self.tmp_sino_host,
- src=self.d_sino_padded,
- transfer_shape=self.sino_shape)
- res[:] = self.tmp_sino_host[:]
- else:
- # Numpy backend + numpy output: rect copy H->H
- self.copy2d_host(res, self.d_sino_padded, self.sino_shape)
- return res
-
- def _do_fft(self):
- if self.fft_backend == "opencl":
- self.fft.fft(self.d_sino_padded, output=self.d_sino_f)
- if self.is_cpu:
- self.d_sino_f.finish()
- else:
- # numpy backend does not support "output=" argument,
- # and rfft always returns a complex128 result.
- res = self.fft.fft(self.d_sino_padded).astype(np.complex64)
- self.d_sino_f[:] = res[:]
-
- def _multiply_fourier(self):
- if self.fft_backend == "opencl":
- # Everything is on device. Call the multiplication kernel.
- ev = self.kernels.inplace_complex_mul_2Dby1D(
- *self.mult_kern_args
- )
- ev.wait()
- if self.is_cpu:
- self.d_sino_f.finish() # should not be required here
- else:
- # Everything is on host.
- self.d_sino_f *= self.filter_f
-
- def _do_ifft(self):
- if self.fft_backend == "opencl":
- if self.is_cpu:
- self.d_sino_padded.fill(0)
- self.d_sino_padded.finish()
- self.fft.ifft(self.d_sino_f, output=self.d_sino_padded)
- if self.is_cpu:
- self.d_sino_padded.finish()
- else:
- # numpy backend does not support "output=" argument,
- # and irfft always returns a float64 result.
- res = self.fft.ifft(self.d_sino_f).astype("f")
- self.d_sino_padded[:] = res[:]
-
- def filter_sino(self, sino, output=None):
- """
-
- :param sino: sinogram
- :param output:
- :return: filtered sinogram
- """
- # Handle input sinogram
- self._prepare_input_sino(sino)
- # FFT
- self._do_fft()
- # multiply with filter in the Fourier domain
- self._multiply_fourier()
- # iFFT
- self._do_ifft()
- # return
- res = self._get_output_sino(output)
- return res
- # ~ return output
-
- __call__ = filter_sino
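[Editor's note] filter_sino() above boils down to a pad / FFT / multiply / inverse-FFT / crop pipeline. The sketch below mirrors the numpy-backend path purely for illustration; the helper name filter_sino_numpy and the even-sized FFT assumption are mine, not part of silx.

    import numpy as np

    def filter_sino_numpy(sino, filter_f):
        # Zero-pad each projection, R2C FFT, multiply by the Fourier filter,
        # inverse FFT, then crop back to the original sinogram width.
        n_angles, n_bins = sino.shape
        fft_size = 2 * (filter_f.size - 1)   # R2C layout: fft_size // 2 + 1 bins
        sino_padded = np.zeros((n_angles, fft_size), dtype=np.float32)
        sino_padded[:, :n_bins] = sino
        sino_f = np.fft.rfft(sino_padded, axis=1) * filter_f
        return np.fft.irfft(sino_f, axis=1)[:, :n_bins].astype(np.float32)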
-
-
-
-
-# -------------------
-# - Compatibility -
-# -------------------
-
-
-def nextpow2(N):
- p = 1
- while p < N:
- p *= 2
- return p
-
-
-@deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
-def fourier_filter(sino, filter_=None, fft_size=None):
- """Simple np based implementation of fourier space filter.
- This function is deprecated, please use silx.opencl.sinofilter.SinoFilter.
-
- :param sino: of shape shape = (num_projs, num_bins)
- :param filter: filter function to apply in fourier space
- :fft_size: size on which perform the fft. May be larger than the sino array
- :return: filtered sinogram
- """
- assert sino.ndim == 2
- num_projs, num_bins = sino.shape
- if fft_size is None:
- fft_size = nextpow2(num_bins * 2 - 1)
- else:
- assert fft_size >= num_bins
- if fft_size == num_bins:
- sino_zeropadded = sino.astype(np.float32)
- else:
- sino_zeropadded = np.zeros((num_projs, fft_size),
- dtype=np.complex64)
- sino_zeropadded[:, :num_bins] = sino.astype(np.float32)
-
- if filter_ is None:
- h = np.zeros(fft_size, dtype=np.float32)
- L2 = fft_size // 2 + 1
- h[0] = 1 / 4.
- j = np.linspace(1, L2, L2 // 2, False)
- h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2)
- h[L2:] = np.copy(h[1:L2 - 1][::-1])
- filter_ = np.fft.fft(h).astype(np.complex64)
-
- # Linear convolution
- sino_f = np.fft.fft(sino, fft_size)
- sino_f = sino_f * filter_
- sino_filtered = np.fft.ifft(sino_f)[:, :num_bins].real
-
- return np.ascontiguousarray(sino_filtered.real, dtype=np.float32)
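[Editor's note] For completeness, here is a hedged sketch of building a Ram-Lak filter in the layout expected by SinoFilter.set_filter() above, reusing the spatial construction of the deprecated fourier_filter(). The fft_size argument is an assumption: it must match the padded width actually used internally, i.e. the returned size must equal sino_f_shape[-1].

    import numpy as np

    def ramlak_filter_rfft(fft_size):
        # Spatial Ram-Lak kernel (same construction as fourier_filter() above),
        # returned as its real-to-complex FFT, as expected by set_filter().
        h = np.zeros(fft_size, dtype=np.float32)
        L2 = fft_size // 2 + 1
        h[0] = 1 / 4.
        j = np.linspace(1, L2, L2 // 2, False)
        h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2)
        h[L2:] = np.copy(h[1:L2 - 1][::-1])
        return np.fft.rfft(h).astype(np.complex64)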
diff --git a/silx/opencl/sparse.py b/silx/opencl/sparse.py
deleted file mode 100644
index 514589a..0000000
--- a/silx/opencl/sparse.py
+++ /dev/null
@@ -1,377 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for data sparsification on CPU/GPU."""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "07/06/2019"
-
-import numpy
-import pyopencl.array as parray
-from collections import namedtuple
-from pyopencl.scan import GenericScanKernel
-from pyopencl.tools import dtype_to_ctype
-from .common import pyopencl as cl
-from .processing import OpenclProcessing, EventDescription, BufferDescription
-mf = cl.mem_flags
-
-
-CSRData = namedtuple("CSRData", ["data", "indices", "indptr"])
-
-def tuple_to_csrdata(arrs):
- """
- Converts a 3-tuple to a CSRData namedtuple.
- """
- if arrs is None:
- return None
- return CSRData(data=arrs[0], indices=arrs[1], indptr=arrs[2])
-
-
-
-class CSR(OpenclProcessing):
- kernel_files = ["sparse.cl"]
-
- def __init__(self, shape, dtype="f", max_nnz=None, idx_dtype=numpy.int32,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
- """
- Compute Compressed Sparse Row format of an image (2D matrix).
- It is designed to be compatible with scipy.sparse.csr_matrix.
-
- :param shape: tuple
- Matrix shape.
- :param dtype: str or numpy.dtype, optional
- Numeric data type. By default, sparse matrix data will be float32.
- :param max_nnz: int, optional
- Maximum number of non-zero elements. By default, the arrays "data"
- and "indices" are allocated with prod(shape) elements, but
- in practice much less space is needed.
- The number of non-zero items cannot be known in advance, but one can
- pass an estimated upper bound via this parameter to save memory.
-
- OpenCL processing parameters
- -----------------------------
- Please refer to the documentation of silx.opencl.processing.OpenclProcessing
- for information on the other parameters.
- """
-
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, memory=memory,
- profile=profile)
- self._set_parameters(shape, dtype, max_nnz, idx_dtype)
- self._allocate_memory()
- self._setup_kernels()
-
- # --------------------------------------------------------------------------
- # -------------------------- Initialization --------------------------------
- # --------------------------------------------------------------------------
-
- def _set_parameters(self, shape, dtype, max_nnz, idx_dtype):
- self.shape = shape
- self.size = numpy.prod(shape)
- self._set_idx_dtype(idx_dtype)
- assert len(shape) == 2 #
- if max_nnz is None:
- self.max_nnz = numpy.prod(shape) # worst case
- else:
- self.max_nnz = int(max_nnz)
- self._set_dtype(dtype)
-
-
- def _set_idx_dtype(self, idx_dtype):
- idx_dtype = numpy.dtype(idx_dtype)
- if idx_dtype.kind not in ["i", "u"]:
- raise ValueError("Not an integer type: %s" % idx_dtype)
- # scan value type must have size divisible by 4 bytes
- if idx_dtype.itemsize % 4 != 0:
- raise ValueError("Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes")
- self.indice_dtype = idx_dtype #
-
-
- def _set_dtype(self, dtype):
- self.dtype = numpy.dtype(dtype)
- if self.dtype.kind == "c":
- raise ValueError("Complex data is not supported")
- if self.dtype == numpy.dtype(numpy.float32):
- self._c_zero_str = "0.0f"
- elif self.dtype == numpy.dtype(numpy.float64):
- self._c_zero_str = "0.0"
- else: # assuming integer
- self._c_zero_str = "0"
- self.c_dtype = dtype_to_ctype(self.dtype)
- self.idx_c_dtype = dtype_to_ctype(self.indice_dtype)
-
-
- def _allocate_memory(self):
- self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ?
- self.buffers = [
- BufferDescription("array", (self.size,), self.dtype, mf.READ_ONLY),
- BufferDescription("data", (self.max_nnz,), self.dtype, mf.READ_WRITE),
- BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE),
- BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE),
- ]
- self.allocate_buffers(use_array=True)
- for arr_name in ["array", "data", "indices", "indptr"]:
- setattr(self, arr_name, self.cl_mem[arr_name])
- self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty()
- self._old_array = self.array
- self._old_data = self.data
- self._old_indices = self.indices
- self._old_indptr = self.indptr
-
-
- def _setup_kernels(self):
- self._setup_compaction_kernel()
- self._setup_decompaction_kernel()
-
-
- def _setup_compaction_kernel(self):
- kernel_signature = str(
- "__global %s *data, \
- __global %s *data_compacted, \
- __global %s *indices, \
- __global %s* indptr \
- """ % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype)
- )
- if self.dtype.kind == "f":
- map_nonzero_expr = "(fabs(data[i]) > %s) ? 1 : 0" % self._c_zero_str
- elif self.dtype.kind in ["u", "i"]:
- map_nonzero_expr = "(data[i] != %s) ? 1 : 0" % self._c_zero_str
- else:
- raise ValueError("Unknown data type")
-
- self.scan_kernel = GenericScanKernel(
- self.ctx, self.indice_dtype,
- arguments=kernel_signature,
- input_expr=map_nonzero_expr,
- scan_expr="a+b", neutral="0",
- output_statement="""
- // item is the running sum of input_expr(i), i.e. the cumsum of "nonzero"
- if (prev_item != item) {
- data_compacted[item-1] = data[i];
- indices[item-1] = GET_INDEX(i);
- }
- // The last cumsum element of each line of "nonzero" goes to indptr[(i/IMAGE_WIDTH)+1]
- if ((i+1) % IMAGE_WIDTH == 0) {
- indptr[(i/IMAGE_WIDTH)+1] = item;
- }
- """,
- options=["-DIMAGE_WIDTH=%d" % self.shape[1]],
- preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)",
- )
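[Editor's note] For readers unfamiliar with pyopencl's GenericScanKernel: the output_statement above uses the running cumulative sum ("item") of the non-zero indicator to scatter values and column indices, and writes the per-row totals into indptr. A host-side numpy equivalent, given purely as an illustration, is:

    import numpy as np

    def csr_compaction_reference(image):
        flat = image.ravel()
        width = image.shape[1]
        nonzero = (flat != 0)
        cumsum = np.cumsum(nonzero)              # plays the role of "item"
        idx = np.nonzero(nonzero)[0]
        data = flat[idx]                         # data_compacted
        indices = idx % width                    # GET_INDEX(i)
        indptr = np.zeros(image.shape[0] + 1, dtype=np.int64)
        indptr[1:] = cumsum[width - 1::width]    # last cumsum of each row
        return data, indices, indptr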
-
-
- def _setup_decompaction_kernel(self):
- OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=[
- "-DIMAGE_WIDTH=%d" % self.shape[1],
- "-DDTYPE=%s" % self.c_dtype,
- "-DIDX_DTYPE=%s" % self.idx_c_dtype,
- ]
- )
- device = self.ctx.devices[0]
- wg_x = min(
- device.max_work_group_size,
- 32,
- self.kernels.max_workgroup_size("densify_csr")
- )
- self._decomp_wg = (wg_x, 1)
- self._decomp_grid = (self._decomp_wg[0], self.shape[0])
-
-
- # --------------------------------------------------------------------------
- # -------------------------- Array utils -----------------------------------
- # --------------------------------------------------------------------------
-
- # TODO handle pyopencl Buffer
- def check_array(self, arr):
- """
- Check that the provided array is compatible with the current context.
-
- :param arr: numpy.ndarray or pyopencl.array.Array
- 2D array in dense format.
- """
- assert arr.size == self.size
- assert arr.dtype == self.dtype
-
-
- # TODO handle pyopencl Buffer
- def check_sparse_arrays(self, csr_data):
- """
- Check that the provided sparse arrays are compatible with the current
- context.
-
- :param csr_data: CSRData namedtuple.
- It contains the arrays "data", "indices" and "indptr".
- """
- assert isinstance(csr_data, CSRData)
- for arr in [csr_data.data, csr_data.indices, csr_data.indptr]:
- assert arr.ndim == 1
- assert csr_data.data.size <= self.max_nnz
- assert csr_data.indices.size <= self.max_nnz
- assert csr_data.indptr.size == self.shape[0]+1
- assert csr_data.data.dtype == self.dtype
- assert csr_data.indices.dtype == self.indice_dtype
- assert csr_data.indptr.dtype == self.indice_dtype
-
-
- def set_array(self, arr):
- """
- Set the provided array as the current context 2D matrix.
-
- :param arr: numpy.ndarray or pyopencl.array.Array
- 2D array in dense format.
- """
- if arr is None:
- return
- self.check_array(arr)
- # GenericScanKernel only supports 1D data
- if isinstance(arr, parray.Array):
- self._old_array = self.array
- self.array = arr
- elif isinstance(arr, numpy.ndarray):
- self.array[:] = arr.ravel()[:]
- else:
- raise ValueError("Expected pyopencl array or numpy array")
-
-
- def set_sparse_arrays(self, csr_data):
- if csr_data is None:
- return
- self.check_sparse_arrays(csr_data)
- for name, arr in {"data": csr_data.data, "indices": csr_data.indices, "indptr": csr_data.indptr}.items():
- # The current array is a device array. Don't copy, use it directly
- if isinstance(arr, parray.Array):
- setattr(self, "_old_" + name, getattr(self, name))
- setattr(self, name, arr)
- # The current array is a numpy.ndarray: copy H2D
- elif isinstance(arr, numpy.ndarray):
- getattr(self, name)[:arr.size] = arr[:]
- else:
- raise ValueError("Unsupported array type: %s" % type(arr))
-
-
- def _recover_arrays_references(self):
- """
- Recover the previous arrays references, and return the references of the
- "current" arrays.
- """
- array = self.array
- data = self.data
- indices = self.indices
- indptr = self.indptr
- for name in ["array", "data", "indices", "indptr"]:
- # self.X = self._old_X
- setattr(self, name, getattr(self, "_old_" + name))
- return array, (data, indices, indptr)
-
-
- def get_sparse_arrays(self, output):
- """
- Get the sparse CSR arrays (data, indices, indptr) of the current context.
-
- :param output: tuple or None
- tuple in the form (data, indices, indptr). These arrays have to be
- compatible with the current context (size and data type).
- The content of these arrays will be overwritten with the result of
- the previous computation.
- """
- numels = self.max_nnz
- if output is None:
- data = self.data.get()[:numels]
- ind = self.indices.get()[:numels]
- indptr = self.indptr.get()
- res = (data, ind, indptr)
- else:
- res = output
- return res
-
-
- def get_array(self, output):
- if output is None:
- res = self.array.get().reshape(self.shape)
- else:
- res = output
- return res
-
- # --------------------------------------------------------------------------
- # -------------------------- Compaction ------------------------------------
- # --------------------------------------------------------------------------
-
- def sparsify(self, arr, output=None):
- """
- Convert an image (2D matrix) into a CSR representation.
-
- :param arr: numpy.ndarray or pyopencl.array.Array
- Input array.
- :param output: tuple of pyopencl.array.Array, optional
- If provided, this must be a tuple of 3 arrays (data, indices, indptr).
- The content of each array is overwritten by the computation result.
- """
- self.set_array(arr)
- self.set_sparse_arrays(tuple_to_csrdata(output))
- evt = self.scan_kernel(
- self.array,
- self.data,
- self.indices,
- self.indptr,
- )
- #~ evt.wait()
- self.profile_add(evt, "sparsification kernel")
- res = self.get_sparse_arrays(output)
- self._recover_arrays_references()
- return res
-
- # --------------------------------------------------------------------------
- # -------------------------- Decompaction ----------------------------------
- # --------------------------------------------------------------------------
-
- def densify(self, data, indices, indptr, output=None):
- """Convert a CSR representation (data, indices, indptr) back to a dense 2D array."""
- self.set_sparse_arrays(
- CSRData(data=data, indices=indices, indptr=indptr)
- )
- self.set_array(output)
- evt = self.kernels.densify_csr(
- self.queue,
- self._decomp_grid,
- self._decomp_wg,
- self.data.data,
- self.indices.data,
- self.indptr.data,
- self.array.data,
- numpy.int32(self.shape[0]),
- )
- #~ evt.wait()
- self.profile_add(evt, "desparsification kernel")
- res = self.get_array(output)
- self._recover_arrays_references()
- return res
-
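[Editor's note] To make the intent of this removed class concrete, here is a small hypothetical usage sketch cross-checked against scipy.sparse.csr_matrix; the image content and shapes are made up for illustration, and the import refers to the module as it existed before this removal.

    import numpy as np
    import scipy.sparse
    from silx.opencl.sparse import CSR   # module shown (removed) above

    image = np.zeros((64, 64), dtype=np.float32)
    image[10:20, 30:40] = 1.0

    csr = CSR(image.shape, dtype="f")
    data, indices, indptr = csr.sparsify(image)

    ref = scipy.sparse.csr_matrix(image)
    nnz = ref.data.size
    assert np.allclose(data[:nnz], ref.data)
    assert np.array_equal(indices[:nnz], ref.indices)
    assert np.array_equal(indptr, ref.indptr)

    dense = csr.densify(data, indices, indptr)   # round-trip back to dense
    assert np.allclose(dense, image)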
diff --git a/silx/opencl/statistics.py b/silx/opencl/statistics.py
deleted file mode 100644
index a96ee33..0000000
--- a/silx/opencl/statistics.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: SILX
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2019 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-# .
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-# .
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""A module for performing basic statistical analysis (min, max, mean, std) on
-large data where numpy is not very efficient.
-"""
-
-__author__ = "Jerome Kieffer"
-__license__ = "MIT"
-__date__ = "19/05/2021"
-__copyright__ = "2012-2019, ESRF, Grenoble"
-__contact__ = "jerome.kieffer@esrf.fr"
-
-import logging
-import numpy
-from collections import OrderedDict, namedtuple
-from math import sqrt
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-from .utils import concatenate_cl_kernel
-
-if pyopencl:
- mf = pyopencl.mem_flags
- from pyopencl.reduction import ReductionKernel
- try:
- from pyopencl import cltypes
- except ImportError:
- v = pyopencl.array.vec()
- float8 = v.float8
- else:
- float8 = cltypes.float8
-
-else:
- raise ImportError("pyopencl is not installed")
-logger = logging.getLogger(__name__)
-
-StatResults = namedtuple("StatResults", ["min", "max", "cnt", "sum", "mean",
- "var", "std"])
-zero8 = "(float8)(FLT_MAX, -FLT_MAX, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)"
-# min max cnt cnt_e sum sum_e var var_e
-
-
-class Statistics(OpenclProcessing):
- """A class for doing statistical analysis using OpenCL
-
- :param int size: Number of elements in the input data to treat
- :param numpy.dtype dtype: Input data type
- :param numpy.ndarray template: Data template to extract size & dtype
- :param ctx: Actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param str devicetype: Type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param int platformid: Platform identifier as given by clinfo
- :param int deviceid: Device identifier as given by clinfo
- :param int block_size:
- Preferred workgroup size, may vary depending on the outcome of the compilation
- :param bool profile:
- Switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
- """
- buffers = [
- BufferDescription("raw", 1, numpy.float32, mf.READ_ONLY),
- BufferDescription("converted", 1, numpy.float32, mf.READ_WRITE),
- ]
- kernel_files = ["preprocess.cl"]
- mapping = {numpy.int8: "s8_to_float",
- numpy.uint8: "u8_to_float",
- numpy.int16: "s16_to_float",
- numpy.uint16: "u16_to_float",
- numpy.uint32: "u32_to_float",
- numpy.int32: "s32_to_float"}
-
- def __init__(self, size=None, dtype=None, template=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, profile=False
- ):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
- self.size = size
- self.dtype = dtype
- if template is not None:
- self.size = template.size
- self.dtype = template.dtype
-
- self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
- for i in self.__class__.buffers]
-
- self.allocate_buffers(use_array=True)
- self.compile_kernels()
- self.set_kernel_arguments()
-
- def set_kernel_arguments(self):
- """Parametrize all kernel arguments"""
- for val in self.mapping.values():
- self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("raw", "converted")))
-
- def compile_kernels(self):
- """Compile the kernel"""
- OpenclProcessing.compile_kernels(self,
- self.kernel_files,
- "-D NIMAGE=%i" % self.size)
- compiler_options = self.get_compiler_options(x87_volatile=True)
- src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl"))
- self.reduction_comp = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
- self.reduction_simple = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics_simple(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
-
- if "cl_khr_fp64" in self.device.extensions:
- self.reduction_double = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics_double(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
- else:
- logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device)
- self.reduction_double = self.reduction_comp
-
- def send_buffer(self, data, dest):
- """
- Send a numpy array to the device, including the cast on the device if
- possible
-
- :param numpy.ndarray data: numpy array with data
- :param dest: name of the buffer as registered in the class
- """
- logger.info("send data to %s", dest)
- dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
- events = []
- if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
- copy_image = pyopencl.enqueue_copy(self.queue,
- self.cl_mem[dest].data,
- numpy.ascontiguousarray(data, dest_type))
- events.append(EventDescription("copy H->D %s" % dest, copy_image))
- else:
- copy_image = pyopencl.enqueue_copy(self.queue,
- self.cl_mem["raw"].data,
- numpy.ascontiguousarray(data))
- kernel = getattr(self.program, self.mapping[data.dtype.type])
- cast_to_float = kernel(self.queue,
- (self.size,),
- None,
- self.cl_mem["raw"].data,
- self.cl_mem[dest].data)
- events += [
- EventDescription("copy H->D raw", copy_image),
- EventDescription(f"cast to float {dest}", cast_to_float)
- ]
- if self.profile:
- self.events += events
- return events
-
- def process(self, data, comp=True):
- """Actually calculate the statics on the data
-
- :param numpy.ndarray data: numpy array with the image
- :param comp: use Kahan compensated arithmetics for the calculation
- :return: Statistics named tuple
- :rtype: StatResults
- """
- if data.ndim != 1:
- data = data.ravel()
- size = data.size
- assert size <= self.size, "data size exceeds the allocated buffer size"
- events = []
- if comp is True:
- comp = "comp"
- elif comp is False:
- comp = "single"
- else:
- comp = comp.lower()
- with self.sem:
- self.send_buffer(data, "converted")
- if comp in ("single", "fp32", "float32"):
- reduction = self.reduction_simple
- elif comp in ("double", "fp64", "float64"):
- reduction = self.reduction_double
- else:
- reduction = self.reduction_comp
- res_d, evt = reduction(self.cl_mem["converted"][:self.size],
- queue=self.queue,
- return_event=True)
- events.append(EventDescription(f"statistical reduction {comp}", evt))
- if self.profile:
- self.events += events
- res_h = res_d.get()
- min_ = 1.0 * res_h["s0"]
- max_ = 1.0 * res_h["s1"]
- count = 1.0 * res_h["s2"] + res_h["s3"]
- sum_ = 1.0 * res_h["s4"] + res_h["s5"]
- m2 = 1.0 * res_h["s6"] + res_h["s7"]
- var = m2 / (count - 1.0)
- res = StatResults(min_,
- max_,
- count,
- sum_,
- sum_ / count,
- var,
- sqrt(var))
- return res
-
- __call__ = process
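[Editor's note] A short, hypothetical usage sketch of the class above; the data values are made up and the import refers to the module as it existed before this removal.

    import numpy as np
    from silx.opencl.statistics import Statistics   # module shown (removed) above

    data = (1000.0 * np.random.random((2048, 2048))).astype(np.float32)

    stats = Statistics(template=data)
    res = stats(data)       # StatResults(min, max, cnt, sum, mean, var, std)

    # Cross-check against numpy; ddof=1 matches var = M2 / (cnt - 1) above
    assert np.isclose(res.mean, data.mean(), rtol=1e-5)
    assert np.isclose(res.std, data.std(ddof=1), rtol=1e-4)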
diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py
deleted file mode 100644
index 928dbaf..0000000
--- a/silx/opencl/test/__init__.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: silx
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2016 European Synchrotron Radiation Facility, Grenoble, France
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-__authors__ = ["J. Kieffer"]
-__license__ = "MIT"
-__date__ = "17/05/2021"
-
-import os
-import unittest
-from . import test_addition
-from . import test_medfilt
-from . import test_backprojection
-from . import test_projection
-from . import test_linalg
-from . import test_array_utils
-from ..codec import test as test_codec
-from . import test_image
-from . import test_kahan
-from . import test_doubleword
-from . import test_stats
-from . import test_convolution
-from . import test_sparse
-
-
-def suite():
- test_suite = unittest.TestSuite()
- test_suite.addTests(test_addition.suite())
- test_suite.addTests(test_medfilt.suite())
- test_suite.addTests(test_backprojection.suite())
- test_suite.addTests(test_projection.suite())
- test_suite.addTests(test_linalg.suite())
- test_suite.addTests(test_array_utils.suite())
- test_suite.addTests(test_codec.suite())
- test_suite.addTests(test_image.suite())
- test_suite.addTests(test_kahan.suite())
- test_suite.addTests(test_doubleword.suite())
- test_suite.addTests(test_stats.suite())
- test_suite.addTests(test_convolution.suite())
- test_suite.addTests(test_sparse.suite())
- # Allow removing sift from the project
- test_base_dir = os.path.dirname(__file__)
- sift_dir = os.path.join(test_base_dir, "..", "sift")
- if os.path.exists(sift_dir):
- from ..sift import test as test_sift
- test_suite.addTests(test_sift.suite())
-
- return test_suite
diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py
deleted file mode 100644
index 19dfdf0..0000000
--- a/silx/opencl/test/test_addition.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Sift implementation in Python + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of an addition
-"""
-
-__authors__ = ["Henri Payno, Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "30/11/2020"
-
-import logging
-import numpy
-
-import unittest
-from ..common import ocl, _measure_workgroup_size, query_kernel_info
-if ocl:
- import pyopencl
- import pyopencl.array
-from ..utils import get_opencl_code
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl, "PyOpenCl is missing")
-class TestAddition(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- super(TestAddition, cls).setUpClass()
- if ocl:
- cls.ctx = ocl.create_context()
- if logger.getEffectiveLevel() <= logging.INFO:
- cls.PROFILE = True
- cls.queue = pyopencl.CommandQueue(
- cls.ctx,
- properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
- else:
- cls.PROFILE = False
- cls.queue = pyopencl.CommandQueue(cls.ctx)
- cls.max_valid_wg = 0
-
- @classmethod
- def tearDownClass(cls):
- super(TestAddition, cls).tearDownClass()
- print("Maximum valid workgroup size %s on device %s" % (cls.max_valid_wg, cls.ctx.devices[0]))
- cls.ctx = None
- cls.queue = None
-
- def setUp(self):
- if ocl is None:
- return
- self.shape = 4096
- self.data = numpy.random.random(self.shape).astype(numpy.float32)
- self.d_array_img = pyopencl.array.to_device(self.queue, self.data)
- self.d_array_5 = pyopencl.array.empty_like(self.d_array_img)
- self.d_array_5.fill(-5)
- self.program = pyopencl.Program(self.ctx, get_opencl_code("addition")).build()
-
- def tearDown(self):
- self.img = self.data = None
- self.d_array_img = self.d_array_5 = self.program = None
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_add(self):
- """
- tests the addition kernel
- """
- maxi = int(round(numpy.log2(self.shape)))
- for i in range(maxi):
- d_array_result = pyopencl.array.empty_like(self.d_array_img)
- wg = 1 << i
- try:
- evt = self.program.addition(self.queue, (self.shape,), (wg,),
- self.d_array_img.data, self.d_array_5.data, d_array_result.data, numpy.int32(self.shape))
- evt.wait()
- except Exception as error:
- max_valid_wg = self.program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0])
- msg = "Error %s on WG=%s: %s" % (error, wg, max_valid_wg)
- self.assertLess(max_valid_wg, wg, msg)
- break
- else:
- res = d_array_result.get()
- good = numpy.allclose(res, self.data - 5)
- if good and wg > self.max_valid_wg:
- self.__class__.max_valid_wg = wg
- self.assertTrue(good, "calculation is correct for WG=%s" % wg)
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_measurement(self):
- """
- tests that all devices are working properly ... lengthy and error prone
- """
- for platform in ocl.platforms:
- for did, device in enumerate(platform.devices):
- meas = _measure_workgroup_size((platform.id, device.id))
- self.assertEqual(meas, device.max_work_group_size,
- "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size))
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_query(self):
- """
- queries work-group related properties of the addition kernel
- """
- for what in ("COMPILE_WORK_GROUP_SIZE",
- "LOCAL_MEM_SIZE",
- "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
- "PRIVATE_MEM_SIZE",
- "WORK_GROUP_SIZE"):
- logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what))
-
- # Not all ICDs work properly ....
- #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
-
- min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
- max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE")
- self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestAddition("test_add"))
- # testSuite.addTest(TestAddition("test_measurement"))
- testSuite.addTest(TestAddition("test_query"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
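[Editor's note] The work-group introspection exercised by test_query above can be reproduced with a few lines of pyopencl; the kernel source here is a stub written for this illustration only.

    import pyopencl as cl

    ctx = cl.create_some_context()
    src = "__kernel void addition(__global float *a) { a[get_global_id(0)] += 1.0f; }"
    kernel = cl.Program(ctx, src).build().addition
    dev = ctx.devices[0]
    wg_info = cl.kernel_work_group_info
    print("WORK_GROUP_SIZE:", kernel.get_work_group_info(wg_info.WORK_GROUP_SIZE, dev))
    print("PREFERRED_WORK_GROUP_SIZE_MULTIPLE:",
          kernel.get_work_group_info(wg_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, dev))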
diff --git a/silx/opencl/test/test_array_utils.py b/silx/opencl/test/test_array_utils.py
deleted file mode 100644
index 833d828..0000000
--- a/silx/opencl/test/test_array_utils.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the OpenCL array_utils"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre Paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "14/06/2017"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- import pyopencl as cl
- import pyopencl.array as parray
- from .. import linalg
-from ..utils import get_opencl_code
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-try:
- from scipy.ndimage.filters import laplace
- _has_scipy = True
-except ImportError:
- _has_scipy = False
-
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestCpy2d(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.ctx = ocl.create_context()
- if logger.getEffectiveLevel() <= logging.INFO:
- self.PROFILE = True
- self.queue = cl.CommandQueue(
- self.ctx,
- properties=cl.command_queue_properties.PROFILING_ENABLE)
- else:
- self.PROFILE = False
- self.queue = cl.CommandQueue(self.ctx)
- self.allocate_arrays()
- self.program = cl.Program(self.ctx, get_opencl_code("array_utils")).build()
-
- def allocate_arrays(self):
- """
- Allocate various types of arrays for the tests
- """
- self.prng_state = np.random.get_state()
- # Generate arrays of random shape
- self.shape1 = np.random.randint(20, high=512, size=(2,))
- self.shape2 = np.random.randint(20, high=512, size=(2,))
- self.array1 = np.random.rand(*self.shape1).astype(np.float32)
- self.array2 = np.random.rand(*self.shape2).astype(np.float32)
- self.d_array1 = parray.to_device(self.queue, self.array1)
- self.d_array2 = parray.to_device(self.queue, self.array2)
- # Generate random offsets
- offset1_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
- offset1_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
- offset2_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
- offset2_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
- self.offset1 = (offset1_y, offset1_x)
- self.offset2 = (offset2_y, offset2_x)
- # Compute the size of the rectangle to transfer
- size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1)
- size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1)
- self.transfer_shape = (size_y, size_x)
-
- def tearDown(self):
- self.array1 = None
- self.array2 = None
- self.d_array1.data.release()
- self.d_array2.data.release()
- self.d_array1 = None
- self.d_array2 = None
- self.ctx = None
- self.queue = None
-
- def compare(self, result, reference):
- errmax = np.max(np.abs(result - reference))
- logger.info("Max error = %e" % (errmax))
- self.assertTrue(errmax == 0, "Max error is too high")
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_cpy2d(self):
- """
- Test rectangular transfer of self.d_array1 to self.d_array2
- """
- # Reference
- o1 = self.offset1
- o2 = self.offset2
- T = self.transfer_shape
- logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s,
- (N2_y, N2_x) = %s:
- array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" %
- (
- str(self.shape1), str(self.shape2),
- o2[0], o2[0] + T[0],
- o2[1], o2[1] + T[1],
- o1[0], o1[0] + T[0],
- o1[1], o1[1] + T[1]
- )
- )
- self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]]
- kernel_args = (
- self.d_array2.data,
- self.d_array1.data,
- np.int32(self.shape2[1]),
- np.int32(self.shape1[1]),
- np.int32(self.offset2[::-1]),
- np.int32(self.offset1[::-1]),
- np.int32(self.transfer_shape[::-1])
- )
- wg = None
- ndrange = self.transfer_shape[::-1]
- self.program.cpy2d(self.queue, ndrange, wg, *kernel_args)
- res = self.d_array2.get()
- self.compare(res, self.array2)
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestCpy2d("test_cpy2d"))
- return testSuite
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py
deleted file mode 100644
index 9dfdd3a..0000000
--- a/silx/opencl/test/test_backprojection.py
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the filtered backprojection module"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre Paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "19/01/2018"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-from math import pi
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- from .. import backprojection
- from ...image.tomography import compute_fourier_filter
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-
-
-def generate_coords(img_shp, center=None):
- """
- Return two 2D arrays containing the indexes of an image.
- The zero is at the center of the image.
- """
- l_r, l_c = float(img_shp[0]), float(img_shp[1])
- R, C = np.mgrid[:l_r, :l_c]
- if center is None:
- center0, center1 = l_r / 2., l_c / 2.
- else:
- center0, center1 = center
- R = R + 0.5 - center0
- C = C + 0.5 - center1
- return R, C
-
-
-def clip_circle(img, center=None, radius=None):
- """
- Puts zeros outside the inscribed circle of the image support.
- """
- R, C = generate_coords(img.shape, center)
- M = R * R + C * C
- res = np.zeros_like(img)
- if radius is None:
- radius = img.shape[0] / 2. - 1
- mask = M < radius * radius
- res[mask] = img[mask]
- return res
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestFBP(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.getfiles()
- self.fbp = backprojection.Backprojection(self.sino.shape, profile=True)
- if self.fbp.compiletime_workgroup_size < 16 * 16:
- self.skipTest("Current implementation of OpenCL backprojection is "
- "not supported on this platform yet")
- # Astra does not use the same backprojector implementation.
- # Therefore, we cannot expect results to be the "same" (up to float32
- # numerical error)
- self.tol = 5e-2
- if not(self.fbp._use_textures) or self.fbp.device.type == "CPU":
- # Precision is less when using CPU
- # (either CPU textures or "manual" linear interpolation)
- self.tol *= 2
-
- def tearDown(self):
- self.sino = None
- # self.fbp.log_profile()
- self.fbp = None
-
- def getfiles(self):
- # load sinogram of 512x512 MRI phantom
- self.sino = np.load(utilstest.getfile("sino500.npz"))["data"]
- # load reconstruction made with ASTRA FBP (with filter designed in spatial domain)
- self.reference_rec = np.load(utilstest.getfile("rec_astra_500.npz"))["data"]
-
- def measure(self):
- "Common measurement of timings"
- t1 = time.time()
- try:
- result = self.fbp.filtered_backprojection(self.sino)
- except RuntimeError as msg:
- logger.error(msg)
- return
- t2 = time.time()
- return t2 - t1, result
-
- def compare(self, res):
- """
- Compare a result with the reference reconstruction.
- Only the valid reconstruction zone (inscribed circle) is taken into
- account
- """
- res_clipped = clip_circle(res)
- ref_clipped = clip_circle(self.reference_rec)
- delta = abs(res_clipped - ref_clipped)
- bad = delta > 1
- logger.debug("Absolute difference: %s with %s outlier pixels out of %s"
- "", delta.max(), bad.sum(), np.prod(bad.shape))
- return delta.max()
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_fbp(self):
- """
- tests FBP
- """
- # Test single reconstruction
- # --------------------------
- t, res = self.measure()
- if t is None:
- logger.info("test_fp: skipped")
- else:
- logger.info("test_backproj: time = %.3fs" % t)
- err = self.compare(res)
- msg = str("Max error = %e" % err)
- logger.info(msg)
- self.assertTrue(err < self.tol, "Max error is too high")
-
- # Test multiple reconstructions
- # -----------------------------
- res0 = np.copy(res)
- for i in range(10):
- res = self.fbp.filtered_backprojection(self.sino)
- errmax = np.max(np.abs(res - res0))
- self.assertTrue(errmax < 1.e-6, "Max error is too high")
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_fbp_filters(self):
- """
- Test the different available filters of silx FBP.
- """
- avail_filters = [
- "ramlak", "shepp-logan", "cosine", "hamming",
- "hann"
- ]
- # Create a Dirac delta function at a single angle view.
- # As the filters are radially invariant:
- # - backprojection yields an image where each line is a Dirac.
- # - FBP yields an image where each line is the spatial filter
- # One can simply filter "dirac" without backprojecting it, but this
- # test will also ensure that backprojection behaves well.
- dirac = np.zeros_like(self.sino)
- na, dw = dirac.shape
- dirac[0, dw//2] = na / pi * 2
-
- for filter_name in avail_filters:
- B = backprojection.Backprojection(dirac.shape, filter_name=filter_name)
- r = B(dirac)
- # Check that radial invariance is kept
- std0 = np.max(np.abs(np.std(r, axis=0)))
- self.assertTrue(
- std0 < 5.e-6,
- "Something wrong with FBP(filter=%s)" % filter_name
- )
- # Check that the filter is retrieved
- r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2. # filter factor
- ref_filter_f = compute_fourier_filter(dw, filter_name)
- errmax = np.max(np.abs(r_f - ref_filter_f))
- logger.info("FBP filter %s: max error=%e" % (filter_name, errmax))
- self.assertTrue(
- errmax < 1.e-3,
- "Something wrong with FBP(filter=%s)" % filter_name
- )
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_fbp_oddsize(self):
- # Generate a 513-sinogram.
- # The padded width will be nextpow2(513*2).
- # silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail.
- sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge')
- B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1)
- res = B(sino)
- # Compare with self.reference_rec. Tolerance is high as backprojector
- # is not fully shift-invariant.
- errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec)))
- self.assertLess(
- errmax, 1.e-1,
- "Something wrong with FBP on odd-sized sinogram"
- )
-
-
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestFBP("test_fbp"))
- testSuite.addTest(TestFBP("test_fbp_filters"))
- testSuite.addTest(TestFBP("test_fbp_oddsize"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py
deleted file mode 100644
index 7bceb0d..0000000
--- a/silx/opencl/test/test_convolution.py
+++ /dev/null
@@ -1,265 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-
-"""
-Test of the Convolution class.
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre Paleo"]
-__contact__ = "pierre.paleo@esrf.fr"
-__license__ = "MIT"
-__copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
-
-import logging
-from itertools import product
-import numpy as np
-from silx.utils.testutils import parameterize
-from silx.image.utils import gaussian_kernel
-
-try:
- from scipy.ndimage import convolve, convolve1d
- from scipy.misc import ascent
-
- scipy_convolve = convolve
- scipy_convolve1d = convolve1d
-except ImportError:
- scipy_convolve = None
-import unittest
-from ..common import ocl, check_textures_availability
-
-if ocl:
- import pyopencl as cl
- import pyopencl.array as parray
- from silx.opencl.convolution import Convolution
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl and scipy_convolve, "PyOpenCl/scipy is missing")
-class TestConvolution(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- super(TestConvolution, cls).setUpClass()
- cls.image = np.ascontiguousarray(ascent()[:, :511], dtype="f")
- cls.data1d = cls.image[0]
- cls.data2d = cls.image
- cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1))
- cls.kernel1d = gaussian_kernel(1.0)
- cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d)
- cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d)
- cls.ctx = ocl.create_context()
- cls.tol = {
- "1D": 1e-4,
- "2D": 1e-3,
- "3D": 1e-3,
- }
-
- @classmethod
- def tearDownClass(cls):
- cls.data1d = cls.data2d = cls.data3d = cls.image = None
- cls.kernel1d = cls.kernel2d = cls.kernel3d = None
-
- @staticmethod
- def compare(arr1, arr2):
- return np.max(np.abs(arr1 - arr2))
-
- @staticmethod
- def print_err(conv):
- errmsg = str(
- """
- Something wrong with %s
- mode=%s, texture=%s
- """
- % (conv.use_case_desc, conv.mode, conv.use_textures)
- )
- return errmsg
-
- def __init__(self, methodName="runTest", param=None):
- unittest.TestCase.__init__(self, methodName)
- self.param = param
- self.mode = param["boundary_handling"]
- logger.debug(
- """
- Testing convolution with boundary_handling=%s,
- use_textures=%s, input_device=%s, output_device=%s
- """
- % (
- self.mode,
- param["use_textures"],
- param["input_on_device"],
- param["output_on_device"],
- )
- )
-
- def instantiate_convol(self, shape, kernel, axes=None):
- if self.mode == "constant":
- if not (self.param["use_textures"]) or (
- self.param["use_textures"]
- and not (check_textures_availability(self.ctx))
- ):
- self.skipTest("mode=constant not implemented without textures")
- C = Convolution(
- shape,
- kernel,
- mode=self.mode,
- ctx=self.ctx,
- axes=axes,
- extra_options={"dont_use_textures": not (self.param["use_textures"])},
- )
- return C
-
- def get_data_and_kernel(self, test_name):
- dims = {
- "test_1D": (1, 1),
- "test_separable_2D": (2, 1),
- "test_separable_3D": (3, 1),
- "test_nonseparable_2D": (2, 2),
- "test_nonseparable_3D": (3, 3),
- }
- dim_data = {1: self.data1d, 2: self.data2d, 3: self.data3d}
- dim_kernel = {
- 1: self.kernel1d,
- 2: self.kernel2d,
- 3: self.kernel3d,
- }
- dd, kd = dims[test_name]
- return dim_data[dd], dim_kernel[kd]
-
- def get_reference_function(self, test_name):
- ref_func = {
- "test_1D": lambda x, y: scipy_convolve1d(x, y, mode=self.mode),
- "test_separable_2D": lambda x, y: scipy_convolve1d(
- scipy_convolve1d(x, y, mode=self.mode, axis=1),
- y,
- mode=self.mode,
- axis=0,
- ),
- "test_separable_3D": lambda x, y: scipy_convolve1d(
- scipy_convolve1d(
- scipy_convolve1d(x, y, mode=self.mode, axis=2),
- y,
- mode=self.mode,
- axis=1,
- ),
- y,
- mode=self.mode,
- axis=0,
- ),
- "test_nonseparable_2D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
- "test_nonseparable_3D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
- }
- return ref_func[test_name]
-
- def template_test(self, test_name):
- data, kernel = self.get_data_and_kernel(test_name)
- conv = self.instantiate_convol(data.shape, kernel)
- if self.param["input_on_device"]:
- data_ref = parray.to_device(conv.queue, data)
- else:
- data_ref = data
- if self.param["output_on_device"]:
- d_res = parray.empty_like(conv.data_out)
- d_res.fill(0)
- res = d_res
- else:
- res = None
- res = conv(data_ref, output=res)
- if self.param["output_on_device"]:
- res = res.get()
- ref_func = self.get_reference_function(test_name)
- ref = ref_func(data, kernel)
- metric = self.compare(res, ref)
- logger.info("%s: max error = %.2e" % (test_name, metric))
- tol = self.tol[str("%dD" % kernel.ndim)]
- self.assertLess(metric, tol, self.print_err(conv))
-
- def test_1D(self):
- self.template_test("test_1D")
-
- def test_separable_2D(self):
- self.template_test("test_separable_2D")
-
- def test_separable_3D(self):
- self.template_test("test_separable_3D")
-
- def test_nonseparable_2D(self):
- self.template_test("test_nonseparable_2D")
-
- def test_nonseparable_3D(self):
- self.template_test("test_nonseparable_3D")
-
- def test_batched_2D(self):
- """
- Test batched (nonseparable) 2D convolution on 3D data.
- In this test: batch along "z" (axis 0)
- """
- data = self.data3d
- kernel = self.kernel2d
- conv = self.instantiate_convol(data.shape, kernel, axes=(0,))
- res = conv(data) # 3D
- ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D
-
- std = np.std(res, axis=0)
- std_max = np.max(np.abs(std))
- self.assertLess(std_max, self.tol["2D"], self.print_err(conv))
- metric = self.compare(res[0], ref)
- logger.info("test_nonseparable_3D: max error = %.2e" % metric)
- self.assertLess(metric, self.tol["2D"], self.print_err(conv))
-
-
-def test_convolution():
- boundary_handling_ = ["reflect", "nearest", "wrap", "constant"]
- use_textures_ = [True, False]
- input_on_device_ = [True, False]
- output_on_device_ = [True, False]
- testSuite = unittest.TestSuite()
-
- param_vals = list(
- product(boundary_handling_, use_textures_, input_on_device_, output_on_device_)
- )
- for boundary_handling, use_textures, input_dev, output_dev in param_vals:
- testcase = parameterize(
- TestConvolution,
- param={
- "boundary_handling": boundary_handling,
- "input_on_device": input_dev,
- "output_on_device": output_dev,
- "use_textures": use_textures,
- },
- )
- testSuite.addTest(testcase)
- return testSuite
-
-
-def suite():
- testSuite = test_convolution()
- return testSuite
-
-
-if __name__ == "__main__":
- unittest.main(defaultTest="suite")
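[Editor's note] As a companion to the parameterized tests above, here is a minimal usage sketch of the Convolution class against its scipy reference (separable 2D case, mode="reflect"); array sizes are arbitrary.

    import numpy as np
    from scipy.ndimage import convolve1d
    from silx.image.utils import gaussian_kernel
    from silx.opencl.convolution import Convolution

    image = np.random.rand(256, 256).astype(np.float32)
    kernel = gaussian_kernel(1.0)

    conv = Convolution(image.shape, kernel, mode="reflect")   # separable 2D
    res = conv(image)

    ref = convolve1d(convolve1d(image, kernel, mode="reflect", axis=1),
                     kernel, mode="reflect", axis=0)
    assert np.max(np.abs(res - ref)) < 1e-3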
diff --git a/silx/opencl/test/test_doubleword.py b/silx/opencl/test/test_doubleword.py
deleted file mode 100644
index ca947e0..0000000
--- a/silx/opencl/test/test_doubleword.py
+++ /dev/null
@@ -1,258 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-#
-# Project: The silx project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2021-2021 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"test suite for OpenCL code"
-
-__author__ = "Jérôme Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "31/05/2021"
-
-import unittest
-import numpy
-import logging
-import platform
-
-logger = logging.getLogger(__name__)
-try:
- import pyopencl
-except ImportError as error:
- logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
- pyopencl = None
-
-from .. import ocl
-if ocl is not None:
- from ..utils import read_cl_file
- from .. import pyopencl
- import pyopencl.array
- from pyopencl.elementwise import ElementwiseKernel
-from ...test.utils import test_options
-
-EPS32 = numpy.finfo("float32").eps
-EPS64 = numpy.finfo("float64").eps
-
-
-class TestDoubleWord(unittest.TestCase):
- """
- Test the kernels for compensated math in OpenCL
- """
-
- @classmethod
- def setUpClass(cls):
- if not test_options.WITH_OPENCL_TEST:
- raise unittest.SkipTest("User request to skip OpenCL tests")
- if pyopencl is None or ocl is None:
- raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
-
- cls.ctx = ocl.create_context(devicetype="GPU")
- cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
-
- # this is running 32-bit OpenCL with POCL
- if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
- cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
- cls.args = "-DX87_VOLATILE=volatile"
- else:
- cls.args = ""
- size = 1024
- cls.a = 1.0 + numpy.random.random(size)
- cls.b = 1.0 + numpy.random.random(size)
- cls.ah = cls.a.astype(numpy.float32)
- cls.bh = cls.b.astype(numpy.float32)
- cls.al = (cls.a - cls.ah).astype(numpy.float32)
- cls.bl = (cls.b - cls.bh).astype(numpy.float32)
- cls.doubleword = read_cl_file("doubleword.cl")
-
- @classmethod
- def tearDownClass(cls):
- cls.queue = None
- cls.ctx = None
- cls.a = cls.al = cls.ah = None
- cls.b = cls.bl = cls.bh = None
- cls.doubleword = None
-
- def test_fast_sum2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
- preamble=self.doubleword)
- a_g = pyopencl.array.to_device(self.queue, self.ah)
- b_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(a_g)
- res_h = pyopencl.array.empty_like(a_g)
- test_kernel(a_g, b_g, res_h, res_l)
- self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
-
- def test_sum2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- a_g = pyopencl.array.to_device(self.queue, self.ah)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(a_g)
- res_h = pyopencl.array.empty_like(a_g)
- test_kernel(a_g, b_g, res_h, res_l)
- self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
-
- def test_prod2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- a_g = pyopencl.array.to_device(self.queue, self.ah)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(a_g)
- res_h = pyopencl.array.empty_like(a_g)
- test_kernel(a_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches")
-
- def test_dw_plus_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(b_g)
- res_h = pyopencl.array.empty_like(b_g)
- test_kernel(ah_g, al_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
-
- def test_dw_plus_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- bh_g = pyopencl.array.to_device(self.queue, self.bh)
- bl_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(bh_g)
- res_h = pyopencl.array.empty_like(bh_g)
- test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches")
-
- def test_dw_times_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(b_g)
- res_h = pyopencl.array.empty_like(b_g)
- test_kernel(ah_g, al_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
-
- def test_dw_times_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- bh_g = pyopencl.array.to_device(self.queue, self.bh)
- bl_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(bh_g)
- res_h = pyopencl.array.empty_like(bh_g)
- test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches")
-
- def test_dw_div_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(b_g)
- res_h = pyopencl.array.empty_like(b_g)
- test_kernel(ah_g, al_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches")
-
- def test_dw_div_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- bh_g = pyopencl.array.to_device(self.queue, self.bh)
- bl_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(bh_g)
- res_h = pyopencl.array.empty_like(bh_g)
- test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches")
-
-
-def suite():
- testsuite = unittest.TestSuite()
- loader = unittest.defaultTestLoader.loadTestsFromTestCase
- testsuite.addTest(loader(TestDoubleWord))
- return testsuite
-
-
-if __name__ == '__main__':
- runner = unittest.TextTestRunner()
- runner.run(suite())
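The double-word tests above operate on values split, in setUpClass, into a high float32 word plus a low float32 word that stores the rounding error, so that high + low carries almost the full float64 precision. A minimal NumPy sketch of that splitting and of the accuracy it buys, for values in [1, 2) as generated above:

import numpy as np

a = 1.0 + np.random.random(1024)            # float64 values in [1, 2)
ah = a.astype(np.float32)                   # "high" word: nearest float32
al = (a - ah).astype(np.float32)            # "low" word: the rounding error

# A single float32 word is only accurate to about eps32...
assert abs(a - ah).max() < np.finfo(np.float32).eps
# ...while the two-word sum recovers the float64 value almost exactly
assert abs(a - (ah.astype(np.float64) + al)).max() < np.finfo(np.float32).eps ** 2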
diff --git a/silx/opencl/test/test_image.py b/silx/opencl/test/test_image.py
deleted file mode 100644
index d73a854..0000000
--- a/silx/opencl/test/test_image.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: image manipulation in OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of image manipulation
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "13/02/2018"
-
-import logging
-import numpy
-
-import unittest
-from ..common import ocl, _measure_workgroup_size
-if ocl:
- import pyopencl
- import pyopencl.array
-from ...test.utils import utilstest
-from ..image import ImageProcessing
-logger = logging.getLogger(__name__)
-try:
- from PIL import Image
-except ImportError:
- Image = None
-
-
-@unittest.skipUnless(ocl and Image, "PyOpenCl/Image is missing")
-class TestImage(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- super(TestImage, cls).setUpClass()
- if ocl:
- cls.ctx = ocl.create_context()
- cls.lena = utilstest.getfile("lena.png")
- cls.data = numpy.asarray(Image.open(cls.lena))
- cls.ip = ImageProcessing(ctx=cls.ctx, template=cls.data, profile=True)
-
- @classmethod
- def tearDownClass(cls):
- super(TestImage, cls).tearDownClass()
- cls.ctx = None
- cls.lena = None
- cls.data = None
- if logger.level <= logging.INFO:
- logger.warning("\n".join(cls.ip.log_profile()))
- cls.ip = None
-
- def setUp(self):
- if ocl is None:
- return
- self.data = numpy.asarray(Image.open(self.lena))
-
- def tearDown(self):
- self.img = self.data = None
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_cast(self):
- """
- tests the cast kernel
- """
- res = self.ip.to_float(self.data)
- self.assertEqual(res.shape, self.data.shape, "shape")
- self.assertEqual(res.dtype, numpy.float32, "dtype")
- self.assertEqual(abs(res - self.data).max(), 0, "content")
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_normalize(self):
- """
- tests the cast to float followed by the normalize kernel
- """
- tmp = pyopencl.array.empty(self.ip.ctx, self.data.shape, "float32")
- res = self.ip.to_float(self.data, out=tmp)
- res2 = self.ip.normalize(tmp, -100, 100, copy=False)
- norm = (self.data.astype(numpy.float32) - self.data.min()) / (self.data.max() - self.data.min())
- ref2 = 200 * norm - 100
- self.assertLess(abs(res2 - ref2).max(), 3e-5, "content")
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_histogram(self):
- """
- Test the histogram on a greyscale image ... of Lena :)
- """
- lena_bw = (0.2126 * self.data[:, :, 0] +
- 0.7152 * self.data[:, :, 1] +
- 0.0722 * self.data[:, :, 2]).astype("int32")
- ref = numpy.histogram(lena_bw, 255)
- ip = ImageProcessing(ctx=self.ctx, template=lena_bw, profile=True)
- res = ip.histogram(lena_bw, 255)
- ip.log_profile()
- delta = (ref[0] - res[0])
- deltap = (ref[1] - res[1])
- self.assertEqual(delta.sum(), 0, "errors are self-compensated")
- self.assertLessEqual(abs(delta).max(), 1, "errors are small")
- self.assertLessEqual(abs(deltap).max(), 3e-5, "errors on position are small: %s" % (abs(deltap).max()))
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestImage("test_cast"))
- testSuite.addTest(TestImage("test_normalize"))
- testSuite.addTest(TestImage("test_histogram"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_kahan.py b/silx/opencl/test/test_kahan.py
deleted file mode 100644
index 6ea599b..0000000
--- a/silx/opencl/test/test_kahan.py
+++ /dev/null
@@ -1,269 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-#
-# Project: OpenCL numerical library
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2015-2021 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"test suite for OpenCL code"
-
-__author__ = "Jérôme Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "17/05/2021"
-
-
-import unittest
-import numpy
-import logging
-import platform
-
-logger = logging.getLogger(__name__)
-try:
- import pyopencl
-except ImportError as error:
- logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
- pyopencl = None
-
-from .. import ocl
-if ocl is not None:
- from ..utils import read_cl_file
- from .. import pyopencl
- import pyopencl.array
-from ...test.utils import test_options
-
-
-class TestKahan(unittest.TestCase):
- """
- Test the kernels for compensated math in OpenCL
- """
-
- @classmethod
- def setUpClass(cls):
- if not test_options.WITH_OPENCL_TEST:
- raise unittest.SkipTest("User request to skip OpenCL tests")
- if pyopencl is None or ocl is None:
- raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
-
- cls.ctx = ocl.create_context(devicetype="GPU")
- cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
-
- # this is running 32-bit OpenCL with POCL
- if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
- cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
- cls.args = "-DX87_VOLATILE=volatile"
- else:
- cls.args = ""
-
- @classmethod
- def tearDownClass(cls):
- cls.queue = None
- cls.ctx = None
-
- @staticmethod
- def dummy_sum(ary, dtype=None):
- "perform the actual sum in a dummy way "
- if dtype is None:
- dtype = ary.dtype.type
- sum_ = dtype(0)
- for i in ary:
- sum_ += i
- return sum_
-
- def test_kahan(self):
- # simple test
- N = 26
- data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
-
- ref64 = numpy.sum(data, dtype=numpy.float64)
- ref32 = self.dummy_sum(data)
- if (ref64 == ref32):
- logger.warning("Kahan: invalid tests as float32 provides the same result as float64")
- # Dummy kernel to evaluate
- src = """
- kernel void summation(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- for (int i=0; i<size; i++)
- {
- acc = kahan_sum(acc, data[i]);
- }
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
- """
- prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
- ones_d = pyopencl.array.to_device(self.queue, data)
- res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
- res_d.fill(0)
- evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype=numpy.float64)
- self.assertEqual(ref64, res, "test_kahan")
-
- def test_dot16(self):
- # simple test
- N = 16
- data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
-
- ref64 = numpy.dot(data.astype(numpy.float64), data.astype(numpy.float64))
- ref32 = numpy.dot(data, data)
- if (ref64 == ref32):
- logger.warning("dot16: invalid tests as float32 provides the same result as float64")
- # Dummy kernel to evaluate
- src = """
- kernel void test_dot16(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float16 data16 = (float16) (data[0],data[1],data[2],data[3],data[4],
- data[5],data[6],data[7],data[8],data[9],
- data[10],data[11],data[12],data[13],data[14],data[15]);
- acc = comp_dot16(data16, data16);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot8(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float8 data0 = (float8) (data[0],data[2],data[4],data[6],data[8],data[10],data[12],data[14]);
- float8 data1 = (float8) (data[1],data[3],data[5],data[7],data[9],data[11],data[13],data[15]);
- acc = comp_dot8(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot4(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float4 data0 = (float4) (data[0],data[4],data[8],data[12]);
- float4 data1 = (float4) (data[3],data[7],data[11],data[15]);
- acc = comp_dot4(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot3(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float3 data0 = (float3) (data[0],data[4],data[12]);
- float3 data1 = (float3) (data[3],data[11],data[15]);
- acc = comp_dot3(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot2(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float2 data0 = (float2) (data[0],data[14]);
- float2 data1 = (float2) (data[1],data[15]);
- acc = comp_dot2(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- """
-
- prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
- ones_d = pyopencl.array.to_device(self.queue, data)
- res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
- res_d.fill(0)
- evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot16")
-
- res_d.fill(0)
- data0 = data[0::2]
- data1 = data[1::2]
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot8: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot8(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot8")
-
- res_d.fill(0)
- data0 = data[0::4]
- data1 = data[3::4]
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot4: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot4(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot4")
-
- res_d.fill(0)
- data0 = numpy.array([data[0], data[4], data[12]])
- data1 = numpy.array([data[3], data[11], data[15]])
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot3: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot3(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot3")
-
- res_d.fill(0)
- data0 = numpy.array([data[0], data[14]])
- data1 = numpy.array([data[1], data[15]])
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot2: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot2(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot2")
-
-
-def suite():
- testsuite = unittest.TestSuite()
- loader = unittest.defaultTestLoader.loadTestsFromTestCase
- testsuite.addTest(loader(TestKahan))
- return testsuite
-
-
-if __name__ == '__main__':
- runner = unittest.TextTestRunner()
- runner.run(suite())
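For reference, the kahan_sum kernel exercised above maintains a float2 accumulator: the running sum plus a correction term holding the bits lost to float32 rounding, and the test reads the result back as the float64 sum of the two components. A pure-Python sketch of such a compensated accumulator; the exact sign convention inside kahan.cl is an assumption here, and this variant is written so that sum + correction reconstructs the exact value, matching how the test combines res_d:

import numpy as np

def kahan_sum_pair(values):
    "Compensated (Kahan) float32 summation returning the (sum, correction) pair."
    total = np.float32(0.0)
    comp = np.float32(0.0)                  # low-order bits lost so far
    for v in values:
        y = np.float32(v) + comp            # re-inject the previously lost bits
        t = np.float32(total + y)           # rounded new total
        comp = np.float32(y - (t - total))  # bits lost in this rounding step
        total = t
    return total, comp

# Same pathological input as test_kahan: descending powers of two
N = 26
data = (1 << (N - 1 - np.arange(N))).astype(np.float32)
total, comp = kahan_sum_pair(data)
assert float(total) + float(comp) == np.sum(data, dtype=np.float64)  # exact: 2**26 - 1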
diff --git a/silx/opencl/test/test_linalg.py b/silx/opencl/test/test_linalg.py
deleted file mode 100644
index 0b6c730..0000000
--- a/silx/opencl/test/test_linalg.py
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the linalg module"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- import pyopencl as cl
- import pyopencl.array as parray
- from .. import linalg
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-try:
- from scipy.ndimage.filters import laplace
- _has_scipy = True
-except ImportError:
- _has_scipy = False
-
-
-# TODO move this function in math or image ?
-def gradient(img):
- '''
- Compute the gradient of an image as a numpy array
- Code from https://github.com/emmanuelle/tomo-tv/
- '''
- shape = [img.ndim, ] + list(img.shape)
- gradient = np.zeros(shape, dtype=img.dtype)
- slice_all = [0, slice(None, -1),]
- for d in range(img.ndim):
- gradient[tuple(slice_all)] = np.diff(img, axis=d)
- slice_all[0] = d + 1
- slice_all.insert(1, slice(None))
- return gradient
-
-
-# TODO move this function in math or image ?
-def divergence(grad):
- '''
- Compute the divergence of a gradient
- Code from https://github.com/emmanuelle/tomo-tv/
- '''
- res = np.zeros(grad.shape[1:])
- for d in range(grad.shape[0]):
- this_grad = np.rollaxis(grad[d], d)
- this_res = np.rollaxis(res, d)
- this_res[:-1] += this_grad[:-1]
- this_res[1:-1] -= this_grad[:-2]
- this_res[-1] -= this_grad[-2]
- return res
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestLinAlg(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.getfiles()
- self.la = linalg.LinAlg(self.image.shape)
- self.allocate_arrays()
-
- def allocate_arrays(self):
- """
- Allocate various types of arrays for the tests
- """
- # numpy images
- self.grad = np.zeros(self.image.shape, dtype=np.complex64)
- self.grad2 = np.zeros((2,) + self.image.shape, dtype=np.float32)
- self.grad_ref = gradient(self.image)
- self.div_ref = divergence(self.grad_ref)
- self.image2 = np.zeros_like(self.image)
- # Device images
- self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64)
- self.gradient_parray.fill(0)
- # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
- # but platforms not supporting OpenCL 1.2 have a problem with enqueue_fill_buffer,
- # so we use the parray "fill" utility
- self.gradient_buffer = self.gradient_parray.data
- # Do the same for image
- self.image_parray = parray.to_device(self.la.queue, self.image)
- self.image_buffer = self.image_parray.data
- # Refs
- tmp = np.zeros(self.image.shape, dtype=np.complex64)
- tmp.real = np.copy(self.grad_ref[0])
- tmp.imag = np.copy(self.grad_ref[1])
- self.grad_ref_parray = parray.to_device(self.la.queue, tmp)
- self.grad_ref_buffer = self.grad_ref_parray.data
-
- def tearDown(self):
- self.image = None
- self.image2 = None
- self.grad = None
- self.grad2 = None
- self.grad_ref = None
- self.div_ref = None
- self.gradient_parray.data.release()
- self.gradient_parray = None
- self.gradient_buffer = None
- self.image_parray.data.release()
- self.image_parray = None
- self.image_buffer = None
- self.grad_ref_parray.data.release()
- self.grad_ref_parray = None
- self.grad_ref_buffer = None
-
- def getfiles(self):
- # load 512x512 MRI phantom - TODO include Lena or ascent once a .npz is available
- self.image = np.load(utilstest.getfile("Brain512.npz"))["data"]
-
- def compare(self, result, reference, abstol, name):
- errmax = np.max(np.abs(result - reference))
- logger.info("%s: Max error = %e" % (name, errmax))
- self.assertTrue(errmax < abstol, str("%s: Max error is too high" % name))
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_gradient(self):
- arrays = {
- "numpy.ndarray": self.image,
- "buffer": self.image_buffer,
- "parray": self.image_parray
- }
- for desc, image in arrays.items():
- # Test with dst on host (numpy.ndarray)
- res = self.la.gradient(image, return_to_host=True)
- self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc))
- # Test with dst on device (pyopencl.Buffer)
- self.la.gradient(image, dst=self.gradient_buffer)
- cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer)
- self.grad2[0] = self.grad.real
- self.grad2[1] = self.grad.imag
- self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc))
- # Test with dst on device (pyopencl.Array)
- self.la.gradient(image, dst=self.gradient_parray)
- self.grad = self.gradient_parray.get()
- self.grad2[0] = self.grad.real
- self.grad2[1] = self.grad.imag
- self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc))
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_divergence(self):
- arrays = {
- "numpy.ndarray": self.grad_ref,
- "buffer": self.grad_ref_buffer,
- "parray": self.grad_ref_parray
- }
- for desc, grad in arrays.items():
- # Test with dst on host (numpy.ndarray)
- res = self.la.divergence(grad, return_to_host=True)
- self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc))
- # Test with dst on device (pyopencl.Buffer)
- self.la.divergence(grad, dst=self.image_buffer)
- cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer)
- self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc))
- # Test with dst on device (pyopencl.Array)
- self.la.divergence(grad, dst=self.image_parray)
- self.image2 = self.image_parray.get()
- self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc))
-
- @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing")
- def test_laplacian(self):
- laplacian_ref = laplace(self.image)
- # Laplacian = div(grad)
- self.la.gradient(self.image)
- laplacian_ocl = self.la.divergence(self.la.d_gradient, return_to_host=True)
- self.compare(laplacian_ocl, laplacian_ref, 1e-6, "laplacian")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestLinAlg("test_gradient"))
- testSuite.addTest(TestLinAlg("test_divergence"))
- testSuite.addTest(TestLinAlg("test_laplacian"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
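test_laplacian above rests on the identity Laplacian = div(grad) for the particular forward/backward difference scheme implemented by the gradient() and divergence() helpers. A small self-contained NumPy check of that identity against scipy.ndimage.laplace (whose default boundary mode is "reflect"), with 2D-only re-implementations of the helpers for brevity; grad2d and div2d are names introduced here, not silx API:

import numpy as np
from scipy.ndimage import laplace

def grad2d(img):
    # Forward differences along each axis; the last row/column of each component stays zero
    gx = np.zeros_like(img)
    gy = np.zeros_like(img)
    gx[:-1] = np.diff(img, axis=0)
    gy[:, :-1] = np.diff(img, axis=1)
    return gx, gy

def div2d(gx, gy):
    # Backward differences with boundary terms matching divergence() above
    d = np.zeros_like(gx)
    d[0] += gx[0]
    d[1:] += np.diff(gx, axis=0)
    d[:, 0] += gy[:, 0]
    d[:, 1:] += np.diff(gy, axis=1)
    return d

img = np.random.rand(64, 64)
assert np.allclose(div2d(*grad2d(img)), laplace(img))  # Laplacian == div(grad)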
diff --git a/silx/opencl/test/test_medfilt.py b/silx/opencl/test/test_medfilt.py
deleted file mode 100644
index 976b199..0000000
--- a/silx/opencl/test/test_medfilt.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Median filter of images + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of the median filter
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "05/07/2018"
-
-
-import sys
-import time
-import logging
-import numpy
-import unittest
-from collections import namedtuple
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- import pyopencl
- import pyopencl.array
- from .. import medfilt
-
-logger = logging.getLogger(__name__)
-
-Result = namedtuple("Result", ["size", "error", "sp_time", "oc_time"])
-
-try:
- from scipy.misc import ascent
-except ImportError:
- def ascent():
- """Dummy image from random data"""
- return numpy.random.random((512, 512))
-try:
- from scipy.ndimage import filters
- median_filter = filters.median_filter
- HAS_SCIPY = True
-except ImportError:
- HAS_SCIPY = False
- from silx.math import medfilt2d as median_filter
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestMedianFilter(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.data = ascent().astype(numpy.float32)
- self.medianfilter = medfilt.MedianFilter2D(self.data.shape, devicetype="gpu")
-
- def tearDown(self):
- self.data = None
- self.medianfilter = None
-
- def measure(self, size):
- "Common measurement of accuracy and timings"
- t0 = time.time()
- if HAS_SCIPY:
- ref = median_filter(self.data, size, mode="nearest")
- else:
- ref = median_filter(self.data, size)
- t1 = time.time()
- try:
- got = self.medianfilter.medfilt2d(self.data, size)
- except RuntimeError as msg:
- logger.error(msg)
- return
- t2 = time.time()
- delta = abs(got - ref).max()
- return Result(size, delta, t1 - t0, t2 - t1)
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_medfilt(self):
- """
- tests the median filter kernel
- """
- r = self.measure(size=11)
- if r is None:
- logger.info("test_medfilt: size: %s: skipped")
- else:
- logger.info("test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r)
- self.assertEqual(r.error, 0, 'Results are correct')
-
- def benchmark(self, limit=36):
- "Run some benchmarking"
- try:
- import PyQt5
- from ...gui.matplotlib import pylab
- from ...gui.utils import update_fig
- except ImportError:
- pylab = None
-
- def update_fig(*ag, **kwarg):
- pass
-
- fig = pylab.figure()
- fig.suptitle("Median filter of an image 512x512")
- sp = fig.add_subplot(1, 1, 1)
- sp.set_title(self.medianfilter.ctx.devices[0].name)
- sp.set_xlabel("Window width & height")
- sp.set_ylabel("Execution time (s)")
- sp.set_xlim(2, limit + 1)
- sp.set_ylim(0, 4)
- data_size = []
- data_scipy = []
- data_opencl = []
- plot_sp = sp.plot(data_size, data_scipy, "-or", label="scipy")[0]
- plot_opencl = sp.plot(data_size, data_opencl, "-ob", label="opencl")[0]
- sp.legend(loc=2)
- fig.show()
- update_fig(fig)
- for s in range(3, limit, 2):
- r = self.measure(s)
- print(r)
- if r.error == 0:
- data_size.append(s)
- data_scipy.append(r.sp_time)
- data_opencl.append(r.oc_time)
- plot_sp.set_data(data_size, data_scipy)
- plot_opencl.set_data(data_size, data_opencl)
- update_fig(fig)
- fig.show()
- if sys.version_info[0] < 3:
- raw_input()
- else:
- input()
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestMedianFilter("test_medfilt"))
- return testSuite
-
-
-def benchmark():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestMedianFilter("benchmark"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_projection.py b/silx/opencl/test/test_projection.py
deleted file mode 100644
index 7631128..0000000
--- a/silx/opencl/test/test_projection.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the forward projection module"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "19/01/2018"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- from .. import projection
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestProj(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- # ~ if sys.platform.startswith('darwin'):
- # ~ self.skipTest("Projection is not implemented on CPU for OS X yet")
- self.getfiles()
- n_angles = self.sino.shape[0]
- self.proj = projection.Projection(self.phantom.shape, n_angles)
- if self.proj.compiletime_workgroup_size < 16 * 16:
- self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet")
-
- def tearDown(self):
- self.phantom = None
- self.sino = None
- self.proj = None
-
- def getfiles(self):
- # load 512x512 MRI phantom
- self.phantom = np.load(utilstest.getfile("Brain512.npz"))["data"]
- # load sinogram computed with PyHST
- self.sino = np.load(utilstest.getfile("sino500_pyhst.npz"))["data"]
-
- def measure(self):
- "Common measurement of timings"
- t1 = time.time()
- try:
- result = self.proj.projection(self.phantom)
- except RuntimeError as msg:
- logger.error(msg)
- return
- t2 = time.time()
- return t2 - t1, result
-
- def compare(self, res):
- """
- Compare a computed projection with the reference sinogram (computed with PyHST)
- """
- # Compare with the reference sinogram loaded in getfiles()
- # TODO: compare a standard projection
- ref = self.sino
- return np.max(np.abs(res - ref))
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_proj(self):
- """
- tests Projection
- """
- # Test single reconstruction
- # --------------------------
- t, res = self.measure()
- if t is None:
- logger.info("test_proj: skipped")
- else:
- logger.info("test_proj: time = %.3fs" % t)
- err = self.compare(res)
- msg = str("Max error = %e" % err)
- logger.info(msg)
- # Interpolation differs at some lines, giving relative error of 10/50000
- self.assertTrue(err < 20., "Max error is too high")
- # Test multiple reconstructions
- # -----------------------------
- res0 = np.copy(res)
- for i in range(10):
- res = self.proj.projection(self.phantom)
- errmax = np.max(np.abs(res - res0))
- self.assertTrue(errmax < 1.e-6, "Max error is too high")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestProj("test_proj"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_sparse.py b/silx/opencl/test/test_sparse.py
deleted file mode 100644
index 76a6a0a..0000000
--- a/silx/opencl/test/test_sparse.py
+++ /dev/null
@@ -1,203 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2018-2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the sparse module"""
-
-import numpy as np
-import unittest
-import logging
-from itertools import product
-from ..common import ocl
-if ocl:
- import pyopencl.array as parray
- from silx.opencl.sparse import CSR
-try:
- import scipy.sparse as sp
-except ImportError:
- sp = None
-logger = logging.getLogger(__name__)
-
-
-
-def generate_sparse_random_data(
- shape=(1000,),
- data_min=0, data_max=100,
- density=0.1,
- use_only_integers=True,
- dtype="f"):
- """
- Generate random sparse data.
-
- Parameters
- ------------
- shape: tuple
- Output data shape.
- data_min: int or float
- Minimum value of data
- data_max: int or float
- Maximum value of data
- density: float
- Density of non-zero elements in the output data.
- A low density means few non-zero elements.
- use_only_integers: bool
- If set to True, the output data items will be primarily integers,
- possibly cast to float if dtype is a floating-point type.
- This can be used for ease of debugging.
- dtype: str or numpy.dtype
- Output data type
- """
- mask = np.random.binomial(1, density, size=shape)
- if use_only_integers:
- d = np.random.randint(data_min, high=data_max, size=shape)
- else:
- d = data_min + (data_max - data_min) * np.random.rand(*shape)
- return (d * mask).astype(dtype)
-
-
-
-@unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing")
-class TestCSR(unittest.TestCase):
- """Test CSR format"""
-
- def setUp(self):
- # Test possible configurations
- input_on_device = [False, True]
- output_on_device = [False, True]
- dtypes = [np.float32, np.int32, np.uint16]
- self._test_configs = list(product(input_on_device, output_on_device, dtypes))
-
-
- def compute_ref_sparsification(self, array):
- ref_sparse = sp.csr_matrix(array)
- return ref_sparse
-
-
- def test_sparsification(self):
- for input_on_device, output_on_device, dtype in self._test_configs:
- self._test_sparsification(input_on_device, output_on_device, dtype)
-
-
- def _test_sparsification(self, input_on_device, output_on_device, dtype):
- current_config = "input on device: %s, output on device: %s, dtype: %s" % (
- str(input_on_device), str(output_on_device), str(dtype)
- )
- logger.debug("CSR: %s" % current_config)
- # Generate data and reference CSR
- array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
- ref_sparse = self.compute_ref_sparsification(array)
- # Sparsify on device
- csr = CSR(array.shape, dtype=dtype)
- if input_on_device:
- # The array has to be flattened
- arr = parray.to_device(csr.queue, array.ravel())
- else:
- arr = array
- if output_on_device:
- d_data = parray.empty_like(csr.data)
- d_indices = parray.empty_like(csr.indices)
- d_indptr = parray.empty_like(csr.indptr)
- d_data.fill(0)
- d_indices.fill(0)
- d_indptr.fill(0)
- output = (d_data, d_indices, d_indptr)
- else:
- output = None
- data, indices, indptr = csr.sparsify(arr, output=output)
- if output_on_device:
- data = data.get()
- indices = indices.get()
- indptr = indptr.get()
- # Compare
- nnz = ref_sparse.nnz
- self.assertTrue(
- np.allclose(data[:nnz], ref_sparse.data),
- "something wrong with sparsified data (%s)"
- % current_config
- )
- self.assertTrue(
- np.allclose(indices[:nnz], ref_sparse.indices),
- "something wrong with sparsified indices (%s)"
- % current_config
- )
- self.assertTrue(
- np.allclose(indptr, ref_sparse.indptr),
- "something wrong with sparsified indices pointers (indptr) (%s)"
- % current_config
- )
-
-
- def test_desparsification(self):
- for input_on_device, output_on_device, dtype in self._test_configs:
- self._test_desparsification(input_on_device, output_on_device, dtype)
-
-
- def _test_desparsification(self, input_on_device, output_on_device, dtype):
- current_config = "input on device: %s, output on device: %s, dtype: %s" % (
- str(input_on_device), str(output_on_device), str(dtype)
- )
- logger.debug("CSR: %s" % current_config)
- # Generate data and reference CSR
- array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
- ref_sparse = self.compute_ref_sparsification(array)
- # De-sparsify on device
- csr = CSR(array.shape, dtype=dtype, max_nnz=ref_sparse.nnz)
- if input_on_device:
- data = parray.to_device(csr.queue, ref_sparse.data)
- indices = parray.to_device(csr.queue, ref_sparse.indices)
- indptr = parray.to_device(csr.queue, ref_sparse.indptr)
- else:
- data = ref_sparse.data
- indices = ref_sparse.indices
- indptr = ref_sparse.indptr
- if output_on_device:
- d_arr = parray.empty_like(csr.array)
- d_arr.fill(0)
- output = d_arr
- else:
- output = None
- arr = csr.densify(data, indices, indptr, output=output)
- if output_on_device:
- arr = arr.get()
- # Compare
- self.assertTrue(
- np.allclose(arr.reshape(array.shape), array),
- "something wrong with densified data (%s)"
- % current_config
- )
-
-
-
-def suite():
- suite = unittest.TestSuite()
- suite.addTest(
- unittest.defaultTestLoader.loadTestsFromTestCase(TestCSR)
- )
- return suite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
-
-
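The CSR tests above reproduce on the GPU what scipy.sparse does on the host: a dense array is compressed into three arrays (data, indices, indptr) and can be densified back without loss. A minimal scipy sketch of that round trip on data shaped like the test's, with roughly 10% non-zeros:

import numpy as np
import scipy.sparse as sp

# Dense float32 array with ~10% non-zero entries, as generate_sparse_random_data produces
dense = np.random.rand(512, 511).astype(np.float32)
dense[np.random.rand(512, 511) > 0.1] = 0

csr = sp.csr_matrix(dense)
data, indices, indptr = csr.data, csr.indices, csr.indptr
# indptr has one entry per row plus one; row i occupies data[indptr[i]:indptr[i+1]]
assert len(indptr) == dense.shape[0] + 1
assert indptr[-1] == csr.nnz == len(data) == len(indices)
# Densifying reproduces the original array exactly
assert np.array_equal(csr.toarray(), dense)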
diff --git a/silx/opencl/test/test_stats.py b/silx/opencl/test/test_stats.py
deleted file mode 100644
index 8baf05e..0000000
--- a/silx/opencl/test/test_stats.py
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Sift implementation in Python + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of the statistics (min, max, mean, std) calculation
-"""
-__authors__ = ["Henri Payno, Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "19/05/2021"
-
-import logging
-import time
-import numpy
-
-import unittest
-from ..common import ocl
-if ocl:
- import pyopencl
- import pyopencl.array
- from ..statistics import StatResults, Statistics
-from ..utils import get_opencl_code
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl, "PyOpenCl is missing")
-class TestStatistics(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- cls.size = 1 << 20 # 1 million elements
- cls.data = numpy.random.randint(0, 65000, cls.size).astype("uint16")
- fdata = cls.data.astype("float64")
- t0 = time.perf_counter()
- std = fdata.std()
- cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size),
- fdata.sum(), fdata.mean(), std ** 2,
- std)
- t1 = time.perf_counter()
- cls.ref_time = t1 - t0
-
- @classmethod
- def tearDownClass(cls):
- cls.size = cls.ref = cls.data = cls.ref_time = None
-
- @classmethod
- def validate(cls, res):
- return (
- (res.min == cls.ref.min) and
- (res.max == cls.ref.max) and
- (res.cnt == cls.ref.cnt) and
- abs(res.mean - cls.ref.mean) < 0.01 and
- abs(res.std - cls.ref.std) < 0.1)
-
- def test_measurement(self):
- """
- tests that all devices are working properly ...
- """
- logger.info("Reference results: %s", self.ref)
- for pid, platform in enumerate(ocl.platforms):
- for did, device in enumerate(platform.devices):
- try:
- s = Statistics(template=self.data, platformid=pid, deviceid=did)
- except Exception as err:
- failed_init = True
- res = StatResults(0, 0, 0, 0, 0, 0, 0)
- print(err)
- else:
- failed_init = False
- for comp in ("single", "double", "comp"):
- t0 = time.perf_counter()
- res = s(self.data, comp=comp)
- t1 = time.perf_counter()
- logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
-
- if failed_init or not self.validate(res):
- logger.error("failed_init %s; Computation modes %s", failed_init, comp)
- logger.error("Failed on platform %s device %s", platform, device)
- logger.error("Reference results: %s", self.ref)
- logger.error("Faulty results: %s", res)
- self.assertTrue(False, f"Stat calculation failed on {platform},{device} in mode {comp}")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestStatistics("test_measurement"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/utils.py b/silx/opencl/utils.py
deleted file mode 100644
index 575e018..0000000
--- a/silx/opencl/utils.py
+++ /dev/null
@@ -1,214 +0,0 @@
-# -*- coding: utf-8 -*-
-# /*##########################################################################
-# Copyright (C) 2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ############################################################################*/
-"""
-Project: Sift implementation in Python + OpenCL
- https://github.com/silx-kit/silx
-"""
-
-from __future__ import division
-
-__authors__ = ["Jérôme Kieffer", "Pierre Paleo"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "06/09/2017"
-__status__ = "Production"
-
-import os
-import numpy
-from .. import resources
-from math import log, ceil
-
-
-def calc_size(shape, blocksize):
- """
- Calculate the global work size for a kernel: round each dimension of shape
- up to a multiple of the workgroup size (blocksize is expected to be a power of two)
- """
- if "__len__" in dir(blocksize):
- return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize))
- else:
- return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape)
-
-
-def nextpower(n):
- """Calculate the power of two
-
- :param n: an integer, for example 100
- :return: another integer, 100-> 128
- """
- return 1 << int(ceil(log(n, 2)))
-
-
-def sizeof(shape, dtype="uint8"):
- """
- Calculate the number of bytes needed to allocate for a given structure
-
- :param shape: size or tuple of sizes
- :param dtype: data type
- """
- itemsize = numpy.dtype(dtype).itemsize
- cnt = 1
- if "__len__" in dir(shape):
- for dim in shape:
- cnt *= dim
- else:
- cnt = int(shape)
- return cnt * itemsize
-
-
-def get_cl_file(resource):
- """get the full path of a openCL resource file
-
- The resource name can be prefixed by the name of a resource directory. For
- example "silx:foo.png" identify the resource "foo.png" from the resource
- directory "silx".
- See also :func:`silx.resources.register_resource_directory`.
-
- :param str resource: Resource name. File name contained if the `opencl`
- directory of the resources.
- :return: the full path of the openCL source file
- """
- if not resource.endswith(".cl"):
- resource += ".cl"
- return resources._resource_filename(resource,
- default_directory="opencl")
-
-
-def read_cl_file(filename):
- """
- :param filename: read an OpenCL file and apply a preprocessor
- :return: preprocessed source code
- """
- with open(get_cl_file(filename), "r") as f:
- # Dummy preprocessor which removes the #include
- lines = [i for i in f.readlines() if not i.startswith("#include ")]
- return "".join(lines)
-
-
-get_opencl_code = read_cl_file
-
-
-def concatenate_cl_kernel(filenames):
- """Concatenates all the kernel from the list of files
-
- :param filenames: filenames containing the kernels
- :type filenames: list of str which can be filename of kernel as a string.
- :return: a string with all kernels concatenated
-
- this method concatenates all the kernel from the list
- """
- return os.linesep.join(read_cl_file(fn) for fn in filenames)
-
-
-
-
-class ConvolutionInfos(object):
- allowed_axes = {
- "1D": [None],
- "separable_2D_1D_2D": [None, (0, 1), (1, 0)],
- "batched_1D_2D": [(0,), (1,)],
- "separable_3D_1D_3D": [
- None,
- (0, 1, 2),
- (1, 2, 0),
- (2, 0, 1),
- (2, 1, 0),
- (1, 0, 2),
- (0, 2, 1)
- ],
- "batched_1D_3D": [(0,), (1,), (2,)],
- "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
- "2D": [None],
- "batched_2D_3D": [(0,), (1,), (2,)],
- "separable_3D_2D_3D": [
- (1, 0),
- (0, 1),
- (2, 0),
- (0, 2),
- (1, 2),
- (2, 1),
- ],
- "3D": [None],
- }
- use_cases = {
- (1, 1): {
- "1D": {
- "name": "1D convolution on 1D data",
- "kernels": ["convol_1D_X"],
- },
- },
- (2, 2): {
- "2D": {
- "name": "2D convolution on 2D data",
- "kernels": ["convol_2D_XY"],
- },
- },
- (3, 3): {
- "3D": {
- "name": "3D convolution on 3D data",
- "kernels": ["convol_3D_XYZ"],
- },
- },
- (2, 1): {
- "separable_2D_1D_2D": {
- "name": "Separable (2D->1D) convolution on 2D data",
- "kernels": ["convol_1D_X", "convol_1D_Y"],
- },
- "batched_1D_2D": {
- "name": "Batched 1D convolution on 2D data",
- "kernels": ["convol_1D_X", "convol_1D_Y"],
- },
- },
- (3, 1): {
- "separable_3D_1D_3D": {
- "name": "Separable (3D->1D) convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- "batched_1D_3D": {
- "name": "Batched 1D convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- "batched_separable_2D_1D_3D": {
- "name": "Batched separable (2D->1D) convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- },
- (3, 2): {
- "separable_3D_2D_3D": {
- "name": "Separable (3D->2D) convolution on 3D data",
- "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
- },
- "batched_2D_3D": {
- "name": "Batched 2D convolution on 3D data",
- "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
- },
- },
- }
-
-
-
-
-
-
-
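As a quick illustration of the two rounding helpers defined in utils.py above: calc_size rounds every dimension of a problem shape up to the next multiple of the workgroup size (the bit-masking trick assumes a power-of-two workgroup size, which is how it is used in silx.opencl), and nextpower returns the next power of two at or above its argument. A standalone sketch, with the helpers restated verbatim so the snippet runs on its own:

from math import ceil, log

def calc_size(shape, blocksize):
    # Round each dimension up to a multiple of the (power-of-two) workgroup size
    if "__len__" in dir(blocksize):
        return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize))
    else:
        return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape)

def nextpower(n):
    # Next power of two at or above n
    return 1 << int(ceil(log(n, 2)))

assert calc_size((2048, 2000), (32, 32)) == (2048, 2016)  # 2000 -> 63 * 32
assert calc_size((513,), 64) == (576,)                    # 513  -> 9 * 64
assert nextpower(100) == 128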