diff options
Diffstat (limited to 'silx/opencl')
-rw-r--r-- | silx/opencl/__init__.py | 2 | ||||
-rw-r--r-- | silx/opencl/backprojection.py | 488 | ||||
-rw-r--r-- | silx/opencl/common.py | 29 | ||||
-rw-r--r-- | silx/opencl/linalg.py | 218 | ||||
-rw-r--r-- | silx/opencl/medfilt.py | 8 | ||||
-rw-r--r-- | silx/opencl/processing.py | 61 | ||||
-rw-r--r-- | silx/opencl/projection.py | 419 | ||||
-rw-r--r-- | silx/opencl/reconstruction.py | 381 | ||||
-rw-r--r-- | silx/opencl/setup.py | 9 | ||||
-rw-r--r-- | silx/opencl/test/__init__.py | 21 | ||||
-rw-r--r-- | silx/opencl/test/test_array_utils.py | 161 | ||||
-rw-r--r-- | silx/opencl/test/test_backprojection.py | 165 | ||||
-rw-r--r-- | silx/opencl/test/test_linalg.py | 215 | ||||
-rw-r--r-- | silx/opencl/test/test_projection.py | 139 | ||||
-rw-r--r-- | silx/opencl/utils.py | 71 |
15 files changed, 2325 insertions, 62 deletions
diff --git a/silx/opencl/__init__.py b/silx/opencl/__init__.py index b970338..353b74b 100644 --- a/silx/opencl/__init__.py +++ b/silx/opencl/__init__.py @@ -40,7 +40,7 @@ __status__ = "stable" import logging -logger = logging.getLogger("silx.opencl") +logger = logging.getLogger(__name__) from .common import * diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py new file mode 100644 index 0000000..15a03b9 --- /dev/null +++ b/silx/opencl/backprojection.py @@ -0,0 +1,488 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for (filtered) backprojection on the GPU""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["A. Mirone, P. Paleo"] +__license__ = "MIT" +__date__ = "05/10/2017" + +import logging +import numpy + +from .common import pyopencl +from .processing import EventDescription, OpenclProcessing, BufferDescription +from .utils import nextpower as nextpow2 + +if pyopencl: + mf = pyopencl.mem_flags + import pyopencl.array as parray +else: + raise ImportError("pyopencl is not installed") +logger = logging.getLogger(__name__) + +# put in .common ? +try: + from pyfft.cl import Plan as pyfft_Plan + _has_pyfft = True +except ImportError: + _has_pyfft = False +# For silx v0.6 we disable the use FFT on GPU. +_has_pyfft = False + + +def _sizeof(Type): + """ + return the size (in bytes) of a scalar type, like the C behavior + """ + return numpy.dtype(Type).itemsize + + +def _idivup(a, b): + """ + return the integer division, plus one if `a` is not a multiple of `b` + """ + return (a + (b - 1)) // b + + +def fourier_filter(sino, filter_=None, fft_size=None): + """Simple numpy based implementation of fourier space filter + + :param sino: of shape shape = (num_projs, num_bins) + :param filter: filter function to apply in fourier space + :fft_size: size on which perform the fft. May be larger than the sino array + :return: filtered sinogram + """ + assert sino.ndim == 2 + num_projs, num_bins = sino.shape + if fft_size is None: + fft_size = nextpow2(num_bins * 2 - 1) + else: + assert fft_size >= num_bins + if fft_size == num_bins: + sino_zeropadded = sino.astype(numpy.float32) + else: + sino_zeropadded = numpy.zeros((num_projs, fft_size), + dtype=numpy.complex64) + sino_zeropadded[:, :num_bins] = sino.astype(numpy.float32) + + if filter_ is None: + h = numpy.zeros(fft_size, dtype=numpy.float32) + L2 = fft_size // 2 + 1 + h[0] = 1 / 4. + j = numpy.linspace(1, L2, L2 // 2, False) + h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2) + h[L2:] = numpy.copy(h[1:L2 - 1][::-1]) + filter_ = numpy.fft.fft(h).astype(numpy.complex64) + + # Linear convolution + sino_f = numpy.fft.fft(sino, fft_size) + sino_f = sino_f * filter_ + sino_filtered = numpy.fft.ifft(sino_f)[:, :num_bins].real + # Send the filtered sinogram to device + return numpy.ascontiguousarray(sino_filtered.real, dtype=numpy.float32) + + +class Backprojection(OpenclProcessing): + """A class for performing the backprojection using OpenCL""" + kernel_files = ["backproj.cl", "array_utils.cl"] + if _has_pyfft: + kernel_files.append("backproj_helper.cl") + + def __init__(self, sino_shape, slice_shape=None, axis_position=None, + angles=None, filter_name=None, ctx=None, devicetype="all", + platformid=None, deviceid=None, profile=False): + """Constructor of the OpenCL (filtered) backprojection + + :param sino_shape: shape of the sinogram. The sinogram is in the format + (n_b, n_a) where n_b is the number of detector bins + and n_a is the number of angles. + :param slice_shape: Optional, shape of the reconstructed slice. By + default, it is a square slice where the dimension + is the "x dimension" of the sinogram (number of + bins). + :param axis_position: Optional, axis position. Default is + `(shape[1]-1)/2.0`. + :param angles: Optional, a list of custom angles in radian. + :param filter_name: Optional, name of the filter for FBP. Default is + the Ram-Lak filter. + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by + clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly + slower) + """ + # OS X enforces a workgroup size of 1 when the kernel has + # synchronization barriers if sys.platform.startswith('darwin'): + # assuming no discrete GPU + # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet") + + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + self.shape = sino_shape + + self.num_bins = numpy.int32(sino_shape[1]) + self.num_projs = numpy.int32(sino_shape[0]) + self.angles = angles + if slice_shape is None: + self.slice_shape = (self.num_bins, self.num_bins) + else: + self.slice_shape = slice_shape + self.dimrec_shape = ( + _idivup(self.slice_shape[0], 32) * 32, + _idivup(self.slice_shape[1], 32) * 32 + ) + self.slice = numpy.zeros(self.dimrec_shape, dtype=numpy.float32) + self.filter_name = filter_name if filter_name else "Ram-Lak" + if axis_position: + self.axis_pos = numpy.float32(axis_position) + else: + self.axis_pos = numpy.float32((sino_shape[1] - 1.) / 2) + self.axis_array = None # TODO: add axis correction front-end + + self.is_cpu = False + if self.device.type == "CPU": + self.is_cpu = True + + self.compute_fft_plans() + self.buffers = [ + BufferDescription("_d_slice", numpy.prod(self.dimrec_shape), numpy.float32, mf.READ_WRITE), + BufferDescription("d_sino", self.num_projs * self.num_bins, numpy.float32, mf.READ_WRITE), # before transferring to texture (if available) + BufferDescription("d_cos", self.num_projs, numpy.float32, mf.READ_ONLY), + BufferDescription("d_sin", self.num_projs, numpy.float32, mf.READ_ONLY), + BufferDescription("d_axes", self.num_projs, numpy.float32, mf.READ_ONLY), + ] + self.allocate_buffers() + if not(self.is_cpu): + self.allocate_textures() + self.compute_filter() + if self.pyfft_plan: + self.add_to_cl_mem({ + "d_filter": self.d_filter, + "d_sino_z": self.d_sino_z + }) + self.d_sino = self.cl_mem["d_sino"] # shorthand + self.compute_angles() + + self.local_mem = 256 * 3 * _sizeof(numpy.float32) # constant for all image sizes + OpenclProcessing.compile_kernels(self, self.kernel_files) + # check that workgroup can actually be (16, 16) + self.check_workgroup_size("backproj_cpu_kernel") + # Workgroup and ndrange sizes are always the same + self.wg = (16, 16) + self.ndrange = ( + _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], # int(): pyopencl <= 2015.1 + _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] # int(): pyopencl <= 2015.1 + ) + + def compute_angles(self): + if self.angles is None: + self.angles = numpy.linspace(0, numpy.pi, self.num_projs, False) + h_cos = numpy.cos(self.angles).astype(numpy.float32) + h_sin = numpy.sin(self.angles).astype(numpy.float32) + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_cos"], h_cos) + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_sin"], h_sin) + if self.axis_array: + pyopencl.enqueue_copy(self.queue, + self.cl_mem["d_axes"], + self.axis_array.astype(numpy.float32)) + else: + pyopencl.enqueue_copy(self.queue, + self.cl_mem["d_axes"], + numpy.ones(self.num_projs, dtype=numpy.float32) * self.axis_pos) + + def allocate_textures(self): + """ + Allocate the texture for the sinogram. + """ + self.d_sino_tex = pyopencl.Image( + self.ctx, + mf.READ_ONLY | mf.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, + pyopencl.channel_type.FLOAT + ), + hostbuf=numpy.zeros(self.shape[::-1], dtype=numpy.float32) + ) + + def compute_fft_plans(self): + """ + If pyfft is installed, prepare a batched 1D FFT plan for the filtering + of FBP + + """ + self.fft_size = nextpow2(self.num_bins * 2 - 1) + if _has_pyfft: + logger.debug("pyfft is available. Computing FFT plans...") + # batched 1D transform + self.pyfft_plan = pyfft_Plan(self.fft_size, queue=self.queue, + wait_for_finish=True) + self.d_sino_z = parray.zeros(self.queue, + (self.num_projs, self.fft_size), + dtype=numpy.complex64) + logger.debug("... done") + else: + logger.debug("pyfft not available, using numpy.fft") + self.pyfft_plan = None + # TODO: fall-back to fftw if present ? + + def compute_filter(self): + """ + Compute the filter for FBP + """ + if self.filter_name == "Ram-Lak": + L = self.fft_size + h = numpy.zeros(L, dtype=numpy.float32) + L2 = L // 2 + 1 + h[0] = 1 / 4. + j = numpy.linspace(1, L2, L2 // 2, False) + h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2) + h[L2:] = numpy.copy(h[1:L2 - 1][::-1]) + else: + # TODO: other filters + raise ValueError("Filter %s is not available" % self.filter_name) + self.filter = h + if self.pyfft_plan: + self.d_filter = parray.to_device(self.queue, h.astype(numpy.complex64)) + self.pyfft_plan.execute(self.d_filter.data) + else: + self.filter = numpy.fft.fft(h).astype(numpy.complex64) + self.d_filter = None + + def _get_local_mem(self): + return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes + + def cpy2d_to_slice(self, dst): + ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0])) # pyopencl < 2015.2 + slice_shape_ocl = numpy.int32(ndrange) + wg = None + kernel_args = ( + dst.data, + self.cl_mem["_d_slice"], + numpy.int32(self.slice_shape[1]), + numpy.int32(self.dimrec_shape[1]), + numpy.int32((0, 0)), + numpy.int32((0, 0)), + slice_shape_ocl + ) + return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) + + def transfer_to_texture(self, sino): + sino2 = sino + if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == numpy.float32): + sino2 = numpy.ascontiguousarray(sino, dtype=numpy.float32) + if self.is_cpu: + ev = pyopencl.enqueue_copy( + self.queue, + self.d_sino, + sino2 + ) + what = "transfer filtered sino H->D buffer" + else: + ev = pyopencl.enqueue_copy( + self.queue, + self.d_sino_tex, + sino2, + origin=(0, 0), + region=self.shape[::-1] + ) + what = "transfer filtered sino H->D texture" + return EventDescription(what, ev) + + def transfer_device_to_texture(self, d_sino): + if self.is_cpu: + if id(self.d_sino) == id(d_sino): + return + ev = pyopencl.enqueue_copy( + self.queue, + self.d_sino, + d_sino + ) + what = "transfer filtered sino D->D buffer" + else: + ev = pyopencl.enqueue_copy( + self.queue, + self.d_sino_tex, + d_sino, + offset=0, + origin=(0, 0), + region=self.shape[::-1] + ) + what = "transfer filtered sino D->D texture" + return EventDescription(what, ev) + + def backprojection(self, sino=None, dst=None): + """Perform the backprojection on an input sinogram + + :param sino: sinogram. If provided, it returns the plain backprojection. + :param dst: destination (pyopencl.Array). If provided, the result will be written in this array. + :return: backprojection of sinogram + """ + events = [] + with self.sem: + + if sino is not None: # assuming numpy.ndarray + events.append(self.transfer_to_texture(sino)) + # Prepare arguments for the kernel call + if self.is_cpu: + d_sino_ref = self.d_sino + else: + d_sino_ref = self.d_sino_tex + kernel_args = ( + self.num_projs, # num of projections (int32) + self.num_bins, # num of bins (int32) + self.axis_pos, # axis position (float32) + self.cl_mem["_d_slice"], # d_slice (__global float32*) + d_sino_ref, # d_sino (__read_only image2d_t or float*) + numpy.float32(0), # gpu_offset_x (float32) + numpy.float32(0), # gpu_offset_y (float32) + self.cl_mem["d_cos"], # d_cos (__global float32*) + self.cl_mem["d_sin"], # d_sin (__global float32*) + self.cl_mem["d_axes"], # d_axis (__global float32*) + self._get_local_mem() # shared mem (__local float32*) + ) + # Call the kernel + if self.is_cpu: + kernel_to_call = self.kernels.backproj_cpu_kernel + else: + kernel_to_call = self.kernels.backproj_kernel + event_bpj = kernel_to_call( + self.queue, + self.ndrange, + self.wg, + *kernel_args + ) + if dst is None: + self.slice[:] = 0 + events.append(EventDescription("backprojection", event_bpj)) + ev = pyopencl.enqueue_copy(self.queue, self.slice, + self.cl_mem["_d_slice"]) + events.append(EventDescription("copy D->H result", ev)) + ev.wait() + res = numpy.copy(self.slice) + if self.dimrec_shape[0] > self.slice_shape[0] or self.dimrec_shape[1] > self.slice_shape[1]: + res = res[:self.slice_shape[0], :self.slice_shape[1]] + # if the slice is backprojected onto a bigger grid + if self.slice_shape[1] > self.num_bins: + res = res[:self.slice_shape[0], :self.slice_shape[1]] + else: + ev = self.cpy2d_to_slice(dst) + events.append(EventDescription("copy D->D result", ev)) + ev.wait() + res = dst + + # /with self.sem + if self.profile: + self.events += events + + return res + + def filter_projections(self, sino, rescale=True): + """ + Performs the FBP on a given sinogram. + + :param sinogram: sinogram to (filter-)backproject + :param rescale: if True (default), the sinogram is multiplied with + (pi/n_projs) + """ + if sino.shape[0] != self.num_projs or sino.shape[1] != self.num_bins: + raise ValueError("Expected sinogram with (projs, bins) = (%d, %d)" % (self.num_projs, self.num_bins)) + if rescale: + sino = sino * numpy.pi / self.num_projs + events = [] + # if pyfft is available, all can be done on the device + if self.d_filter is not None: + + # Zero-pad the sinogram. + # TODO: this can be done on GPU with a "Memcpy2D": + # cl.enqueue_copy(queue, dst, src, host_origin=(0,0), buffer_origin=(0,0), region=shape, host_pitches=(sino.shape[1],), buffer_pitches=(self.fft_size,)) + # However it does not work properly, and raises an error for pyopencl < 2017.1 + sino_zeropadded = numpy.zeros((sino.shape[0], self.fft_size), dtype=numpy.complex64) + sino_zeropadded[:, :self.num_bins] = sino.astype(numpy.float32) + sino_zeropadded = numpy.ascontiguousarray(sino_zeropadded, dtype=numpy.complex64) + with self.sem: + # send to GPU + ev = pyopencl.enqueue_copy(self.queue, self.d_sino_z.data, sino_zeropadded) + events.append(EventDescription("Send sino H->D", ev)) + + # FFT (in-place) + self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs) + + # Multiply (complex-wise) with the the filter + ev = self.kernels.mult(self.queue, + tuple(int(i) for i in self.d_sino_z.shape[::-1]), + None, + self.d_sino_z.data, + self.d_filter.data, + numpy.int32(self.fft_size), + self.num_projs + ) + events.append(EventDescription("complex 2D-1D multiplication", ev)) + # Inverse FFT (in-place) + self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs, inverse=True) + # Copy the real part of d_sino_z[:, :self.num_bins] (complex64) to d_sino (float32) + ev = self.kernels.cpy2d_c2r(self.queue, self.shape[::-1], None, + self.d_sino, + self.d_sino_z.data, + self.num_bins, + self.num_projs, + numpy.int32(self.fft_size) + ) + events.append(EventDescription("conversion from complex padded sinogram to sinogram", ev)) + # debug +# ev.wait() +# h_sino = numpy.zeros(sino.shape, dtype=numpy.float32) +# ev = pyopencl.enqueue_copy(self.queue, h_sino, self.d_sino) +# ev.wait() +# numpy.save("/tmp/filtered_sinogram_%s.npy" % self.ctx.devices[0].platform.name.split()[0], h_sino) + events.append(self.transfer_device_to_texture(self.d_sino)) + # ------ + else: # no pyfft + sino_filtered = fourier_filter(sino, filter_=self.filter, fft_size=self.fft_size) + with self.sem: + events.append(self.transfer_to_texture(sino_filtered)) + if self.profile: + self.events += events + + def filtered_backprojection(self, sino): + """ + Compute the filtered backprojection (FBP) on a sinogram. + + :param sino: sinogram (`numpy.ndarray`) in the format (projections, + bins) + """ + + self.filter_projections(sino) + res = self.backprojection() + return res + + __call__ = filtered_backprojection diff --git a/silx/opencl/common.py b/silx/opencl/common.py index fcb4efa..ebf50c7 100644 --- a/silx/opencl/common.py +++ b/silx/opencl/common.py @@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "15/03/2017" +__date__ = "05/10/2017" __status__ = "stable" __all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers", "measure_workgroup_size", "kernel_workgroup_size"] @@ -47,7 +47,7 @@ import numpy from .utils import get_opencl_code -logger = logging.getLogger("silx.opencl") +logger = logging.getLogger(__name__) if os.environ.get("SILX_OPENCL") in ["0", "False"]: @@ -59,14 +59,17 @@ else: except ImportError: logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl") pyopencl = None - class mf(object): - WRITE_ONLY = 1 - READ_ONLY = 1 - READ_WRITE = 1 else: import pyopencl.array as array mf = pyopencl.mem_flags +if pyopencl is None: + # Define default mem flags + class mf(object): + WRITE_ONLY = 1 + READ_ONLY = 1 + READ_WRITE = 1 + FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ... "CPU": 4, # CPU, at least intel's have 4 operation per cycle @@ -362,6 +365,8 @@ class OpenCL(object): :param memory: minimum amount of memory (int) :param extensions: list of extensions to be present :param best: shall we look for the + :returns: A tuple of plateform ID and device ID, else None if nothing + found """ if extensions is None: extensions = [] @@ -391,6 +396,9 @@ class OpenCL(object): if best_found: return best_found[0], best_found[1] + # Nothing found + return None + def create_context(self, devicetype="ALL", useFp64=False, platformid=None, deviceid=None, cached=True): """ @@ -411,14 +419,17 @@ class OpenCL(object): if (platformid is not None) and (deviceid is not None): platformid = int(platformid) deviceid = int(deviceid) + elif "PYOPENCL_CTX" in os.environ: + pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")] + pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0 + platformid, deviceid = pyopencl_ctx else: if useFp64: ids = ocl.select_device(type=devicetype, extensions=["cl_khr_int64_base_atomics"]) else: ids = ocl.select_device(dtype=devicetype) if ids: - platformid = ids[0] - deviceid = ids[1] + platformid, deviceid = ids if (platformid is not None) and (deviceid is not None): if (platformid, deviceid) in self.context_cache: ctx = self.context_cache[(platformid, deviceid)] @@ -444,6 +455,7 @@ class OpenCL(object): platform_id = pyopencl.get_platforms().index(oplat) return self.platforms[platform_id].devices[device_id] + if pyopencl: ocl = OpenCL() if ocl.nb_devices == 0: @@ -558,4 +570,3 @@ def kernel_workgroup_size(program, kernel): device = program.devices[0] query_wg = pyopencl.kernel_work_group_info.WORK_GROUP_SIZE return kernel.get_work_group_info(query_wg, device) - diff --git a/silx/opencl/linalg.py b/silx/opencl/linalg.py new file mode 100644 index 0000000..fba3e9c --- /dev/null +++ b/silx/opencl/linalg.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for basic linear algebra in OpenCL""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["P. Paleo"] +__license__ = "MIT" +__date__ = "10/08/2017" + +import numpy as np + +from .common import pyopencl +from .processing import EventDescription, OpenclProcessing + +import pyopencl.array as parray +cl = pyopencl + + +class LinAlg(OpenclProcessing): + + kernel_files = ["linalg.cl"] + + def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False): + """ + Create a "Linear Algebra" plan for a given image shape. + + :param shape: shape of the image (num_rows, num_columns) + :param do_checks (optional): if True, memory and data type checks are performed when possible. + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel level, + store profiling elements (makes code slightly slower) + + """ + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + + self.d_gradient = parray.zeros(self.queue, shape, np.complex64) + self.d_image = parray.zeros(self.queue, shape, np.float32) + self.add_to_cl_mem({ + "d_gradient": self.d_gradient, + "d_image": self.d_image + }) + + self.wg2D = None + self.shape = shape + self.ndrange2D = ( + int(self.shape[1]), + int(self.shape[0]) + ) + self.do_checks = bool(do_checks) + OpenclProcessing.compile_kernels(self, self.kernel_files) + + @staticmethod + def check_array(array, dtype, shape, arg_name): + if array.shape != shape or array.dtype != dtype: + raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype))) + + def get_data_references(self, src, dst, default_src_ref, default_dst_ref): + """ + From various types of src and dst arrays, + returns the references to the underlying data (Buffer) that will be used by the OpenCL kernels. + # TODO documentation + + This function will make a copy host->device if the input is on host (eg. numpy array) + """ + if dst is not None: + if isinstance(dst, cl.array.Array): + dst_ref = dst.data + elif isinstance(dst, cl.Buffer): + dst_ref = dst + else: + raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer") + else: + dst_ref = default_dst_ref + + if isinstance(src, cl.array.Array): + src_ref = src.data + elif isinstance(src, cl.Buffer): + src_ref = src + else: # assuming numpy.ndarray + evt = cl.enqueue_copy(self.queue, default_src_ref, src) + self.events.append(EventDescription("copy H->D", evt)) + src_ref = default_src_ref + return src_ref, dst_ref + + def gradient(self, image, dst=None, return_to_host=False): + """ + Compute the spatial gradient of an image. + The gradient is computed with first-order difference (not central difference). + + :param image: image to compute the gradient from. It can be either a numpy.ndarray, a pyopencl Array or Buffer. + :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type. + :param return_to_host: optional, set to True if you want the result to be transferred back to host. + + if dst is provided, it should be of type numpy.complex64 ! + """ + n_y, n_x = np.int32(self.shape) + if self.do_checks: + self.check_array(image, np.float32, self.shape, "image") + if dst is not None: + self.check_array(dst, np.complex64, self.shape, "dst") + img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data) + + # Prepare the kernel call + kernel_args = [ + img_ref, + grad_ref, + n_x, + n_y + ] + # Call the gradient kernel + evt = self.kernels.kern_gradient2D( + self.queue, + self.ndrange2D, + self.wg2D, + *kernel_args + ) + self.events.append(EventDescription("gradient2D", evt)) + # TODO: should the wait be done in any case ? + # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards + if dst is None: + evt.wait() + + if return_to_host: + if dst is not None: + res_tmp = self.d_gradient.get() + else: + res_tmp = np.zeros(self.shape, dtype=np.complex64) + cl.enqueue_copy(self.queue, res_tmp, grad_ref) + res = np.zeros((2,) + self.shape, dtype=np.float32) + res[0] = np.copy(res_tmp.real) + res[1] = np.copy(res_tmp.imag) + return res + else: + return dst + + def divergence(self, gradient, dst=None, return_to_host=False): + """ + Compute the spatial divergence of an image. + The divergence is designed to be the (negative) adjoint of the gradient. + + :param gradient: gradient-like array to compute the divergence from. It can be either a numpy.ndarray, a pyopencl Array or Buffer. + :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type. + :param return_to_host: optional, set to True if you want the result to be transferred back to host. + + if dst is provided, it should be of type numpy.complex64 ! + """ + n_y, n_x = np.int32(self.shape) + # numpy.ndarray gradients are expected to be (2, n_y, n_x) + if isinstance(gradient, np.ndarray): + gradient2 = np.zeros(self.shape, dtype=np.complex64) + gradient2.real = np.copy(gradient[0]) + gradient2.imag = np.copy(gradient[1]) + gradient = gradient2 + elif self.do_checks: + self.check_array(gradient, np.complex64, self.shape, "gradient") + if dst is not None: + self.check_array(dst, np.float32, self.shape, "dst") + grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data) + + # Prepare the kernel call + kernel_args = [ + grad_ref, + img_ref, + n_x, + n_y + ] + # Call the gradient kernel + evt = self.kernels.kern_divergence2D( + self.queue, + self.ndrange2D, + self.wg2D, + *kernel_args + ) + self.events.append(EventDescription("divergence2D", evt)) + # TODO: should the wait be done in any case ? + # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards + if dst is None: + evt.wait() + + if return_to_host: + if dst is not None: + res = self.d_image.get() + else: + res = np.zeros(self.shape, dtype=np.float32) + cl.enqueue_copy(self.queue, res, img_ref) + return res + else: + return dst diff --git a/silx/opencl/medfilt.py b/silx/opencl/medfilt.py index 90cd49a..d4e425b 100644 --- a/silx/opencl/medfilt.py +++ b/silx/opencl/medfilt.py @@ -36,7 +36,7 @@ from __future__ import absolute_import, print_function, with_statement, division __author__ = "Jerome Kieffer" __license__ = "MIT" -__date__ = "15/03/2017" +__date__ = "12/09/2017" __copyright__ = "2012-2017, ESRF, Grenoble" __contact__ = "jerome.kieffer@esrf.fr" @@ -51,7 +51,7 @@ if pyopencl: mf = pyopencl.mem_flags else: raise ImportError("pyopencl is not installed") -logger = logging.getLogger("silx.opencl.medfilt") +logger = logging.getLogger(__name__) class MedianFilter2D(OpenclProcessing): @@ -202,7 +202,7 @@ class MedianFilter2D(OpenclProcessing): kwargs["width"] = numpy.int32(image.shape[1]) # for k, v in kwargs.items(): # print("%s: %s (%s)" % (k, v, type(v))) - mf2d = self.program.medfilt2d(self.queue, + mf2d = self.kernels.medfilt2d(self.queue, (wg, image.shape[1]), (wg, 1), *list(kwargs.values())) events.append(EventDescription("median filter 2d", mf2d)) @@ -264,6 +264,6 @@ class _MedFilt2d(object): new_shape = numpy.maximum(numpy.array(cls.median_filter.shape), shape) ctx = cls.median_filter.ctx cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx) - return cls.median_filter.medfilt2d(image) + return cls.median_filter.medfilt2d(image, kernel_size=kernel_size) medfilt2d = _MedFilt2d.medfilt2d diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py index ca46701..1997a55 100644 --- a/silx/opencl/processing.py +++ b/silx/opencl/processing.py @@ -4,7 +4,7 @@ # Project: S I L X project # https://github.com/silx-kit/silx # -# Copyright (C) European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -41,7 +41,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "03/02/2017" +__date__ = "03/10/2017" __status__ = "stable" @@ -51,14 +51,34 @@ import gc from collections import namedtuple import numpy import threading -from .common import ocl, pyopencl, release_cl_buffers +from .common import ocl, pyopencl, release_cl_buffers, kernel_workgroup_size from .utils import concatenate_cl_kernel BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"]) EventDescription = namedtuple("EventDescription", ["name", "event"]) -logger = logging.getLogger("pyFAI.opencl.processing") +logger = logging.getLogger(__name__) + + +class KernelContainer(object): + """Those object holds a copy of all kernels accessible as attributes""" + + def __init__(self, program): + """Constructor of the class + + :param program: the OpenCL program as generated by PyOpenCL + """ + for kernel in program.all_kernels(): + self.__setattr__(kernel.function_name, kernel) + + def get_kernels(self): + "return the dictionary with all kernels" + return self.__dict__.copy() + + def get_kernel(self, name): + "get a kernel from its name" + return self.__dict__.get(name) class OpenclProcessing(object): @@ -85,9 +105,10 @@ class OpenclProcessing(object): :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" :param platformid: integer with the platform_identifier, as given by clinfo :param deviceid: Integer with the device identifier, as given by clinfo - :param block_size: preferred workgroup size, may vary depending on the outpcome of the compilation - :param profile: switch on profiling to be able to profile at the kernel level, - store profiling elements (makes code slightly slower) + :param block_size: preferred workgroup size, may vary depending on the + out come of the compilation + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly slower) """ self.sem = threading.Semaphore() self.profile = None @@ -108,6 +129,7 @@ class OpenclProcessing(object): self.set_profiling(profile) self.block_size = block_size self.program = None + self.kernels = None def __del__(self): """Destructor: release all buffers and programs @@ -145,8 +167,8 @@ class OpenclProcessing(object): ualloc = 0 for buf in buffers: ualloc += numpy.dtype(buf.dtype).itemsize * buf.size - logger.info("%.3fMB are needed on device which has %.3fMB", - ualloc / 1.0e6, self.device.memory / 1.0e6) + logger.info("%.3fMB are needed on device: %s, which has %.3fMB", + ualloc / 1.0e6, self.device, self.device.memory / 1.0e6) if ualloc >= self.device.memory: raise MemoryError("Fatal error in allocate_buffers. Not enough " @@ -157,13 +179,29 @@ class OpenclProcessing(object): try: for buf in buffers: size = numpy.dtype(buf.dtype).itemsize * buf.size - mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, size) + mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size)) except pyopencl.MemoryError as error: release_cl_buffers(mem) raise MemoryError(error) self.cl_mem.update(mem) + def add_to_cl_mem(self, parrays): + """ + Add pyopencl.array, which are allocated by pyopencl, to self.cl_mem. + This should be used before calling allocate_buffers(). + + :param parrays: a dictionary of `pyopencl.array.Array` or `pyopencl.Buffer` + """ + mem = self.cl_mem + for name, parr in parrays.items(): + mem[name] = parr + self.cl_mem.update(mem) + + def check_workgroup_size(self, kernel_name): + kernel = self.kernels.get_kernel(kernel_name) + self.compiletime_workgroup_size = kernel_workgroup_size(self.program, kernel) + def free_buffers(self): """free all device.memory allocated on the device """ @@ -199,12 +237,15 @@ class OpenclProcessing(object): self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options) except (pyopencl.MemoryError, pyopencl.LogicError) as error: raise MemoryError(error) + else: + self.kernels = KernelContainer(self.program) def free_kernels(self): """Free all kernels """ for kernel in self.cl_kernel_args: self.cl_kernel_args[kernel] = [] + self.kernels = None self.program = None def set_profiling(self, value=True): diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py new file mode 100644 index 0000000..0ebe9bc --- /dev/null +++ b/silx/opencl/projection.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for tomographic projector on the GPU""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["A. Mirone, P. Paleo"] +__license__ = "MIT" +__date__ = "26/06/2017" + +import logging +import numpy as np + +from .common import pyopencl +from .processing import EventDescription, OpenclProcessing, BufferDescription +from .backprojection import _sizeof, _idivup + +if pyopencl: + mf = pyopencl.mem_flags + import pyopencl.array as parray +else: + raise ImportError("pyopencl is not installed") +logger = logging.getLogger(__name__) + + +class Projection(OpenclProcessing): + """ + A class for performing a tomographic projection (Radon Transform) using + OpenCL + """ + kernel_files = ["proj.cl", "array_utils.cl"] + + def __init__(self, slice_shape, angles, axis_position=None, + detector_width=None, normalize=False, ctx=None, + devicetype="all", platformid=None, deviceid=None, + profile=False + ): + """Constructor of the OpenCL projector. + + :param slice_shape: shape of the slice: (num_rows, num_columns). + :param angles: Either an integer number of angles, or a list of custom + angles values in radian. + :param axis_position: Optional, axis position. Default is + `(shape[1]-1)/2.0`. + :param detector_width: Optional, detector width in pixels. + If detector_width > slice_shape[1], the + projection data will be surrounded with zeros. + Using detector_width < slice_shape[1] might + result in a local tomography setup. + :param normalize: Optional, normalization. If set, the sinograms are + multiplied by the factor pi/(2*nprojs). + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by + clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly + slower) + """ + # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers + # if sys.platform.startswith('darwin'): # assuming no discrete GPU + # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet") + + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + self.shape = slice_shape + self.axis_pos = axis_position + self.angles = angles + self.dwidth = detector_width + self.normalize = normalize + + # Default values + if self.axis_pos is None: + self.axis_pos = (self.shape[1] - 1) / 2. + if self.dwidth is None: + self.dwidth = self.shape[1] + if not(np.iterable(self.angles)): + if self.angles is None: + self.nprojs = self.shape[0] + else: + self.nprojs = self.angles + self.angles = np.linspace(start=0, + stop=np.pi, + num=self.nprojs, + endpoint=False).astype(dtype=np.float32) + else: + self.nprojs = len(self.angles) + self.offset_x = -np.float32((self.shape[1]-1)/2. - self.axis_pos) # TODO: custom + self.offset_y = -np.float32((self.shape[0]-1)/2. - self.axis_pos) # TODO: custom + # Reset axis_pos once offset are computed + self.axis_pos0 = np.float((self.shape[1]-1)/2.) + + # Workgroup, ndrange and shared size + self.dimgrid_x = _idivup(self.dwidth, 16) + self.dimgrid_y = _idivup(self.nprojs, 16) + self._dimrecx = np.int32(self.dimgrid_x * 16) + self._dimrecy = np.int32(self.dimgrid_y * 16) + self.local_mem = 16 * 7 * _sizeof(np.float32) + self.wg = (16, 16) + self.ndrange = ( + int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1 + int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1 + ) + + self.is_cpu = False + if self.device.type == "CPU": + self.is_cpu = True + + # Allocate memory + self.buffers = [ + BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE), + BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY), + BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY), + BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY), + BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY), + ] + self.add_to_cl_mem( + { + "d_axis_corrections": parray.zeros(self.queue, + self.nprojs, np.float32) + } + ) + self._tmp_extended_img = np.zeros((self.shape[0]+2, self.shape[1]+2), + dtype=np.float32) + if self.is_cpu: + self.allocate_slice() + else: + self.allocate_textures() + self.allocate_buffers() + self._ex_sino = np.zeros((self._dimrecy, self._dimrecx), + dtype=np.float32) + if self.is_cpu: + self.cl_mem["d_slice"].fill(0.) + # enqueue_fill_buffer has issues if opencl 1.2 is not present + #~ pyopencl.enqueue_fill_buffer( + #~ self.queue, + #~ self.cl_mem["d_slice"], + #~ np.float32(0), + #~ 0, + #~ self._tmp_extended_img.size * _sizeof(np.float32) + #~ ) + # Precomputations + self.compute_angles() + self.proj_precomputations() + self.cl_mem["d_axis_corrections"].fill(0.) + # enqueue_fill_buffer has issues if opencl 1.2 is not present + #~ pyopencl.enqueue_fill_buffer( + #~ self.queue, + #~ self.cl_mem["d_axis_corrections"], + #~ np.float32(0), + #~ 0, + #~ self.nprojs*_sizeof(np.float32) + #~ ) + # Shorthands + self._d_sino = self.cl_mem["_d_sino"] + + OpenclProcessing.compile_kernels(self, self.kernel_files) + # check that workgroup can actually be (16, 16) + self.check_workgroup_size("forward_kernel_cpu") + + def compute_angles(self): + angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs + angles2[:self.nprojs] = np.copy(self.angles) + angles2[self.nprojs:] = angles2[self.nprojs-1] + self.angles2 = angles2 + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2) + + def allocate_slice(self): + self.add_to_cl_mem({"d_slice": parray.zeros(self.queue, (self.shape[1]+2, self.shape[1]+2), np.float32)}) + + def allocate_textures(self): + self.d_image_tex = pyopencl.Image( + self.ctx, + mf.READ_ONLY | mf.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, + pyopencl.channel_type.FLOAT + ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T), + ) + + def transfer_to_texture(self, image): + image2 = image + if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32): + image2 = np.ascontiguousarray(image) + if self.is_cpu: + # TODO: create NoneEvent + return self.transfer_to_slice(image2) + #~ return pyopencl.enqueue_copy( + #~ self.queue, + #~ self.cl_mem["d_slice"].data, + #~ image2, + #~ origin=(1, 1), + #~ region=image.shape[::-1] + #~ ) + else: + return pyopencl.enqueue_copy( + self.queue, + self.d_image_tex, + image2, + origin=(1, 1), + region=image.shape[::-1] + ) + + def transfer_device_to_texture(self, d_image): + if self.is_cpu: + # TODO this copy should not be necessary + return self.cpy2d_to_slice(d_image) + else: + return pyopencl.enqueue_copy( + self.queue, + self.d_image_tex, + d_image, + offset=0, + origin=(1, 1), + region=(int(self.shape[1]), int(self.shape[0]))#self.shape[::-1] # pyopencl <= 2015.2 + ) + + def transfer_to_slice(self, image): + image2 = np.zeros((image.shape[0]+2, image.shape[1]+2), dtype=np.float32) + image2[1:-1, 1:-1] = image.astype(np.float32) + self.cl_mem["d_slice"].set(image2) + + def proj_precomputations(self): + beginPos = np.zeros((2, self._dimrecy), dtype=np.int32) + strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32) + strideLine = np.zeros((2, self._dimrecy), dtype=np.int32) + cos_angles = np.cos(self.angles2) + sin_angles = np.sin(self.angles2) + dimslice = self.shape[1] + + M1 = np.abs(cos_angles) > 0.70710678 + M1b = np.logical_not(M1) + M2 = cos_angles > 0 + M2b = np.logical_not(M2) + M3 = sin_angles > 0 + M3b = np.logical_not(M3) + case1 = M1 * M2 + case2 = M1 * M2b + case3 = M1b * M3 + case4 = M1b * M3b + + beginPos[0][case1] = 0 + beginPos[1][case1] = 0 + strideJoseph[0][case1] = 1 + strideJoseph[1][case1] = 0 + strideLine[0][case1] = 0 + strideLine[1][case1] = 1 + + beginPos[0][case2] = dimslice-1 + beginPos[1][case2] = dimslice-1 + strideJoseph[0][case2] = -1 + strideJoseph[1][case2] = 0 + strideLine[0][case2] = 0 + strideLine[1][case2] = -1 + + beginPos[0][case3] = dimslice-1 + beginPos[1][case3] = 0 + strideJoseph[0][case3] = 0 + strideJoseph[1][case3] = 1 + strideLine[0][case3] = -1 + strideLine[1][case3] = 0 + + beginPos[0][case4] = 0 + beginPos[1][case4] = dimslice-1 + strideJoseph[0][case4] = 0 + strideJoseph[1][case4] = -1 + strideLine[0][case4] = 1 + strideLine[1][case4] = 0 + + # For debug purpose + #~ self.beginPos = beginPos + #~ self.strideJoseph = strideJoseph + #~ self.strideLine = strideLine + # + + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos) + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph) + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine) + + def _get_local_mem(self): + return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes + + def cpy2d_to_sino(self, dst): + ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2 + sino_shape_ocl = np.int32(ndrange) + wg = None + kernel_args = ( + dst.data, + self._d_sino, + np.int32(self.dwidth), + np.int32(self._dimrecx), + np.int32((0, 0)), + np.int32((0, 0)), + sino_shape_ocl + ) + return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) + + def cpy2d_to_slice(self, src): + """ + copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2) + """ + ndrange = (int(self.shape[1]), int(self.shape[0])) #self.shape[::-1] # pyopencl < 2015.2 + wg = None + slice_shape_ocl = np.int32(ndrange) + kernel_args = ( + self.cl_mem["d_slice"].data, + src, + np.int32(self.shape[1]+2), + np.int32(self.shape[1]), + np.int32((1, 1)), + np.int32((0, 0)), + slice_shape_ocl + ) + return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) + + def projection(self, image=None, dst=None): + """Perform the projection on an input image + + :param image: Image to project + :return: A sinogram + """ + events = [] + with self.sem: + if image is not None: + assert image.ndim == 2, "Treat only 2D images" + assert image.shape[0] == self.shape[0], "image shape is OK" + assert image.shape[1] == self.shape[1], "image shape is OK" + if not(self.is_cpu): + self.transfer_to_texture(image) + slice_ref = self.d_image_tex + else: + self.transfer_to_slice(image) + slice_ref = self.cl_mem["d_slice"].data + else: + if self.is_cpu: + slice_ref = self.cl_mem["d_slice"].data + else: + slice_ref = self.d_image_tex + + kernel_args = ( + self._d_sino, + slice_ref, + np.int32(self.shape[1]), + np.int32(self.dwidth), + self.cl_mem["d_angles"], + np.float32(self.axis_pos0), + self.cl_mem["d_axis_corrections"].data, # TODO custom + self.cl_mem["d_beginPos"], + self.cl_mem["d_strideJoseph"], + self.cl_mem["d_strideLine"], + np.int32(self.nprojs), + self._dimrecx, + self._dimrecy, + self.offset_x, + self.offset_y, + np.int32(1), # josephnoclip, 1 by default + np.int32(self.normalize) + ) + + # Call the kernel + if self.is_cpu: + event_pj = self.kernels.forward_kernel_cpu( + self.queue, + self.ndrange, + self.wg, + *kernel_args + ) + else: + event_pj = self.kernels.forward_kernel( + self.queue, + self.ndrange, + self.wg, + *kernel_args + ) + events.append(EventDescription("projection", event_pj)) + if dst is None: + self._ex_sino[:] = 0 + ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino) + events.append(EventDescription("copy D->H result", ev)) + ev.wait() + res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth]) + else: + ev = self.cpy2d_to_sino(dst) + events.append(EventDescription("copy D->D result", ev)) + ev.wait() + res = dst + # /with self.sem + if self.profile: + self.events += events + #~ res = self._ex_sino + return res + + __call__ = projection diff --git a/silx/opencl/reconstruction.py b/silx/opencl/reconstruction.py new file mode 100644 index 0000000..cb68680 --- /dev/null +++ b/silx/opencl/reconstruction.py @@ -0,0 +1,381 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for tomographic reconstruction algorithms""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["P. Paleo"] +__license__ = "MIT" +__date__ = "19/09/2017" + +import logging +import numpy as np + +from .common import pyopencl +from .processing import OpenclProcessing +from .backprojection import Backprojection +from .projection import Projection +from .linalg import LinAlg + +import pyopencl.array as parray +from pyopencl.elementwise import ElementwiseKernel +logger = logging.getLogger(__name__) + +cl = pyopencl + + +class ReconstructionAlgorithm(OpenclProcessing): + """ + A parent class for all iterative tomographic reconstruction algorithms + + :param sino_shape: shape of the sinogram. The sinogram is in the format + (n_b, n_a) where n_b is the number of detector bins and + n_a is the number of angles. + :param slice_shape: Optional, shape of the reconstructed slice. + By default, it is a square slice where the dimension + is the "x dimension" of the sinogram (number of bins). + :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`. + :param angles: Optional, a list of custom angles in radian. + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel level, + store profiling elements (makes code slightly slower) + """ + + def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, + ctx=None, devicetype="all", platformid=None, deviceid=None, + profile=False + ): + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + + # Create a backprojector + self.backprojector = Backprojection( + sino_shape, + slice_shape=slice_shape, + axis_position=axis_position, + angles=angles, + ctx=self.ctx, + profile=profile + ) + # Create a projector + self.projector = Projection( + self.backprojector.slice_shape, + self.backprojector.angles, + axis_position=axis_position, + detector_width=self.backprojector.num_bins, + normalize=False, + ctx=self.ctx, + profile=profile + ) + self.sino_shape = sino_shape + self.is_cpu = self.backprojector.is_cpu + # Arrays + self.d_data = parray.zeros(self.queue, sino_shape, dtype=np.float32) + self.d_sino = parray.zeros_like(self.d_data) + self.d_x = parray.zeros(self.queue, + self.backprojector.slice_shape, + dtype=np.float32) + self.d_x_old = parray.zeros_like(self.d_x) + + self.add_to_cl_mem({ + "d_data": self.d_data, + "d_sino": self.d_sino, + "d_x": self.d_x, + "d_x_old": self.d_x_old, + }) + + def proj(self, d_slice, d_sino): + """ + Project d_slice to d_sino + """ + self.projector.transfer_device_to_texture(d_slice.data) #.wait() + self.projector.projection(dst=d_sino) + + def backproj(self, d_sino, d_slice): + """ + Backproject d_sino to d_slice + """ + self.backprojector.transfer_device_to_texture(d_sino.data) #.wait() + self.backprojector.backprojection(dst=d_slice) + + +class SIRT(ReconstructionAlgorithm): + """ + A class for the SIRT algorithm + + :param sino_shape: shape of the sinogram. The sinogram is in the format + (n_b, n_a) where n_b is the number of detector bins and + n_a is the number of angles. + :param slice_shape: Optional, shape of the reconstructed slice. + By default, it is a square slice where the dimension is + the "x dimension" of the sinogram (number of bins). + :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`. + :param angles: Optional, a list of custom angles in radian. + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel level, + store profiling elements (makes code slightly slower) + + .. warning:: This is a beta version of the SIRT algorithm. Reconstruction + fails for at least on CPU (Xeon E3-1245 v5) using the AMD opencl + implementation. + """ + + def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, + ctx=None, devicetype="all", platformid=None, deviceid=None, + profile=False + ): + + ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape, + axis_position=axis_position, angles=angles, + ctx=ctx, devicetype=devicetype, platformid=platformid, + deviceid=deviceid, profile=profile) + self.compute_preconditioners() + + def compute_preconditioners(self): + """ + Create a diagonal preconditioner for the projection and backprojection + operator. + Each term of the diagonal is the sum of the projector/backprojector + along rows [1], i.e the projection/backprojection of an array of ones. + + [1] Jens Gregor and Thomas Benson, + Computational Analysis and Improvement of SIRT, + IEEE transactions on medical imaging, vol. 27, no. 7, 2008 + """ + + # r_{i,i} = 1/(sum_j a_{i,j}) + slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32) + R = 1./self.projector.projection(slice_ones) # could be all done on GPU, but I want extra checks + R[np.logical_not(np.isfinite(R))] = 1. # In the case where the rotation axis is excentred + self.d_R = parray.to_device(self.queue, R) + # c_{j,j} = 1/(sum_i a_{i,j}) + sino_ones = np.ones(self.sino_shape, dtype=np.float32) + C = 1./self.backprojector.backprojection(sino_ones) + C[np.logical_not(np.isfinite(C))] = 1. # In the case where the rotation axis is excentred + self.d_C = parray.to_device(self.queue, C) + + self.add_to_cl_mem({ + "d_R": self.d_R, + "d_C": self.d_C + }) + + # TODO: compute and possibly return the residual + def run(self, data, n_it): + """ + Run n_it iterations of the SIRT algorithm. + """ + cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32))) + + d_x_old = self.d_x_old + d_x = self.d_x + d_R = self.d_R + d_C = self.d_C + d_sino = self.d_sino + d_x *= 0 + + for k in range(n_it): + d_x_old[:] = d_x[:] + # x{k+1} = x{k} - C A^T R (A x{k} - b) + self.proj(d_x, d_sino) + d_sino -= self.d_data + d_sino *= d_R + if self.is_cpu: + # This sync is necessary when using CPU, while it is not for GPU + d_sino.finish() + self.backproj(d_sino, d_x) + d_x *= -d_C + d_x += d_x_old + if self.is_cpu: + # This sync is necessary when using CPU, while it is not for GPU + d_x.finish() + + return d_x + + __call__ = run + + +class TV(ReconstructionAlgorithm): + """ + A class for reconstruction with Total Variation regularization using the + Chambolle-Pock TV reconstruction algorithm. + + :param sino_shape: shape of the sinogram. The sinogram is in the format + (n_b, n_a) where n_b is the number of detector bins and + n_a is the number of angles. + :param slice_shape: Optional, shape of the reconstructed slice. By default, + it is a square slice where the dimension is the + "x dimension" of the sinogram (number of bins). + :param axis_position: Optional, axis position. Default is + `(shape[1]-1)/2.0`. + :param angles: Optional, a list of custom angles in radian. + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly slower) + + .. warning:: This is a beta version of the Chambolle-Pock TV algorithm. + Reconstruction fails for at least on CPU (Xeon E3-1245 v5) using + the AMD opencl implementation. + """ + + def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, + ctx=None, devicetype="all", platformid=None, deviceid=None, + profile=False + ): + ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape, + axis_position=axis_position, angles=angles, + ctx=ctx, devicetype=devicetype, platformid=platformid, + deviceid=deviceid, profile=profile) + self.compute_preconditioners() + + # Create a LinAlg instance + self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx) + # Positivity constraint + self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a", "a[i] = max(a[i], 0.0f);") + # Projection onto the L-infinity ball of radius Lambda + self.elwise_proj_linf = ElementwiseKernel( + self.ctx, + "float2* a, float Lambda", + "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);", + "elwise_proj_linf" + ) + # Additional arrays + self.linalg.gradient(self.d_x) + self.d_p = parray.zeros_like(self.linalg.cl_mem["d_gradient"]) + self.d_q = parray.zeros_like(self.d_data) + self.d_g = self.linalg.d_image + self.d_tmp = parray.zeros_like(self.d_x) + self.add_to_cl_mem({ + "d_p": self.d_p, + "d_q": self.d_q, + "d_tmp": self.d_tmp, + }) + + self.theta = 1.0 + + def compute_preconditioners(self): + """ + Create a diagonal preconditioner for the projection and backprojection + operator. + Each term of the diagonal is the sum of the projector/backprojector + along rows [2], + i.e the projection/backprojection of an array of ones. + + [2] T. Pock, A. Chambolle, + Diagonal preconditioning for first order primal-dual algorithms in + convex optimization, + International Conference on Computer Vision, 2011 + """ + + # Compute the diagonal preconditioner "Sigma" + slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32) + Sigma_k = 1./self.projector.projection(slice_ones) + Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1. + self.d_Sigma_k = parray.to_device(self.queue, Sigma_k) + self.d_Sigma_kp1 = self.d_Sigma_k + 1 # TODO: memory vs computation + self.Sigma_grad = 1/2.0 # For discrete gradient, sum|D_i,j| = 2 along lines or cols + + # Compute the diagonal preconditioner "Tau" + sino_ones = np.ones(self.sino_shape, dtype=np.float32) + C = self.backprojector.backprojection(sino_ones) + Tau = 1./(C + 2.) + self.d_Tau = parray.to_device(self.queue, Tau) + + self.add_to_cl_mem({ + "d_Sigma_k": self.d_Sigma_k, + "d_Sigma_kp1": self.d_Sigma_kp1, + "d_Tau": self.d_Tau + }) + + def run(self, data, n_it, Lambda, pos_constraint=False): + """ + Run n_it iterations of the TV-regularized reconstruction, + with the regularization parameter Lambda. + """ + cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32))) + + d_x = self.d_x + d_x_old = self.d_x_old + d_tmp = self.d_tmp + d_sino = self.d_sino + d_p = self.d_p + d_q = self.d_q + d_g = self.d_g + + d_x *= 0 + d_p *= 0 + d_q *= 0 + + for k in range(0, n_it): + # Update primal variables + d_x_old[:] = d_x[:] + #~ x = x + Tau*div(p) - Tau*Kadj(q) + self.backproj(d_q, d_tmp) + self.linalg.divergence(d_p) + # TODO: this in less than three ops (one kernel ?) + d_g -= d_tmp # d_g -> L.d_image + d_g *= self.d_Tau + d_x += d_g + + if pos_constraint: + self.elwise_clamp(d_x) + + # Update dual variables + #~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda) + d_tmp[:] = d_x[:] + # FIXME: mul_add is out of place, put an equivalent thing in linalg... + #~ d_tmp.mul_add(1 + theta, d_x_old, -theta) + d_tmp *= 1+self.theta + d_tmp -= self.theta*d_x_old + self.linalg.gradient(d_tmp) + # TODO: out of place mul_add + #~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad) + self.linalg.cl_mem["d_gradient"] *= self.Sigma_grad + d_p += self.linalg.cl_mem["d_gradient"] + self.elwise_proj_linf(d_p, Lambda) + + #~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k) + self.proj(d_tmp, d_sino) + # TODO: this in less instructions + d_sino -= self.d_data + d_sino *= self.d_Sigma_k + d_q += d_sino + d_q /= self.d_Sigma_kp1 + return d_x + + __call__ = run diff --git a/silx/opencl/setup.py b/silx/opencl/setup.py index d0181cd..a2ae244 100644 --- a/silx/opencl/setup.py +++ b/silx/opencl/setup.py @@ -1,6 +1,6 @@ # coding: utf-8 # -# Copyright (C) 2016 European Synchrotron Radiation Facility +# Copyright (C) 2016-2017 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -27,14 +27,17 @@ __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" __authors__ = ["J. Kieffer"] -__date__ = "08/09/2016" +__date__ = "16/08/2017" +import os.path from numpy.distutils.misc_util import Configuration def configuration(parent_package='', top_path=None): config = Configuration('opencl', parent_package, top_path) - config.add_subpackage('sift') + path = os.path.dirname(os.path.abspath(__file__)) + if os.path.exists(os.path.join(path, 'sift')): + config.add_subpackage('sift') config.add_subpackage('test') return config diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py index 24aa06e..f6aadcd 100644 --- a/silx/opencl/test/__init__.py +++ b/silx/opencl/test/__init__.py @@ -24,18 +24,31 @@ __authors__ = ["J. Kieffer"] __license__ = "MIT" -__date__ = "15/03/2017" +__date__ = "01/09/2017" +import os import unittest from . import test_addition from . import test_medfilt -from ..sift import test as test_sift - +from . import test_backprojection +from . import test_projection +from . import test_linalg +from . import test_array_utils def suite(): test_suite = unittest.TestSuite() test_suite.addTests(test_addition.suite()) test_suite.addTests(test_medfilt.suite()) - test_suite.addTests(test_sift.suite()) + test_suite.addTests(test_backprojection.suite()) + test_suite.addTests(test_projection.suite()) + test_suite.addTests(test_linalg.suite()) + test_suite.addTests(test_array_utils.suite()) + + # Allow to remove sift from the project + test_base_dir = os.path.dirname(__file__) + sift_dir = os.path.join(test_base_dir, "..", "sift") + if os.path.exists(sift_dir): + from ..sift import test as test_sift + test_suite.addTests(test_sift.suite()) return test_suite diff --git a/silx/opencl/test/test_array_utils.py b/silx/opencl/test/test_array_utils.py new file mode 100644 index 0000000..833d828 --- /dev/null +++ b/silx/opencl/test/test_array_utils.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Test of the OpenCL array_utils""" + +from __future__ import division, print_function + +__authors__ = ["Pierre paleo"] +__license__ = "MIT" +__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "14/06/2017" + + +import time +import logging +import numpy as np +import unittest +try: + import mako +except ImportError: + mako = None +from ..common import ocl +if ocl: + import pyopencl as cl + import pyopencl.array as parray + from .. import linalg +from ..utils import get_opencl_code +from silx.test.utils import utilstest + +logger = logging.getLogger(__name__) +try: + from scipy.ndimage.filters import laplace + _has_scipy = True +except ImportError: + _has_scipy = False + + + +@unittest.skipUnless(ocl and mako, "PyOpenCl is missing") +class TestCpy2d(unittest.TestCase): + + def setUp(self): + if ocl is None: + return + self.ctx = ocl.create_context() + if logger.getEffectiveLevel() <= logging.INFO: + self.PROFILE = True + self.queue = cl.CommandQueue( + self.ctx, + properties=cl.command_queue_properties.PROFILING_ENABLE) + else: + self.PROFILE = False + self.queue = cl.CommandQueue(self.ctx) + self.allocate_arrays() + self.program = cl.Program(self.ctx, get_opencl_code("array_utils")).build() + + def allocate_arrays(self): + """ + Allocate various types of arrays for the tests + """ + self.prng_state = np.random.get_state() + # Generate arrays of random shape + self.shape1 = np.random.randint(20, high=512, size=(2,)) + self.shape2 = np.random.randint(20, high=512, size=(2,)) + self.array1 = np.random.rand(*self.shape1).astype(np.float32) + self.array2 = np.random.rand(*self.shape2).astype(np.float32) + self.d_array1 = parray.to_device(self.queue, self.array1) + self.d_array2 = parray.to_device(self.queue, self.array2) + # Generate random offsets + offset1_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10) + offset1_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10) + offset2_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10) + offset2_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10) + self.offset1 = (offset1_y, offset1_x) + self.offset2 = (offset2_y, offset2_x) + # Compute the size of the rectangle to transfer + size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1) + size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1) + self.transfer_shape = (size_y, size_x) + + def tearDown(self): + self.array1 = None + self.array2 = None + self.d_array1.data.release() + self.d_array2.data.release() + self.d_array1 = None + self.d_array2 = None + self.ctx = None + self.queue = None + + def compare(self, result, reference): + errmax = np.max(np.abs(result - reference)) + logger.info("Max error = %e" % (errmax)) + self.assertTrue(errmax == 0, str("Max error is too high"))#. PRNG state was %s" % str(self.prng_state))) + + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_cpy2d(self): + """ + Test rectangular transfer of self.d_array1 to self.d_array2 + """ + # Reference + o1 = self.offset1 + o2 = self.offset2 + T = self.transfer_shape + logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s, + (N2_y, N2_x) = %s: + array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" % + ( + str(self.shape1), str(self.shape2), + o2[0], o2[0] + T[0], + o2[1], o2[1] + T[1], + o1[0], o1[0] + T[0], + o1[1], o1[1] + T[1] + ) + ) + self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]] + kernel_args = ( + self.d_array2.data, + self.d_array1.data, + np.int32(self.shape2[1]), + np.int32(self.shape1[1]), + np.int32(self.offset2[::-1]), + np.int32(self.offset1[::-1]), + np.int32(self.transfer_shape[::-1]) + ) + wg = None + ndrange = self.transfer_shape[::-1] + self.program.cpy2d(self.queue, ndrange, wg, *kernel_args) + res = self.d_array2.get() + self.compare(res, self.array2) + + +def suite(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestCpy2d("test_cpy2d")) + return testSuite + +if __name__ == '__main__': + unittest.main(defaultTest="suite") diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py new file mode 100644 index 0000000..342bd2f --- /dev/null +++ b/silx/opencl/test/test_backprojection.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Test of the filtered backprojection module""" + +from __future__ import division, print_function + +__authors__ = ["Pierre paleo"] +__license__ = "MIT" +__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "05/10/2017" + + +import time +import logging +import numpy +import unittest +try: + import mako +except ImportError: + mako = None +from ..common import ocl +if ocl: + from .. import backprojection +from silx.test.utils import utilstest + +logger = logging.getLogger(__name__) + + +def generate_coords(img_shp, center=None): + """ + Return two 2D arrays containing the indexes of an image. + The zero is at the center of the image. + """ + l_r, l_c = float(img_shp[0]), float(img_shp[1]) + R, C = numpy.mgrid[:l_r, :l_c] + if center is None: + center0, center1 = l_r / 2., l_c / 2. + else: + center0, center1 = center + R = R + 0.5 - center0 + C = C + 0.5 - center1 + return R, C + + +def clip_circle(img, center=None, radius=None): + """ + Puts zeros outside the inscribed circle of the image support. + """ + R, C = generate_coords(img.shape, center) + M = R * R + C * C + res = numpy.zeros_like(img) + if radius is None: + radius = img.shape[0] / 2. - 1 + mask = M < radius * radius + res[mask] = img[mask] + return res + + +@unittest.skipUnless(ocl and mako, "PyOpenCl is missing") +class TestFBP(unittest.TestCase): + + def setUp(self): + if ocl is None: + return + # ~ if sys.platform.startswith('darwin'): + # ~ self.skipTest("Backprojection is not implemented on CPU for OS X yet") + self.getfiles() + self.fbp = backprojection.Backprojection(self.sino.shape, profile=True) + if self.fbp.compiletime_workgroup_size < 16: + self.skipTest("Current implementation of OpenCL backprojection is not supported on this platform yet") + + def tearDown(self): + self.sino = None +# self.fbp.log_profile() + self.fbp = None + + def getfiles(self): + # load sinogram of 512x512 MRI phantom + self.sino = numpy.load(utilstest.getfile("sino500.npz"))["data"] + # load reconstruction made with ASTRA FBP (with filter designed in spatial domain) + self.reference_rec = numpy.load(utilstest.getfile("rec_astra_500.npz"))["data"] + + def measure(self): + "Common measurement of timings" + t1 = time.time() + try: + result = self.fbp.filtered_backprojection(self.sino) + except RuntimeError as msg: + logger.error(msg) + return + t2 = time.time() + return t2 - t1, result + + def compare(self, res): + """ + Compare a result with the reference reconstruction. + Only the valid reconstruction zone (inscribed circle) is taken into + account + """ + res_clipped = clip_circle(res) + ref_clipped = clip_circle(self.reference_rec) + delta = abs(res_clipped - ref_clipped) + bad = delta > 1 +# numpy.save("/tmp/bad.npy", bad.astype(int)) + logger.debug("Absolute difference: %s with %s outlier pixels out of %s", delta.max(), bad.sum(), numpy.prod(bad.shape)) + return delta.max() + + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_fbp(self): + """ + tests FBP + """ + # Test single reconstruction + # -------------------------- + t, res = self.measure() + if t is None: + logger.info("test_fp: skipped") + else: + logger.info("test_backproj: time = %.3fs" % t) + err = self.compare(res) + msg = str("Max error = %e" % err) + logger.info(msg) + # TODO: cannot do better than 1e0 ? + # The plain backprojection was much better, so it must be an issue in the filtering process + self.assertTrue(err < 1., "Max error is too high") + # Test multiple reconstructions + # ----------------------------- + res0 = numpy.copy(res) + for i in range(10): + res = self.fbp.filtered_backprojection(self.sino) + errmax = numpy.max(numpy.abs(res - res0)) + self.assertTrue(errmax < 1.e-6, "Max error is too high") + + +def suite(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestFBP("test_fbp")) + return testSuite + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") diff --git a/silx/opencl/test/test_linalg.py b/silx/opencl/test/test_linalg.py new file mode 100644 index 0000000..7d03983 --- /dev/null +++ b/silx/opencl/test/test_linalg.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Test of the linalg module""" + +from __future__ import division, print_function + +__authors__ = ["Pierre paleo"] +__license__ = "MIT" +__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "14/06/2017" + + +import time +import logging +import numpy as np +import unittest +try: + import mako +except ImportError: + mako = None +from ..common import ocl +if ocl: + import pyopencl as cl + import pyopencl.array as parray + from .. import linalg +from silx.test.utils import utilstest + +logger = logging.getLogger(__name__) +try: + from scipy.ndimage.filters import laplace + _has_scipy = True +except ImportError: + _has_scipy = False + + +# TODO move this function in math or image ? +def gradient(img): + ''' + Compute the gradient of an image as a numpy array + Code from https://github.com/emmanuelle/tomo-tv/ + ''' + shape = [img.ndim, ] + list(img.shape) + gradient = np.zeros(shape, dtype=img.dtype) + slice_all = [0, slice(None, -1),] + for d in range(img.ndim): + gradient[slice_all] = np.diff(img, axis=d) + slice_all[0] = d + 1 + slice_all.insert(1, slice(None)) + return gradient + + +# TODO move this function in math or image ? +def divergence(grad): + ''' + Compute the divergence of a gradient + Code from https://github.com/emmanuelle/tomo-tv/ + ''' + res = np.zeros(grad.shape[1:]) + for d in range(grad.shape[0]): + this_grad = np.rollaxis(grad[d], d) + this_res = np.rollaxis(res, d) + this_res[:-1] += this_grad[:-1] + this_res[1:-1] -= this_grad[:-2] + this_res[-1] -= this_grad[-2] + return res + + +@unittest.skipUnless(ocl and mako, "PyOpenCl is missing") +class TestLinAlg(unittest.TestCase): + + def setUp(self): + if ocl is None: + return + self.getfiles() + self.la = linalg.LinAlg(self.image.shape) + self.allocate_arrays() + + def allocate_arrays(self): + """ + Allocate various types of arrays for the tests + """ + # numpy images + self.grad = np.zeros(self.image.shape, dtype=np.complex64) + self.grad2 = np.zeros((2,) + self.image.shape, dtype=np.float32) + self.grad_ref = gradient(self.image) + self.div_ref = divergence(self.grad_ref) + self.image2 = np.zeros_like(self.image) + # Device images + self.gradient_parray = parray.zeros(self.la.queue, self.image.shape, np.complex64) + # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2), + # but platforms not suporting openCL 1.2 have a problem with enqueue_fill_buffer, + # so we use the parray "fill" utility + self.gradient_buffer = self.gradient_parray.data + # Do the same for image + self.image_parray = parray.to_device(self.la.queue, self.image) + self.image_buffer = self.image_parray.data + # Refs + tmp = np.zeros(self.image.shape, dtype=np.complex64) + tmp.real = np.copy(self.grad_ref[0]) + tmp.imag = np.copy(self.grad_ref[1]) + self.grad_ref_parray = parray.to_device(self.la.queue, tmp) + self.grad_ref_buffer = self.grad_ref_parray.data + + def tearDown(self): + self.image = None + self.image2 = None + self.grad = None + self.grad2 = None + self.grad_ref = None + self.div_ref = None + self.gradient_parray.data.release() + self.gradient_parray = None + self.gradient_buffer = None + self.image_parray.data.release() + self.image_parray = None + self.image_buffer = None + self.grad_ref_parray.data.release() + self.grad_ref_parray = None + self.grad_ref_buffer = None + + def getfiles(self): + # load 512x512 MRI phantom - TODO include Lena or ascent once a .npz is available + self.image = np.load(utilstest.getfile("Brain512.npz"))["data"] + + def compare(self, result, reference, abstol, name): + errmax = np.max(np.abs(result - reference)) + logger.info("%s: Max error = %e" % (name, errmax)) + self.assertTrue(errmax < abstol, str("%s: Max error is too high" % name)) + + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_gradient(self): + arrays = { + "numpy.ndarray": self.image, + "buffer": self.image_buffer, + "parray": self.image_parray + } + for desc, image in arrays.items(): + # Test with dst on host (numpy.ndarray) + res = self.la.gradient(image, return_to_host=True) + self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc)) + # Test with dst on device (pyopencl.Buffer) + self.la.gradient(image, dst=self.gradient_buffer) + cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer) + self.grad2[0] = self.grad.real + self.grad2[1] = self.grad.imag + self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc)) + # Test with dst on device (pyopencl.Array) + self.la.gradient(image, dst=self.gradient_parray) + self.grad = self.gradient_parray.get() + self.grad2[0] = self.grad.real + self.grad2[1] = self.grad.imag + self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc)) + + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_divergence(self): + arrays = { + "numpy.ndarray": self.grad_ref, + "buffer": self.grad_ref_buffer, + "parray": self.grad_ref_parray + } + for desc, grad in arrays.items(): + # Test with dst on host (numpy.ndarray) + res = self.la.divergence(grad, return_to_host=True) + self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc)) + # Test with dst on device (pyopencl.Buffer) + self.la.divergence(grad, dst=self.image_buffer) + cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer) + self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc)) + # Test with dst on device (pyopencl.Array) + self.la.divergence(grad, dst=self.image_parray) + self.image2 = self.image_parray.get() + self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc)) + + @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing") + def test_laplacian(self): + laplacian_ref = laplace(self.image) + # Laplacian = div(grad) + self.la.gradient(self.image) + laplacian_ocl = self.la.divergence(self.la.d_gradient, return_to_host=True) + self.compare(laplacian_ocl, laplacian_ref, 1e-6, "laplacian") + + +def suite(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestLinAlg("test_gradient")) + testSuite.addTest(TestLinAlg("test_divergence")) + testSuite.addTest(TestLinAlg("test_laplacian")) + return testSuite + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") diff --git a/silx/opencl/test/test_projection.py b/silx/opencl/test/test_projection.py new file mode 100644 index 0000000..c9a3a1c --- /dev/null +++ b/silx/opencl/test/test_projection.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Test of the forward projection module""" + +from __future__ import division, print_function + +__authors__ = ["Pierre paleo"] +__license__ = "MIT" +__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "14/06/2017" + + +import time +import logging +import numpy as np +import unittest +try: + import mako +except ImportError: + mako = None +from ..common import ocl +if ocl: + from .. import projection +from silx.test.utils import utilstest + +logger = logging.getLogger(__name__) + + + +@unittest.skipUnless(ocl and mako, "PyOpenCl is missing") +class TestProj(unittest.TestCase): + + def setUp(self): + if ocl is None: + return + #~ if sys.platform.startswith('darwin'): + #~ self.skipTest("Projection is not implemented on CPU for OS X yet") + self.getfiles() + n_angles = self.sino.shape[0] + self.proj = projection.Projection(self.phantom.shape, n_angles) + if self.proj.compiletime_workgroup_size < 16: + self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet") + + + def tearDown(self): + self.phantom = None + self.sino = None + self.proj = None + + + def getfiles(self): + # load 512x512 MRI phantom + self.phantom = np.load(utilstest.getfile("Brain512.npz"))["data"] + # load sinogram computed with PyHST + self.sino = np.load(utilstest.getfile("sino500_pyhst.npz"))["data"] + + + def measure(self): + "Common measurement of timings" + t1 = time.time() + try: + result = self.proj.projection(self.phantom) + except RuntimeError as msg: + logger.error(msg) + return + t2 = time.time() + return t2 - t1, result + + + def compare(self, res): + """ + Compare a result with the reference reconstruction. + Only the valid reconstruction zone (inscribed circle) is taken into account + """ + # Compare with the original phantom. + # TODO: compare a standard projection + ref = self.sino + return np.max(np.abs(res - ref)) + + + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_proj(self): + """ + tests Projection + """ + # Test single reconstruction + # -------------------------- + t, res = self.measure() + if t is None: + logger.info("test_proj: skipped") + else: + logger.info("test_proj: time = %.3fs" % t) + err = self.compare(res) + msg = str("Max error = %e" % err) + logger.info(msg) + # Interpolation differs at some lines, giving relative error of 10/50000 + self.assertTrue(err < 20., "Max error is too high") + # Test multiple reconstructions + # ----------------------------- + res0 = np.copy(res) + for i in range(10): + res = self.proj.projection(self.phantom) + errmax = np.max(np.abs(res - res0)) + self.assertTrue(errmax < 1.e-6, "Max error is too high") + + + +def suite(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestProj("test_proj")) + return testSuite + + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") diff --git a/silx/opencl/utils.py b/silx/opencl/utils.py index 58becb0..f558c1b 100644 --- a/silx/opencl/utils.py +++ b/silx/opencl/utils.py @@ -1,30 +1,30 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- +# /*########################################################################## +# Copyright (C) 2017 European Synchrotron Radiation Facility # -# Project: Sift implementation in Python + OpenCL -# https://github.com/silx-kit/silx +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: # -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. # -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. # -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - +# ############################################################################*/ +""" +Project: Sift implementation in Python + OpenCL + https://github.com/silx-kit/silx +""" from __future__ import division @@ -32,12 +32,12 @@ __authors__ = ["Jérôme Kieffer", "Pierre Paleo"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "15/03/2017" -__status__ = "Production" +__date__ = "06/09/2017" +__status__ = "Production" import os import numpy -from ..resources import resource_filename +from .. import resources from math import log, ceil @@ -77,14 +77,22 @@ def sizeof(shape, dtype="uint8"): return cnt * itemsize -def get_cl_file(filename): - """get the full path of a openCL file +def get_cl_file(resource): + """get the full path of a openCL resource file + + The resource name can be prefixed by the name of a resource directory. For + example "silx:foo.png" identify the resource "foo.png" from the resource + directory "silx". + See also :func:`silx.resources.register_resource_directory`. + :param str resource: Resource name. File name contained if the `opencl` + directory of the resources. :return: the full path of the openCL source file """ - if not filename.endswith(".cl"): - filename += ".cl" - return resource_filename(os.path.join("opencl", filename)) + if not resource.endswith(".cl"): + resource += ".cl" + return resources._resource_filename(resource, + default_directory="opencl") def read_cl_file(filename): @@ -97,6 +105,7 @@ def read_cl_file(filename): lines = [i for i in f.readlines() if not i.startswith("#include ")] return "".join(lines) + get_opencl_code = read_cl_file @@ -109,4 +118,4 @@ def concatenate_cl_kernel(filenames): this method concatenates all the kernel from the list """ - return os.linesep.join(read_cl_file(fn) for fn in filenames) + return os.linesep.join(read_cl_file(fn) for fn in filenames) |