diff options
author | Picca Frédéric-Emmanuel <picca@synchrotron-soleil.fr> | 2019-05-28 08:16:16 +0200 |
---|---|---|
committer | Picca Frédéric-Emmanuel <picca@synchrotron-soleil.fr> | 2019-05-28 08:16:16 +0200 |
commit | 7287b75301a53bae723579b145448d43304272af (patch) | |
tree | df6d1a4595f3352a8c90ce9cba0e71ea0269e98b /silx/opencl | |
parent | 3e5dcad207c1eadeb74fb53f524c3a94fbe19096 (diff) | |
parent | a763e5d1b3921b3194f3d4e94ab9de3fbe08bbdd (diff) |
Update upstream source from tag 'upstream/0.10.1+dfsg'
Update to upstream version '0.10.1+dfsg'
with Debian dir 6b2d4eeabb68177b2b91df4d7527306d5e19409d
Diffstat (limited to 'silx/opencl')
-rw-r--r-- | silx/opencl/backprojection.py | 467 | ||||
-rw-r--r-- | silx/opencl/codec/test/test_byte_offset.py | 9 | ||||
-rw-r--r-- | silx/opencl/processing.py | 30 | ||||
-rw-r--r-- | silx/opencl/sinofilter.py | 559 | ||||
-rw-r--r-- | silx/opencl/statistics.py | 224 | ||||
-rw-r--r-- | silx/opencl/test/__init__.py | 7 | ||||
-rw-r--r-- | silx/opencl/test/test_backprojection.py | 78 | ||||
-rw-r--r-- | silx/opencl/test/test_kahan.py | 269 | ||||
-rw-r--r-- | silx/opencl/test/test_stats.py | 114 |
9 files changed, 1452 insertions, 305 deletions
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py index ca7244e..c257872 100644 --- a/silx/opencl/backprojection.py +++ b/silx/opencl/backprojection.py @@ -29,37 +29,30 @@ from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["A. Mirone, P. Paleo"] __license__ = "MIT" -__date__ = "19/01/2018" +__date__ = "25/01/2019" import logging -import numpy +import numpy as np from .common import pyopencl from .processing import EventDescription, OpenclProcessing, BufferDescription -from .utils import nextpower as nextpow2 +from .sinofilter import SinoFilter +from .sinofilter import fourier_filter as fourier_filter_ +from ..utils.deprecation import deprecated if pyopencl: mf = pyopencl.mem_flags import pyopencl.array as parray else: - raise ImportError("pyopencl is not installed") + raise ImportError("Please install pyopencl in order to use opencl backprojection") logger = logging.getLogger(__name__) -# put in .common ? -try: - from pyfft.cl import Plan as pyfft_Plan - _has_pyfft = True -except ImportError: - _has_pyfft = False -# For silx v0.6 we disable the use FFT on GPU. -_has_pyfft = False - def _sizeof(Type): """ return the size (in bytes) of a scalar type, like the C behavior """ - return numpy.dtype(Type).itemsize + return np.dtype(Type).itemsize def _idivup(a, b): @@ -69,53 +62,14 @@ def _idivup(a, b): return (a + (b - 1)) // b -def fourier_filter(sino, filter_=None, fft_size=None): - """Simple numpy based implementation of fourier space filter - - :param sino: of shape shape = (num_projs, num_bins) - :param filter: filter function to apply in fourier space - :fft_size: size on which perform the fft. May be larger than the sino array - :return: filtered sinogram - """ - assert sino.ndim == 2 - num_projs, num_bins = sino.shape - if fft_size is None: - fft_size = nextpow2(num_bins * 2 - 1) - else: - assert fft_size >= num_bins - if fft_size == num_bins: - sino_zeropadded = sino.astype(numpy.float32) - else: - sino_zeropadded = numpy.zeros((num_projs, fft_size), - dtype=numpy.complex64) - sino_zeropadded[:, :num_bins] = sino.astype(numpy.float32) - - if filter_ is None: - h = numpy.zeros(fft_size, dtype=numpy.float32) - L2 = fft_size // 2 + 1 - h[0] = 1 / 4. - j = numpy.linspace(1, L2, L2 // 2, False) - h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2) - h[L2:] = numpy.copy(h[1:L2 - 1][::-1]) - filter_ = numpy.fft.fft(h).astype(numpy.complex64) - - # Linear convolution - sino_f = numpy.fft.fft(sino, fft_size) - sino_f = sino_f * filter_ - sino_filtered = numpy.fft.ifft(sino_f)[:, :num_bins].real - # Send the filtered sinogram to device - return numpy.ascontiguousarray(sino_filtered.real, dtype=numpy.float32) - - class Backprojection(OpenclProcessing): """A class for performing the backprojection using OpenCL""" kernel_files = ["backproj.cl", "array_utils.cl"] - if _has_pyfft: - kernel_files.append("backproj_helper.cl") def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, filter_name=None, ctx=None, devicetype="all", - platformid=None, deviceid=None, profile=False): + platformid=None, deviceid=None, profile=False, + extra_options=None): """Constructor of the OpenCL (filtered) backprojection :param sino_shape: shape of the sinogram. The sinogram is in the format @@ -139,6 +93,8 @@ class Backprojection(OpenclProcessing): :param profile: switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) + :param extra_options: Advanced extra options in the form of a dict. + Current options are: cutoff, use_numpy_fft """ # OS X enforces a workgroup size of 1 when the kernel has # synchronization barriers if sys.platform.startswith('darwin'): @@ -148,10 +104,32 @@ class Backprojection(OpenclProcessing): OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, platformid=platformid, deviceid=deviceid, profile=profile) - self.shape = sino_shape - self.num_bins = numpy.int32(sino_shape[1]) - self.num_projs = numpy.int32(sino_shape[0]) + self._init_geometry(sino_shape, slice_shape, angles, axis_position, + extra_options) + self._allocate_memory() + self._compute_angles() + self._init_kernels() + self._init_filter(filter_name) + + def _init_geometry(self, sino_shape, slice_shape, angles, axis_position, + extra_options): + """Geometry Initialization + + :param sino_shape: shape of the sinogram. The sinogram is in the format + (n_b, n_a) where n_b is the number of detector bins + and n_a is the number of angles. + :param slice_shape: shape of the reconstructed slice. By + default, it is a square slice where the dimension + is the "x dimension" of the sinogram (number of + bins). + :param angles: list of projection angles in radian. + :param axis_position: axis position + :param dict extra_options: Advanced extra options + """ + self.shape = sino_shape + self.num_bins = np.int32(sino_shape[1]) + self.num_projs = np.int32(sino_shape[0]) self.angles = angles if slice_shape is None: self.slice_shape = (self.num_bins, self.num_bins) @@ -161,152 +139,161 @@ class Backprojection(OpenclProcessing): _idivup(self.slice_shape[0], 32) * 32, _idivup(self.slice_shape[1], 32) * 32 ) - self.slice = numpy.zeros(self.dimrec_shape, dtype=numpy.float32) - self.filter_name = filter_name if filter_name else "Ram-Lak" if axis_position: - self.axis_pos = numpy.float32(axis_position) + self.axis_pos = np.float32(axis_position) else: - self.axis_pos = numpy.float32((sino_shape[1] - 1.) / 2) + self.axis_pos = np.float32((sino_shape[1] - 1.) / 2) self.axis_array = None # TODO: add axis correction front-end + self._init_extra_options(extra_options) + def _init_extra_options(self, extra_options): + """Backprojection extra option initialization + + :param dict extra_options: Advanced extra options + """ + self.extra_options = { + "cutoff": 1., + "use_numpy_fft": False, + } + if extra_options is not None: + self.extra_options.update(extra_options) + + def _allocate_memory(self): + # Host memory + self.slice = np.zeros(self.dimrec_shape, dtype=np.float32) self.is_cpu = False if self.device.type == "CPU": self.is_cpu = True - self.compute_fft_plans() + # Device memory self.buffers = [ - BufferDescription("_d_slice", numpy.prod(self.dimrec_shape), numpy.float32, mf.READ_WRITE), - BufferDescription("d_sino", self.num_projs * self.num_bins, numpy.float32, mf.READ_WRITE), # before transferring to texture (if available) - BufferDescription("d_cos", self.num_projs, numpy.float32, mf.READ_ONLY), - BufferDescription("d_sin", self.num_projs, numpy.float32, mf.READ_ONLY), - BufferDescription("d_axes", self.num_projs, numpy.float32, mf.READ_ONLY), - ] - self.allocate_buffers() - if not(self.is_cpu): - self.allocate_textures() - self.compute_filter() - if self.pyfft_plan: - self.add_to_cl_mem({ - "d_filter": self.d_filter, - "d_sino_z": self.d_sino_z - }) + BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE), + BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available) + BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY), + BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY), + BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY), + ] + self.allocate_buffers(use_array=True) self.d_sino = self.cl_mem["d_sino"] # shorthand - self.compute_angles() - self.local_mem = 256 * 3 * _sizeof(numpy.float32) # constant for all image sizes + # Texture memory (if relevant) + if not(self.is_cpu): + self._allocate_textures() + + # Local memory + self.local_mem = 256 * 3 * _sizeof(np.float32) # constant for all image sizes + + def _compute_angles(self): + if self.angles is None: + self.angles = np.linspace(0, np.pi, self.num_projs, False) + h_cos = np.cos(self.angles).astype(np.float32) + h_sin = np.sin(self.angles).astype(np.float32) + self.cl_mem["d_cos"][:] = h_cos[:] + self.cl_mem["d_sin"][:] = h_sin[:] + if self.axis_array: + self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:] + else: + self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos + + def _init_kernels(self): OpenclProcessing.compile_kernels(self, self.kernel_files) # check that workgroup can actually be (16, 16) self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel") # Workgroup and ndrange sizes are always the same self.wg = (16, 16) self.ndrange = ( - _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], # int(): pyopencl <= 2015.1 - _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] # int(): pyopencl <= 2015.1 + _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], + _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] ) - - def compute_angles(self): - if self.angles is None: - self.angles = numpy.linspace(0, numpy.pi, self.num_projs, False) - h_cos = numpy.cos(self.angles).astype(numpy.float32) - h_sin = numpy.sin(self.angles).astype(numpy.float32) - pyopencl.enqueue_copy(self.queue, self.cl_mem["d_cos"], h_cos) - pyopencl.enqueue_copy(self.queue, self.cl_mem["d_sin"], h_sin) - if self.axis_array: - pyopencl.enqueue_copy(self.queue, - self.cl_mem["d_axes"], - self.axis_array.astype(numpy.float32)) + # Prepare arguments for the kernel call + if self.is_cpu: + d_sino_ref = self.d_sino.data else: - pyopencl.enqueue_copy(self.queue, - self.cl_mem["d_axes"], - numpy.ones(self.num_projs, dtype=numpy.float32) * self.axis_pos) + d_sino_ref = self.d_sino_tex + self._backproj_kernel_args = ( + # num of projections (int32) + self.num_projs, + # num of bins (int32) + self.num_bins, + # axis position (float32) + self.axis_pos, + # d_slice (__global float32*) + self.cl_mem["_d_slice"].data, + # d_sino (__read_only image2d_t or float*) + d_sino_ref, + # gpu_offset_x (float32) # TODO custom ? + -np.float32((self.num_bins - 1) / 2. - self.axis_pos), + # gpu_offset_y (float32) # TODO custom ? + -np.float32((self.num_bins - 1) / 2. - self.axis_pos), + # d_cos (__global float32*) + self.cl_mem["d_cos"].data, + # d_sin (__global float32*) + self.cl_mem["d_sin"].data, + # d_axis (__global float32*) + self.cl_mem["d_axes"].data, + # shared mem (__local float32*) + self._get_local_mem() + ) - def allocate_textures(self): + def _allocate_textures(self): """ Allocate the texture for the sinogram. """ self.d_sino_tex = pyopencl.Image( - self.ctx, - mf.READ_ONLY | mf.USE_HOST_PTR, - pyopencl.ImageFormat( - pyopencl.channel_order.INTENSITY, - pyopencl.channel_type.FLOAT - ), - hostbuf=numpy.zeros(self.shape[::-1], dtype=numpy.float32) - ) - - def compute_fft_plans(self): - """ - If pyfft is installed, prepare a batched 1D FFT plan for the filtering - of FBP + self.ctx, + mf.READ_ONLY | mf.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, + pyopencl.channel_type.FLOAT + ), + hostbuf=np.zeros(self.shape[::-1], dtype=np.float32) + ) - """ - self.fft_size = nextpow2(self.num_bins * 2 - 1) - if _has_pyfft: - logger.debug("pyfft is available. Computing FFT plans...") - # batched 1D transform - self.pyfft_plan = pyfft_Plan(self.fft_size, queue=self.queue, - wait_for_finish=True) - self.d_sino_z = parray.zeros(self.queue, - (self.num_projs, self.fft_size), - dtype=numpy.complex64) - logger.debug("... done") - else: - logger.debug("pyfft not available, using numpy.fft") - self.pyfft_plan = None - # TODO: fall-back to fftw if present ? + def _init_filter(self, filter_name): + """Filter initialization - def compute_filter(self): - """ - Compute the filter for FBP + :param str filter_name: filter name """ - if self.filter_name == "Ram-Lak": - L = self.fft_size - h = numpy.zeros(L, dtype=numpy.float32) - L2 = L // 2 + 1 - h[0] = 1 / 4. - j = numpy.linspace(1, L2, L2 // 2, False) - h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2) - h[L2:] = numpy.copy(h[1:L2 - 1][::-1]) - else: - # TODO: other filters - raise ValueError("Filter %s is not available" % self.filter_name) - self.filter = h - if self.pyfft_plan: - self.d_filter = parray.to_device(self.queue, h.astype(numpy.complex64)) - self.pyfft_plan.execute(self.d_filter.data) - else: - self.filter = numpy.fft.fft(h).astype(numpy.complex64) - self.d_filter = None + self.filter_name = filter_name or "ram-lak" + self.sino_filter = SinoFilter( + self.shape, + ctx=self.ctx, + filter_name=self.filter_name, + extra_options=self.extra_options, + ) def _get_local_mem(self): return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes - def cpy2d_to_slice(self, dst): - ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0])) # pyopencl < 2015.2 - slice_shape_ocl = numpy.int32(ndrange) + def _cpy2d_to_slice(self, dst): + ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0])) + slice_shape_ocl = np.int32(ndrange) wg = None kernel_args = ( dst.data, - self.cl_mem["_d_slice"], - numpy.int32(self.slice_shape[1]), - numpy.int32(self.dimrec_shape[1]), - numpy.int32((0, 0)), - numpy.int32((0, 0)), + self.cl_mem["_d_slice"].data, + np.int32(self.slice_shape[1]), + np.int32(self.dimrec_shape[1]), + np.int32((0, 0)), + np.int32((0, 0)), slice_shape_ocl ) return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) - def transfer_to_texture(self, sino): + def _transfer_to_texture(self, sino): + if isinstance(sino, parray.Array): + return self._transfer_device_to_texture(sino) sino2 = sino - if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == numpy.float32): - sino2 = numpy.ascontiguousarray(sino, dtype=numpy.float32) + if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32): + sino2 = np.ascontiguousarray(sino, dtype=np.float32) if self.is_cpu: ev = pyopencl.enqueue_copy( self.queue, - self.d_sino, + self.d_sino.data, sino2 ) what = "transfer filtered sino H->D buffer" + ev.wait() else: ev = pyopencl.enqueue_copy( self.queue, @@ -318,21 +305,22 @@ class Backprojection(OpenclProcessing): what = "transfer filtered sino H->D texture" return EventDescription(what, ev) - def transfer_device_to_texture(self, d_sino): + def _transfer_device_to_texture(self, d_sino): if self.is_cpu: if id(self.d_sino) == id(d_sino): return ev = pyopencl.enqueue_copy( self.queue, - self.d_sino, + self.d_sino.data, d_sino ) what = "transfer filtered sino D->D buffer" + ev.wait() else: ev = pyopencl.enqueue_copy( self.queue, self.d_sino_tex, - d_sino, + d_sino.data, offset=0, origin=(0, 0), region=self.shape[::-1] @@ -340,65 +328,35 @@ class Backprojection(OpenclProcessing): what = "transfer filtered sino D->D texture" return EventDescription(what, ev) - def backprojection(self, sino=None, dst=None): + def backprojection(self, sino, output=None): """Perform the backprojection on an input sinogram - :param sino: sinogram. If provided, it returns the plain backprojection. - :param dst: destination (pyopencl.Array). If provided, the result will be written in this array. + :param sino: sinogram. + :param output: optional, output slice. + If provided, the result will be written in this array. :return: backprojection of sinogram """ events = [] with self.sem: - - if sino is not None: # assuming numpy.ndarray - events.append(self.transfer_to_texture(sino)) - # Prepare arguments for the kernel call - if self.is_cpu: - d_sino_ref = self.d_sino - else: - d_sino_ref = self.d_sino_tex - kernel_args = ( - self.num_projs, # num of projections (int32) - self.num_bins, # num of bins (int32) - self.axis_pos, # axis position (float32) - self.cl_mem["_d_slice"], # d_slice (__global float32*) - d_sino_ref, # d_sino (__read_only image2d_t or float*) - numpy.float32(0), # gpu_offset_x (float32) - numpy.float32(0), # gpu_offset_y (float32) - self.cl_mem["d_cos"], # d_cos (__global float32*) - self.cl_mem["d_sin"], # d_sin (__global float32*) - self.cl_mem["d_axes"], # d_axis (__global float32*) - self._get_local_mem() # shared mem (__local float32*) - ) - # Call the kernel + events.append(self._transfer_to_texture(sino)) + # Call the backprojection kernel if self.is_cpu: kernel_to_call = self.kernels.backproj_cpu_kernel else: kernel_to_call = self.kernels.backproj_kernel - event_bpj = kernel_to_call( + kernel_to_call( self.queue, self.ndrange, self.wg, - *kernel_args + *self._backproj_kernel_args ) - if dst is None: - self.slice[:] = 0 - events.append(EventDescription("backprojection", event_bpj)) - ev = pyopencl.enqueue_copy(self.queue, self.slice, - self.cl_mem["_d_slice"]) - events.append(EventDescription("copy D->H result", ev)) - ev.wait() - res = numpy.copy(self.slice) - if self.dimrec_shape[0] > self.slice_shape[0] or self.dimrec_shape[1] > self.slice_shape[1]: - res = res[:self.slice_shape[0], :self.slice_shape[1]] - # if the slice is backprojected onto a bigger grid - if self.slice_shape[1] > self.num_bins: - res = res[:self.slice_shape[0], :self.slice_shape[1]] + # Return + if output is None: + res = self.cl_mem["_d_slice"].get() + res = res[:self.slice_shape[0], :self.slice_shape[1]] else: - ev = self.cpy2d_to_slice(dst) - events.append(EventDescription("copy D->D result", ev)) - ev.wait() - res = dst + res = output + self._cpy2d_to_slice(output) # /with self.sem if self.profile: @@ -406,83 +364,34 @@ class Backprojection(OpenclProcessing): return res - def filter_projections(self, sino, rescale=True): - """ - Performs the FBP on a given sinogram. - - :param sinogram: sinogram to (filter-)backproject - :param rescale: if True (default), the sinogram is multiplied with - (pi/n_projs) - """ - if sino.shape[0] != self.num_projs or sino.shape[1] != self.num_bins: - raise ValueError("Expected sinogram with (projs, bins) = (%d, %d)" % (self.num_projs, self.num_bins)) - if rescale: - sino = sino * numpy.pi / self.num_projs - events = [] - # if pyfft is available, all can be done on the device - if self.d_filter is not None: - - # Zero-pad the sinogram. - # TODO: this can be done on GPU with a "Memcpy2D": - # cl.enqueue_copy(queue, dst, src, host_origin=(0,0), buffer_origin=(0,0), region=shape, host_pitches=(sino.shape[1],), buffer_pitches=(self.fft_size,)) - # However it does not work properly, and raises an error for pyopencl < 2017.1 - sino_zeropadded = numpy.zeros((sino.shape[0], self.fft_size), dtype=numpy.complex64) - sino_zeropadded[:, :self.num_bins] = sino.astype(numpy.float32) - sino_zeropadded = numpy.ascontiguousarray(sino_zeropadded, dtype=numpy.complex64) - with self.sem: - # send to GPU - ev = pyopencl.enqueue_copy(self.queue, self.d_sino_z.data, sino_zeropadded) - events.append(EventDescription("Send sino H->D", ev)) - - # FFT (in-place) - self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs) - - # Multiply (complex-wise) with the the filter - ev = self.kernels.mult(self.queue, - tuple(int(i) for i in self.d_sino_z.shape[::-1]), - None, - self.d_sino_z.data, - self.d_filter.data, - numpy.int32(self.fft_size), - self.num_projs - ) - events.append(EventDescription("complex 2D-1D multiplication", ev)) - # Inverse FFT (in-place) - self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs, inverse=True) - # Copy the real part of d_sino_z[:, :self.num_bins] (complex64) to d_sino (float32) - ev = self.kernels.cpy2d_c2r(self.queue, self.shape[::-1], None, - self.d_sino, - self.d_sino_z.data, - self.num_bins, - self.num_projs, - numpy.int32(self.fft_size) - ) - events.append(EventDescription("conversion from complex padded sinogram to sinogram", ev)) - # debug -# ev.wait() -# h_sino = numpy.zeros(sino.shape, dtype=numpy.float32) -# ev = pyopencl.enqueue_copy(self.queue, h_sino, self.d_sino) -# ev.wait() -# numpy.save("/tmp/filtered_sinogram_%s.npy" % self.ctx.devices[0].platform.name.split()[0], h_sino) - events.append(self.transfer_device_to_texture(self.d_sino)) - # ------ - else: # no pyfft - sino_filtered = fourier_filter(sino, filter_=self.filter, fft_size=self.fft_size) - with self.sem: - events.append(self.transfer_to_texture(sino_filtered)) - if self.profile: - self.events += events - - def filtered_backprojection(self, sino): + def filtered_backprojection(self, sino, output=None): """ Compute the filtered backprojection (FBP) on a sinogram. - :param sino: sinogram (`numpy.ndarray`) in the format (projections, - bins) + :param sino: sinogram (`np.ndarray` or `pyopencl.array.Array`) + with the shape (n_projections, n_bins) + :param output: output (`np.ndarray` or `pyopencl.array.Array`). + If nothing is provided, a new numpy array is returned. """ - - self.filter_projections(sino) - res = self.backprojection() + # Filter + self.sino_filter(sino, output=self.d_sino) + # Backproject + res = self.backprojection(self.d_sino, output=output) return res __call__ = filtered_backprojection + + + # ------------------- + # - Compatibility - + # ------------------- + + @deprecated(replacement="Backprojection.sino_filter", since_version="0.10") + def filter_projections(self, sino, rescale=True): + self.sino_filter(sino, output=self.d_sino) + + + +def fourier_filter(sino, filter_=None, fft_size=None): + return fourier_filter_(sino, filter_=filter_, fft_size=fft_size) + diff --git a/silx/opencl/codec/test/test_byte_offset.py b/silx/opencl/codec/test/test_byte_offset.py index 2bfa1d3..9ce1cfc 100644 --- a/silx/opencl/codec/test/test_byte_offset.py +++ b/silx/opencl/codec/test/test_byte_offset.py @@ -45,16 +45,13 @@ import logging import numpy from silx.opencl.common import ocl, pyopencl from silx.opencl.codec import byte_offset -try: - import fabio -except ImportError: - fabio = None +import fabio import unittest logger = logging.getLogger(__name__) -@unittest.skipUnless(ocl and fabio and pyopencl, - "PyOpenCl or fabio is missing") +@unittest.skipUnless(ocl and pyopencl, + "PyOpenCl is missing") class TestByteOffset(unittest.TestCase): @staticmethod diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py index 250582d..045a9d3 100644 --- a/silx/opencl/processing.py +++ b/silx/opencl/processing.py @@ -41,7 +41,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "27/02/2018" +__date__ = "11/01/2019" __status__ = "stable" @@ -53,6 +53,7 @@ import numpy import threading from .common import ocl, pyopencl, release_cl_buffers, kernel_workgroup_size from .utils import concatenate_cl_kernel +import platform BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"]) @@ -124,6 +125,7 @@ class OpenclProcessing(object): level, store profiling elements (makes code slightly slower) """ self.sem = threading.Semaphore() + self._X87_VOLATILE = None self.profile = None self.events = [] # List with of EventDescription, kept for profiling self.cl_mem = {} # dict with all buffer allocated @@ -176,7 +178,6 @@ class OpenclProcessing(object): have a built-in way to check the actual free memory on a device, only the total memory. """ - if buffers is None: buffers = self.buffers @@ -255,7 +256,7 @@ class OpenclProcessing(object): kernel_files = kernel_files or self.kernel_files kernel_src = concatenate_cl_kernel(kernel_files) - compile_options = compile_options or "" + compile_options = compile_options or self.get_compiler_options() logger.info("Compiling file %s with options %s", kernel_files, compile_options) try: self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options) @@ -313,6 +314,29 @@ class OpenclProcessing(object): with self.sem: self.events = [] + @property + def x87_volatile_option(self): + # this is running 32 bits OpenCL woth POCL + if self._X87_VOLATILE is None: + if (platform.machine() in ("i386", "i686", "x86_64", "AMD64") and + (tuple.__itemsize__ == 4) and + self.ctx.devices[0].platform.name == 'Portable Computing Language'): + self._X87_VOLATILE = "-DX87_VOLATILE=volatile" + else: + self._X87_VOLATILE = "" + return self._X87_VOLATILE + + def get_compiler_options(self, x87_volatile=False): + """Provide the default OpenCL compiler options + + :param x87_volatile: needed for Kahan summation + :return: string with compiler option + """ + option_list = [] + if x87_volatile: + option_list.append(self.x87_volatile_option) + return " ".join(i for i in option_list if i) + # This should be implemented by concrete class # def __copy__(self): # """Shallow copy of the object diff --git a/silx/opencl/sinofilter.py b/silx/opencl/sinofilter.py new file mode 100644 index 0000000..3e4d69c --- /dev/null +++ b/silx/opencl/sinofilter.py @@ -0,0 +1,559 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for sinogram filtering on CPU/GPU.""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["P. Paleo"] +__license__ = "MIT" +__date__ = "01/02/2019" + +import numpy as np +from math import pi +from itertools import product +from bisect import bisect + +from .common import pyopencl as cl +import pyopencl.array as parray +from .processing import OpenclProcessing +from ..math.fft import FFT +from ..math.fft.clfft import __have_clfft__ +from ..utils.deprecation import deprecated + + +def nextpow2(N): + p = 1 + while p < N: + p *= 2 + return p + +def compute_ramlak_filter(dwidth_padded, dtype=np.float32): + """ + Compute the Ramachandran-Lakshminarayanan (Ram-Lak) filter, used in + filtered backprojection. + + :param dwidth_padded: width of the 2D sinogram after padding + :param dtype: data type + """ + #~ dwidth_padded = dwidth * 2 + L = dwidth_padded + h = np.zeros(L, dtype=dtype) + L2 = L//2+1 + h[0] = 1/4. + j = np.linspace(1, L2, L2//2, False).astype(dtype) # np < 1.9.0 + # h[2::2] = 0 + h[1:L2:2] = -1./(pi**2 * j**2) + # h[-1:L2-1:-2] = -1./(pi**2 * j**2) + h[L2:] = np.copy(h[1:L2-1][::-1]) + return h + + +def tukey(N, alpha=0.5): + """ + Compute the Tukey apodization window. + + :param int N: Number of points. + :param float alpha: + """ + apod = np.zeros(N) + x = np.arange(N)/(N-1) + r = alpha + M1 = (0 <= x) * (x < r/2) + M2 = (r/2 <= x) * (x <= 1 - r/2) + M3 = (1 - r/2 < x) * (x <= 1) + apod[M1] = (1 + np.cos(2*pi/r * (x[M1] - r/2)))/2. + apod[M2] = 1. + apod[M3] = (1 + np.cos(2*pi/r * (x[M3] - 1 + r/2)))/2. + return apod + + +def lanczos(N): + """ + Compute the Lanczos window (truncated sinc) of width N. + + :param int N: window width + """ + x = np.arange(N)/(N-1) + return np.sin(pi*(2*x-1))/(pi*(2*x-1)) + + +def compute_fourier_filter(dwidth_padded, filter_name, cutoff=1.): + """ + Compute the filter used for FBP. + + :param dwidth_padded: padded detector width. As the filtering is done by the + Fourier convolution theorem, dwidth_padded should be at least 2*dwidth. + :param filter_name: Name of the filter. Available filters are: + Ram-Lak, Shepp-Logan, Cosine, Hamming, Hann, Tukey, Lanczos. + :param cutoff: Cut-off frequency, if relevant. + """ + Nf = dwidth_padded + #~ filt_f = np.abs(np.fft.fftfreq(Nf)) + rl = compute_ramlak_filter(Nf, dtype=np.float64) + filt_f = np.fft.fft(rl) + + filter_name = filter_name.lower() + if filter_name in ["ram-lak", "ramlak"]: + return filt_f + + w = 2 * pi * np.fft.fftfreq(dwidth_padded) + d = cutoff + apodization = { + # ~OK + "shepp-logan": np.sin(w[1:Nf]/(2*d))/(w[1:Nf]/(2*d)), + # ~OK + "cosine": np.cos(w[1:Nf]/(2*d)), + # OK + "hamming": 0.54*np.ones_like(filt_f)[1:Nf] + .46 * np.cos(w[1:Nf]/d), + # OK + "hann": (np.ones_like(filt_f)[1:Nf] + np.cos(w[1:Nf]/d))/2., + # These one is not compatible with Astra - TODO investigate why + "tukey": np.fft.fftshift(tukey(dwidth_padded, alpha=d/2.))[1:Nf], + "lanczos": np.fft.fftshift(lanczos(dwidth_padded))[1:Nf], + } + if filter_name not in apodization: + raise ValueError("Unknown filter %s. Available filters are %s" % + (filter_name, str(apodization.keys()))) + filt_f[1:Nf] *= apodization[filter_name] + return filt_f + + +def generate_powers(): + """ + Generate a list of powers of [2, 3, 5, 7], + up to (2**15)*(3**9)*(5**6)*(7**5). + """ + primes = [2, 3, 5, 7] + maxpow = {2: 15, 3: 9, 5: 6, 7: 5} + valuations = [] + for prime in primes: + valuations.append(range(maxpow[prime]+1)) + powers = product(*valuations) + res = [] + for pw in powers: + res.append(np.prod(list(map(lambda x : x[0]**x[1], zip(primes, pw))))) + return np.unique(res) + + + +class SinoFilter(OpenclProcessing): + """ + A class for performing sinogram filtering on GPU using OpenCL. + This is a convolution in the Fourier space, along one dimension: + - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x) + - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x) + """ + kernel_files = ["array_utils.cl"] + powers = generate_powers() + + def __init__(self, sino_shape, filter_name=None, ctx=None, + devicetype="all", platformid=None, deviceid=None, + profile=False, extra_options=None): + """Constructor of OpenCL FFT-Convolve. + + :param sino_shape: shape of the sinogram. + :param filter_name: Name of the filter. Defaut is "ram-lak". + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by + clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly + slower) + :param dict extra_options: Advanced extra options. + Current options are: cutoff, use_numpy_fft + """ + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + + self._init_extra_options(extra_options) + self._calculate_shapes(sino_shape) + self._init_fft() + self._allocate_memory() + self._compute_filter(filter_name) + self._init_kernels() + + def _calculate_shapes(self, sino_shape): + """ + + :param sino_shape: shape of the sinogram. + """ + self.ndim = len(sino_shape) + if self.ndim == 2: + n_angles, dwidth = sino_shape + else: + raise ValueError("Invalid sinogram number of dimensions: " + "expected 2 dimensions") + self.sino_shape = sino_shape + self.n_angles = n_angles + self.dwidth = dwidth + self.dwidth_padded = self.get_next_power(2*self.dwidth) + self.sino_padded_shape = (n_angles, self.dwidth_padded) + sino_f_shape = list(self.sino_padded_shape) + sino_f_shape[-1] = sino_f_shape[-1]//2+1 + self.sino_f_shape = tuple(sino_f_shape) + + def _init_extra_options(self, extra_options): + """ + + :param dict extra_options: Advanced extra options. + Current options are: cutoff, + """ + self.extra_options = { + "cutoff": 1., + "use_numpy_fft": False, + } + if extra_options is not None: + self.extra_options.update(extra_options) + + def _init_fft(self): + if __have_clfft__ and not(self.extra_options["use_numpy_fft"]): + self.fft_backend = "opencl" + self.fft = FFT( + self.sino_padded_shape, + dtype=np.float32, + axes=(-1,), + backend="opencl", + ctx=self.ctx, + ) + else: + self.fft_backend = "numpy" + print("The gpyfft module was not found. The Fourier transforms " + "will be done on CPU. For more performances, it is advised " + "to install gpyfft.""") + self.fft = FFT( + template=np.zeros(self.sino_padded_shape, "f"), + axes=(-1,), + backend="numpy", + ) + + def _allocate_memory(self): + self.d_filter_f = parray.zeros(self.queue, (self.sino_f_shape[-1],), np.complex64) + self.is_cpu = (self.device.type == "CPU") + # These are already allocated by FFT() if using the opencl backend + if self.fft_backend == "opencl": + self.d_sino_padded = self.fft.data_in + self.d_sino_f = self.fft.data_out + else: + # When using the numpy backend, arrays are not pre-allocated + self.d_sino_padded = np.zeros(self.sino_padded_shape, "f") + self.d_sino_f = np.zeros(self.sino_f_shape, np.complex64) + # These are needed for rectangular memcpy in certain cases (see below). + self.tmp_sino_device = parray.zeros(self.queue, self.sino_shape, "f") + self.tmp_sino_host = np.zeros(self.sino_shape, "f") + + def get_next_power(self, n): + """ + Given a number, get the closest (upper) number p such that + p is a power of 2, 3, 5 and 7. + """ + idx = bisect(self.powers, n) + if self.powers[idx-1] == n: + return n + return self.powers[idx] + + def _compute_filter(self, filter_name): + """ + + :param str filter_name: filter name + """ + self.filter_name = filter_name or "ram-lak" + filter_f = compute_fourier_filter( + self.dwidth_padded, + filter_name, + cutoff=self.extra_options["cutoff"], + )[:self.dwidth_padded//2+1] # R2C + self.set_filter(filter_f, normalize=True) + + def set_filter(self, h_filt, normalize=True): + """ + Set a filter for sinogram filtering. + + :param h_filt: Filter. Each line of the sinogram will be filtered with + this filter. It has to be the Real-to-Complex Fourier Transform + of some real filter, padded to 2*sinogram_width. + :param normalize: Whether to normalize the filter with pi/num_angles. + """ + if h_filt.size != self.sino_f_shape[-1]: + raise ValueError( + """ + Invalid filter size: expected %d, got %d. + Please check that the filter is the Fourier R2C transform of + some real 1D filter. + """ + % (self.sino_f_shape[-1], h_filt.size) + ) + if not(np.iscomplexobj(h_filt)): + print("Warning: expected a complex Fourier filter") + self.filter_f = h_filt + if normalize: + self.filter_f *= pi/self.n_angles + self.filter_f = self.filter_f.astype(np.complex64) + self.d_filter_f[:] = self.filter_f[:] + + def _init_kernels(self): + OpenclProcessing.compile_kernels(self, self.kernel_files) + h, w = self.d_sino_f.shape + self.mult_kern_args = ( + self.queue, + np.int32(self.d_sino_f.shape[::-1]), + None, + self.d_sino_f.data, + self.d_filter_f.data, + np.int32(w), + np.int32(h) + ) + + def check_array(self, arr): + if arr.dtype != np.float32: + raise ValueError("Expected data type = numpy.float32") + if arr.shape != self.sino_shape: + raise ValueError("Expected sinogram shape %s, got %s" % + (self.sino_shape, arr.shape)) + if not(isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)): + raise ValueError("Expected either numpy.ndarray or " + "pyopencl.array.Array") + + def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0), + src_offset=(0, 0)): + """ + + :param dst: + :param src: + :param transfer_shape: + :param dst_offset: + :param src_offset: + """ + self.kernels.cpy2d( + self.queue, + np.int32(transfer_shape[::-1]), + None, + dst.data, + src.data, + np.int32(dst.shape[1]), + np.int32(src.shape[1]), + np.int32(dst_offset), + np.int32(src_offset), + np.int32(transfer_shape[::-1]) + ) + + def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0), + src_offset=(0, 0)): + """ + + :param dst: + :param src: + :param transfer_shape: + :param dst_offset: + :param src_offset: + """ + s = transfer_shape + do = dst_offset + so = src_offset + dst[do[0]:do[0]+s[0], do[1]:do[1]+s[1]] = src[so[0]:so[0]+s[0], so[1]:so[1]+s[1]] + + def _prepare_input_sino(self, sino): + """ + + :param sino: sinogram + """ + self.check_array(sino) + self.d_sino_padded.fill(0) + if self.fft_backend == "opencl": + # OpenCL backend: FFT/mult/IFFT are done on device. + if isinstance(sino, np.ndarray): + # OpenCL backend + numpy input: copy H->D. + # As pyopencl does not support rectangular copies, we have to + # do a copy H->D in a temporary device buffer, and then call a + # kernel doing the rectangular D-D copy. + self.tmp_sino_device[:] = sino[:] + if self.is_cpu: + self.tmp_sino_device.finish() + d_sino_ref = self.tmp_sino_device + else: + d_sino_ref = sino + # Rectangular copy D->D + self.copy2d(self.d_sino_padded, d_sino_ref, self.sino_shape) + if self.is_cpu: + self.d_sino_padded.finish() + else: + # Numpy backend: FFT/mult/IFFT are done on host. + if not(isinstance(sino, np.ndarray)): + # Numpy backend + pyopencl input: need to copy D->H + self.tmp_sino_host[:] = sino[:] + h_sino_ref = self.tmp_sino_host + else: + h_sino_ref = sino + # Rectangular copy H->H + self.copy2d_host(self.d_sino_padded, h_sino_ref, self.sino_shape) + + def _get_output_sino(self, output): + """ + + :param Union[numpy.dtype,None] output: sinogram output. + :return: sinogram + """ + if output is None: + res = np.zeros(self.sino_shape, dtype=np.float32) + else: + res = output + if self.fft_backend == "opencl": + if isinstance(res, np.ndarray): + # OpenCL backend + numpy output: copy D->H + # As pyopencl does not support rectangular copies, we first have + # to call a kernel doing rectangular copy D->D, then do a copy + # D->H. + self.copy2d(dst=self.tmp_sino_device, + src=self.d_sino_padded, + transfer_shape=self.sino_shape) + if self.is_cpu: + self.tmp_sino_device.finish() + res[:] = self.tmp_sino_device[:] + else: + if self.is_cpu: + self.d_sino_padded.finish() + self.copy2d(res, self.d_sino_padded, self.sino_shape) + if self.is_cpu: + res.finish() + else: + if not(isinstance(res, np.ndarray)): + # Numpy backend + pyopencl output: rect copy H->H + copy H->D + self.copy2d_host(dst=self.tmp_sino_host, + src=self.d_sino_padded, + transfer_shape=self.sino_shape) + res[:] = self.tmp_sino_host[:] + else: + # Numpy backend + numpy output: rect copy H->H + self.copy2d_host(res, self.d_sino_padded, self.sino_shape) + return res + + def _do_fft(self): + if self.fft_backend == "opencl": + self.fft.fft(self.d_sino_padded, output=self.d_sino_f) + if self.is_cpu: + self.d_sino_f.finish() + else: + # numpy backend does not support "output=" argument, + # and rfft always return a complex128 result. + res = self.fft.fft(self.d_sino_padded).astype(np.complex64) + self.d_sino_f[:] = res[:] + + def _multiply_fourier(self): + if self.fft_backend == "opencl": + # Everything is on device. Call the multiplication kernel. + self.kernels.inplace_complex_mul_2Dby1D( + *self.mult_kern_args + ) + if self.is_cpu: + self.d_sino_f.finish() + else: + # Everything is on host. + self.d_sino_f *= self.filter_f + + def _do_ifft(self): + if self.fft_backend == "opencl": + if self.is_cpu: + self.d_sino_padded.fill(0) + self.d_sino_padded.finish() + self.fft.ifft(self.d_sino_f, output=self.d_sino_padded) + if self.is_cpu: + self.d_sino_padded.finish() + else: + # numpy backend does not support "output=" argument, + # and irfft always return a float64 result. + res = self.fft.ifft(self.d_sino_f).astype("f") + self.d_sino_padded[:] = res[:] + + def filter_sino(self, sino, output=None): + """ + + :param sino: sinogram + :param output: + :return: filtered sinogram + """ + # Handle input sinogram + self._prepare_input_sino(sino) + # FFT + self._do_fft() + # multiply with filter in the Fourier domain + self._multiply_fourier() + # iFFT + self._do_ifft() + # return + res = self._get_output_sino(output) + return res + #~ return output + + __call__ = filter_sino + + + + +# ------------------- +# - Compatibility - +# ------------------- + +@deprecated(replacement="Backprojection.sino_filter", since_version="0.10") +def fourier_filter(sino, filter_=None, fft_size=None): + """Simple np based implementation of fourier space filter. + This function is deprecated, please use silx.opencl.sinofilter.SinoFilter. + + :param sino: of shape shape = (num_projs, num_bins) + :param filter: filter function to apply in fourier space + :fft_size: size on which perform the fft. May be larger than the sino array + :return: filtered sinogram + """ + assert sino.ndim == 2 + num_projs, num_bins = sino.shape + if fft_size is None: + fft_size = nextpow2(num_bins * 2 - 1) + else: + assert fft_size >= num_bins + if fft_size == num_bins: + sino_zeropadded = sino.astype(np.float32) + else: + sino_zeropadded = np.zeros((num_projs, fft_size), + dtype=np.complex64) + sino_zeropadded[:, :num_bins] = sino.astype(np.float32) + + if filter_ is None: + h = np.zeros(fft_size, dtype=np.float32) + L2 = fft_size // 2 + 1 + h[0] = 1 / 4. + j = np.linspace(1, L2, L2 // 2, False) + h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2) + h[L2:] = np.copy(h[1:L2 - 1][::-1]) + filter_ = np.fft.fft(h).astype(np.complex64) + + # Linear convolution + sino_f = np.fft.fft(sino, fft_size) + sino_f = sino_f * filter_ + sino_filtered = np.fft.ifft(sino_f)[:, :num_bins].real + + return np.ascontiguousarray(sino_filtered.real, dtype=np.float32) diff --git a/silx/opencl/statistics.py b/silx/opencl/statistics.py new file mode 100644 index 0000000..bd8e7b7 --- /dev/null +++ b/silx/opencl/statistics.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- +# +# Project: SILX +# https://github.com/silx-kit/silx +# +# Copyright (C) 2012-2019 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# . +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# . +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +"""A module for performing basic statistical analysis (min, max, mean, std) on +large data where numpy is not very efficient. +""" + +from __future__ import absolute_import, print_function, with_statement, division + + +__author__ = "Jerome Kieffer" +__license__ = "MIT" +__date__ = "11/01/2019" +__copyright__ = "2012-2017, ESRF, Grenoble" +__contact__ = "jerome.kieffer@esrf.fr" + +import logging +import numpy +from collections import OrderedDict, namedtuple +from math import sqrt + +from .common import pyopencl +from .processing import EventDescription, OpenclProcessing, BufferDescription +from .utils import concatenate_cl_kernel + +if pyopencl: + mf = pyopencl.mem_flags + from pyopencl.reduction import ReductionKernel + try: + from pyopencl import cltypes + except ImportError: + v = pyopencl.array.vec() + float8 = v.float8 + else: + float8 = cltypes.float8 + +else: + raise ImportError("pyopencl is not installed") +logger = logging.getLogger(__name__) + +StatResults = namedtuple("StatResults", ["min", "max", "cnt", "sum", "mean", + "var", "std"]) +zero8 = "(float8)(FLT_MAX, -FLT_MAX, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)" +# min max cnt cnt_e sum sum_e var var_e + + +class Statistics(OpenclProcessing): + """A class for doing statistical analysis using OpenCL + + :param List[int] size: Shape of input data to treat + :param numpy.dtype dtype: Input data type + :param numpy.ndarray template: Data template to extract size & dtype + :param ctx: Actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param str devicetype: Type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param int platformid: Platform identifier as given by clinfo + :param int deviceid: Device identifier as given by clinfo + :param int block_size: + Preferred workgroup size, may vary depending on the outcome of the compilation + :param bool profile: + Switch on profiling to be able to profile at the kernel level, + store profiling elements (makes code slightly slower) + """ + buffers = [ + BufferDescription("raw", 1, numpy.float32, mf.READ_ONLY), + BufferDescription("converted", 1, numpy.float32, mf.READ_WRITE), + ] + kernel_files = ["preprocess.cl"] + mapping = {numpy.int8: "s8_to_float", + numpy.uint8: "u8_to_float", + numpy.int16: "s16_to_float", + numpy.uint16: "u16_to_float", + numpy.uint32: "u32_to_float", + numpy.int32: "s32_to_float"} + + def __init__(self, size=None, dtype=None, template=None, + ctx=None, devicetype="all", platformid=None, deviceid=None, + block_size=None, profile=False + ): + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + block_size=block_size, profile=profile) + self.size = size + self.dtype = dtype + if template is not None: + self.size = template.size + self.dtype = template.dtype + + self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags) + for i in self.__class__.buffers] + + self.allocate_buffers(use_array=True) + self.compile_kernels() + self.set_kernel_arguments() + + def set_kernel_arguments(self): + """Parametrize all kernel arguments""" + for val in self.mapping.values(): + self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("raw", "converted"))) + + def compile_kernels(self): + """Compile the kernel""" + OpenclProcessing.compile_kernels(self, + self.kernel_files, + "-D NIMAGE=%i" % self.size) + compiler_options = self.get_compiler_options(x87_volatile=True) + src = concatenate_cl_kernel(("kahan.cl", "statistics.cl")) + self.reduction_comp = ReductionKernel(self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options) + self.reduction_simple = ReductionKernel(self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics_simple(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options) + + def send_buffer(self, data, dest): + """ + Send a numpy array to the device, including the cast on the device if + possible + + :param numpy.ndarray data: numpy array with data + :param dest: name of the buffer as registered in the class + """ + + dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0]) + events = [] + if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize): + copy_image = pyopencl.enqueue_copy(self.queue, + self.cl_mem[dest].data, + numpy.ascontiguousarray(data, dest_type)) + events.append(EventDescription("copy H->D %s" % dest, copy_image)) + else: + copy_image = pyopencl.enqueue_copy(self.queue, + self.cl_mem["raw"].data, + numpy.ascontiguousarray(data)) + kernel = getattr(self.program, self.mapping[data.dtype.type]) + cast_to_float = kernel(self.queue, + (self.size,), + None, + self.cl_mem["raw"].data, + self.cl_mem[dest].data) + events += [ + EventDescription("copy H->D %s" % dest, copy_image), + EventDescription("cast to float", cast_to_float) + ] + if self.profile: + self.events += events + return events + + def process(self, data, comp=True): + """Actually calculate the statics on the data + + :param numpy.ndarray data: numpy array with the image + :param comp: use Kahan compensated arithmetics for the calculation + :return: Statistics named tuple + :rtype: StatResults + """ + if data.ndim != 1: + data = data.ravel() + size = data.size + assert size <= self.size, "size is OK" + events = [] + with self.sem: + self.send_buffer(data, "converted") + if comp: + reduction = self.reduction_comp + else: + reduction = self.reduction_simple + res_d, evt = reduction(self.cl_mem["converted"][:self.size], + queue=self.queue, + return_event=True) + events.append(EventDescription("statistical reduction %s" % ("comp"if comp else "simple"), evt)) + if self.profile: + self.events += events + res_h = res_d.get() + min_ = 1.0 * res_h["s0"] + max_ = 1.0 * res_h["s1"] + count = 1.0 * res_h["s2"] + res_h["s3"] + sum_ = 1.0 * res_h["s4"] + res_h["s5"] + m2 = 1.0 * res_h["s6"] + res_h["s7"] + var = m2 / (count - 1.0) + res = StatResults(min_, + max_, + count, + sum_, + sum_ / count, + var, + sqrt(var)) + return res + + __call__ = process diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py index 2fe88ea..f3121d5 100644 --- a/silx/opencl/test/__init__.py +++ b/silx/opencl/test/__init__.py @@ -24,7 +24,7 @@ __authors__ = ["J. Kieffer"] __license__ = "MIT" -__date__ = "17/10/2017" +__date__ = "11/01/2019" import os import unittest @@ -36,6 +36,9 @@ from . import test_linalg from . import test_array_utils from ..codec import test as test_codec from . import test_image +from . import test_kahan +from . import test_stats + def suite(): test_suite = unittest.TestSuite() @@ -47,6 +50,8 @@ def suite(): test_suite.addTests(test_array_utils.suite()) test_suite.addTests(test_codec.suite()) test_suite.addTests(test_image.suite()) + test_suite.addTests(test_kahan.suite()) + test_suite.addTests(test_stats.suite()) # Allow to remove sift from the project test_base_dir = os.path.dirname(__file__) sift_dir = os.path.join(test_base_dir, "..", "sift") diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py index 70ce2ae..7ab416d 100644 --- a/silx/opencl/test/test_backprojection.py +++ b/silx/opencl/test/test_backprojection.py @@ -35,8 +35,9 @@ __date__ = "19/01/2018" import time import logging -import numpy +import numpy as np import unittest +from math import pi try: import mako except ImportError: @@ -44,6 +45,7 @@ except ImportError: from ..common import ocl if ocl: from .. import backprojection + from ..sinofilter import compute_fourier_filter from silx.test.utils import utilstest logger = logging.getLogger(__name__) @@ -55,7 +57,7 @@ def generate_coords(img_shp, center=None): The zero is at the center of the image. """ l_r, l_c = float(img_shp[0]), float(img_shp[1]) - R, C = numpy.mgrid[:l_r, :l_c] + R, C = np.mgrid[:l_r, :l_c] if center is None: center0, center1 = l_r / 2., l_c / 2. else: @@ -71,7 +73,7 @@ def clip_circle(img, center=None, radius=None): """ R, C = generate_coords(img.shape, center) M = R * R + C * C - res = numpy.zeros_like(img) + res = np.zeros_like(img) if radius is None: radius = img.shape[0] / 2. - 1 mask = M < radius * radius @@ -85,23 +87,29 @@ class TestFBP(unittest.TestCase): def setUp(self): if ocl is None: return - # ~ if sys.platform.startswith('darwin'): - # ~ self.skipTest("Backprojection is not implemented on CPU for OS X yet") self.getfiles() self.fbp = backprojection.Backprojection(self.sino.shape, profile=True) if self.fbp.compiletime_workgroup_size < 16 * 16: - self.skipTest("Current implementation of OpenCL backprojection is not supported on this platform yet") + self.skipTest("Current implementation of OpenCL backprojection is " + "not supported on this platform yet") + # Astra does not use the same backprojector implementation. + # Therefore, we cannot expect results to be the "same" (up to float32 + # numerical error) + self.tol = 5e-2 + if self.fbp.is_cpu: + # Precision is less when using CPU + self.tol *= 2 def tearDown(self): self.sino = None -# self.fbp.log_profile() + # self.fbp.log_profile() self.fbp = None def getfiles(self): # load sinogram of 512x512 MRI phantom - self.sino = numpy.load(utilstest.getfile("sino500.npz"))["data"] + self.sino = np.load(utilstest.getfile("sino500.npz"))["data"] # load reconstruction made with ASTRA FBP (with filter designed in spatial domain) - self.reference_rec = numpy.load(utilstest.getfile("rec_astra_500.npz"))["data"] + self.reference_rec = np.load(utilstest.getfile("rec_astra_500.npz"))["data"] def measure(self): "Common measurement of timings" @@ -124,8 +132,8 @@ class TestFBP(unittest.TestCase): ref_clipped = clip_circle(self.reference_rec) delta = abs(res_clipped - ref_clipped) bad = delta > 1 -# numpy.save("/tmp/bad.npy", bad.astype(int)) - logger.debug("Absolute difference: %s with %s outlier pixels out of %s", delta.max(), bad.sum(), numpy.prod(bad.shape)) + logger.debug("Absolute difference: %s with %s outlier pixels out of %s" + "", delta.max(), bad.sum(), np.prod(bad.shape)) return delta.max() @unittest.skipUnless(ocl and mako, "pyopencl is missing") @@ -143,21 +151,59 @@ class TestFBP(unittest.TestCase): err = self.compare(res) msg = str("Max error = %e" % err) logger.info(msg) - # TODO: cannot do better than 1e0 ? - # The plain backprojection was much better, so it must be an issue in the filtering process - self.assertTrue(err < 1., "Max error is too high") + self.assertTrue(err < self.tol, "Max error is too high") + # Test multiple reconstructions # ----------------------------- - res0 = numpy.copy(res) + res0 = np.copy(res) for i in range(10): res = self.fbp.filtered_backprojection(self.sino) - errmax = numpy.max(numpy.abs(res - res0)) + errmax = np.max(np.abs(res - res0)) self.assertTrue(errmax < 1.e-6, "Max error is too high") + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_fbp_filters(self): + """ + Test the different available filters of silx FBP. + """ + avail_filters = [ + "ramlak", "shepp-logan", "cosine", "hamming", + "hann" + ] + # Create a Dirac delta function at a single angle view. + # As the filters are radially invarant: + # - backprojection yields an image where each line is a Dirac. + # - FBP yields an image where each line is the spatial filter + # One can simply filter "dirac" without backprojecting it, but this + # test will also ensure that backprojection behaves well. + dirac = np.zeros_like(self.sino) + na, dw = dirac.shape + dirac[0, dw//2] = na / pi * 2 + + for filter_name in avail_filters: + B = backprojection.Backprojection(dirac.shape, filter_name=filter_name) + r = B(dirac) + # Check that radial invariance is kept + std0 = np.max(np.abs(np.std(r, axis=0))) + self.assertTrue( + std0 < 5.e-6, + "Something wrong with FBP(filter=%s)" % filter_name + ) + # Check that the filter is retrieved + r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2. # filter factor + ref_filter_f = compute_fourier_filter(dw, filter_name) + errmax = np.max(np.abs(r_f - ref_filter_f)) + logger.info("FBP filter %s: max error=%e" % (filter_name, errmax)) + self.assertTrue( + errmax < 1.e-3, + "Something wrong with FBP(filter=%s)" % filter_name + ) + def suite(): testSuite = unittest.TestSuite() testSuite.addTest(TestFBP("test_fbp")) + testSuite.addTest(TestFBP("test_fbp_filters")) return testSuite diff --git a/silx/opencl/test/test_kahan.py b/silx/opencl/test/test_kahan.py new file mode 100644 index 0000000..bb9ea3f --- /dev/null +++ b/silx/opencl/test/test_kahan.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python +# coding: utf-8 +# +# Project: Azimuthal integration +# https://github.com/silx-kit/pyFAI +# +# Copyright (C) 2015-2019 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +"test suite for OpenCL code" + +from __future__ import absolute_import, division, print_function + +__author__ = "Jérôme Kieffer" +__contact__ = "Jerome.Kieffer@ESRF.eu" +__license__ = "MIT" +__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "30/01/2019" + + +import unittest +import numpy +import logging +import platform + +logger = logging.getLogger(__name__) +try: + import pyopencl +except ImportError as error: + logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error) + pyopencl = None + +from .. import ocl +if ocl is not None: + from ..utils import read_cl_file + from .. import pyopencl + import pyopencl.array +from ...test.utils import test_options + + +class TestKahan(unittest.TestCase): + """ + Test the kernels for compensated math in OpenCL + """ + + @classmethod + def setUpClass(cls): + if not test_options.WITH_OPENCL_TEST: + raise unittest.SkipTest("User request to skip OpenCL tests") + if pyopencl is None or ocl is None: + raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available") + + cls.ctx = ocl.create_context(devicetype="GPU") + cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE) + + # this is running 32 bits OpenCL woth POCL + if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and + cls.ctx.devices[0].platform.name == 'Portable Computing Language'): + cls.args = "-DX87_VOLATILE=volatile" + else: + cls.args = "" + + @classmethod + def tearDownClass(cls): + cls.queue = None + cls.ctx = None + + @staticmethod + def dummy_sum(ary, dtype=None): + "perform the actual sum in a dummy way " + if dtype is None: + dtype = ary.dtype.type + sum_ = dtype(0) + for i in ary: + sum_ += i + return sum_ + + def test_kahan(self): + # simple test + N = 26 + data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32) + + ref64 = numpy.sum(data, dtype=numpy.float64) + ref32 = self.dummy_sum(data) + if (ref64 == ref32): + logger.warning("Kahan: invalid tests as float32 provides the same result as float64") + # Dummy kernel to evaluate + src = """ + kernel void summation(global float* data, + int size, + global float* result) + { + float2 acc = (float2)(0.0f, 0.0f); + for (int i=0; i<size; i++) + { + acc = kahan_sum(acc, data[i]); + } + result[0] = acc.s0; + result[1] = acc.s1; + } + """ + prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args) + ones_d = pyopencl.array.to_device(self.queue, data) + res_d = pyopencl.array.zeros(self.queue, 2, numpy.float32) + evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt.wait() + res = res_d.get().sum(dtype=numpy.float64) + self.assertEqual(ref64, res, "test_kahan") + + def test_dot16(self): + # simple test + N = 16 + data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32) + + ref64 = numpy.dot(data.astype(numpy.float64), data.astype(numpy.float64)) + ref32 = numpy.dot(data, data) + if (ref64 == ref32): + logger.warning("dot16: invalid tests as float32 provides the same result as float64") + # Dummy kernel to evaluate + src = """ + kernel void test_dot16(global float* data, + int size, + global float* result) + { + float2 acc = (float2)(0.0f, 0.0f); + float16 data16 = (float16) (data[0],data[1],data[2],data[3],data[4], + data[5],data[6],data[7],data[8],data[9], + data[10],data[11],data[12],data[13],data[14],data[15]); + acc = comp_dot16(data16, data16); + result[0] = acc.s0; + result[1] = acc.s1; + } + + kernel void test_dot8(global float* data, + int size, + global float* result) + { + float2 acc = (float2)(0.0f, 0.0f); + float8 data0 = (float8) (data[0],data[2],data[4],data[6],data[8],data[10],data[12],data[14]); + float8 data1 = (float8) (data[1],data[3],data[5],data[7],data[9],data[11],data[13],data[15]); + acc = comp_dot8(data0, data1); + result[0] = acc.s0; + result[1] = acc.s1; + } + + kernel void test_dot4(global float* data, + int size, + global float* result) + { + float2 acc = (float2)(0.0f, 0.0f); + float4 data0 = (float4) (data[0],data[4],data[8],data[12]); + float4 data1 = (float4) (data[3],data[7],data[11],data[15]); + acc = comp_dot4(data0, data1); + result[0] = acc.s0; + result[1] = acc.s1; + } + + kernel void test_dot3(global float* data, + int size, + global float* result) + { + float2 acc = (float2)(0.0f, 0.0f); + float3 data0 = (float3) (data[0],data[4],data[12]); + float3 data1 = (float3) (data[3],data[11],data[15]); + acc = comp_dot3(data0, data1); + result[0] = acc.s0; + result[1] = acc.s1; + } + + kernel void test_dot2(global float* data, + int size, + global float* result) + { + float2 acc = (float2)(0.0f, 0.0f); + float2 data0 = (float2) (data[0],data[14]); + float2 data1 = (float2) (data[1],data[15]); + acc = comp_dot2(data0, data1); + result[0] = acc.s0; + result[1] = acc.s1; + } + + """ + + prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args) + ones_d = pyopencl.array.to_device(self.queue, data) + res_d = pyopencl.array.zeros(self.queue, 2, numpy.float32) + evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt.wait() + res = res_d.get().sum(dtype="float64") + self.assertEqual(ref64, res, "test_dot16") + + res_d.fill(0) + data0 = data[0::2] + data1 = data[1::2] + ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) + ref32 = numpy.dot(data0, data1) + if (ref64 == ref32): + logger.warning("dot8: invalid tests as float32 provides the same result as float64") + evt = prg.test_dot8(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt.wait() + res = res_d.get().sum(dtype="float64") + self.assertEqual(ref64, res, "test_dot8") + + res_d.fill(0) + data0 = data[0::4] + data1 = data[3::4] + ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) + ref32 = numpy.dot(data0, data1) + if (ref64 == ref32): + logger.warning("dot4: invalid tests as float32 provides the same result as float64") + evt = prg.test_dot4(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt.wait() + res = res_d.get().sum(dtype="float64") + self.assertEqual(ref64, res, "test_dot4") + + res_d.fill(0) + data0 = numpy.array([data[0], data[4], data[12]]) + data1 = numpy.array([data[3], data[11], data[15]]) + ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) + ref32 = numpy.dot(data0, data1) + if (ref64 == ref32): + logger.warning("dot3: invalid tests as float32 provides the same result as float64") + evt = prg.test_dot3(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt.wait() + res = res_d.get().sum(dtype="float64") + self.assertEqual(ref64, res, "test_dot3") + + res_d.fill(0) + data0 = numpy.array([data[0], data[14]]) + data1 = numpy.array([data[1], data[15]]) + ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64)) + ref32 = numpy.dot(data0, data1) + if (ref64 == ref32): + logger.warning("dot2: invalid tests as float32 provides the same result as float64") + evt = prg.test_dot2(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data) + evt.wait() + res = res_d.get().sum(dtype="float64") + self.assertEqual(ref64, res, "test_dot2") + + +def suite(): + testsuite = unittest.TestSuite() + loader = unittest.defaultTestLoader.loadTestsFromTestCase + testsuite.addTest(loader(TestKahan)) + return testsuite + + +if __name__ == '__main__': + runner = unittest.TextTestRunner() + runner.run(suite()) diff --git a/silx/opencl/test/test_stats.py b/silx/opencl/test/test_stats.py new file mode 100644 index 0000000..b5127c8 --- /dev/null +++ b/silx/opencl/test/test_stats.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Project: Sift implementation in Python + OpenCL +# https://github.com/silx-kit/silx +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +""" +Simple test of an addition +""" + +from __future__ import division, print_function + +__authors__ = ["Henri Payno, Jérôme Kieffer"] +__contact__ = "jerome.kieffer@esrf.eu" +__license__ = "MIT" +__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "13/12/2018" + +import logging +import time +import numpy + +import unittest +from ..common import ocl +if ocl: + import pyopencl + import pyopencl.array + from ..statistics import StatResults, Statistics +from ..utils import get_opencl_code +logger = logging.getLogger(__name__) + + +@unittest.skipUnless(ocl, "PyOpenCl is missing") +class TestStatistics(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.size = 1 << 20 # 1 million elements + cls.data = numpy.random.randint(0, 65000, cls.size).astype("uint16") + t0 = time.time() + cls.ref = StatResults(cls.data.min(), cls.data.max(), cls.data.size, + cls.data.sum(), cls.data.mean(), cls.data.std() ** 2, + cls.data.std()) + t1 = time.time() + cls.ref_time = t1 - t0 + + @classmethod + def tearDownClass(cls): + cls.size = cls.ref = cls.data = cls.ref_time = None + + @classmethod + def validate(cls, res): + return ( + (res.min == cls.ref.min) and + (res.max == cls.ref.max) and + (res.cnt == cls.ref.cnt) and + abs(res.mean - cls.ref.mean) < 0.01 and + abs(res.std - cls.ref.std) < 0.1) + + def test_measurement(self): + """ + tests that all devices are working properly ... + """ + logger.info("Reference results: %s", self.ref) + for pid, platform in enumerate(ocl.platforms): + for did, device in enumerate(platform.devices): + try: + s = Statistics(template=self.data, platformid=pid, deviceid=did) + except Exception as err: + failed_init = True + res = StatResults(0,0,0,0,0,0,0) + else: + failed_init = False + t0 = time.time() + res = s(self.data) + t1 = time.time() + logger.warning("failed_init %s", failed_init) + if failed_init or not self.validate(res): + logger.error("Failed on platform %s device %s", platform, device) + logger.error("Reference results: %s", self.ref) + logger.error("Faulty results: %s", res) + self.assertTrue(False, "Stat calculation failed on %s %s" % (platform, device)) + logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0)) + + +def suite(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestStatistics("test_measurement")) + return testSuite + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") |