diff options
Diffstat (limited to 'silx/opencl/projection.py')
-rw-r--r-- | silx/opencl/projection.py | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py new file mode 100644 index 0000000..0ebe9bc --- /dev/null +++ b/silx/opencl/projection.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python +# coding: utf-8 +# /*########################################################################## +# +# Copyright (c) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +"""Module for tomographic projector on the GPU""" + +from __future__ import absolute_import, print_function, with_statement, division + +__authors__ = ["A. Mirone, P. Paleo"] +__license__ = "MIT" +__date__ = "26/06/2017" + +import logging +import numpy as np + +from .common import pyopencl +from .processing import EventDescription, OpenclProcessing, BufferDescription +from .backprojection import _sizeof, _idivup + +if pyopencl: + mf = pyopencl.mem_flags + import pyopencl.array as parray +else: + raise ImportError("pyopencl is not installed") +logger = logging.getLogger(__name__) + + +class Projection(OpenclProcessing): + """ + A class for performing a tomographic projection (Radon Transform) using + OpenCL + """ + kernel_files = ["proj.cl", "array_utils.cl"] + + def __init__(self, slice_shape, angles, axis_position=None, + detector_width=None, normalize=False, ctx=None, + devicetype="all", platformid=None, deviceid=None, + profile=False + ): + """Constructor of the OpenCL projector. + + :param slice_shape: shape of the slice: (num_rows, num_columns). + :param angles: Either an integer number of angles, or a list of custom + angles values in radian. + :param axis_position: Optional, axis position. Default is + `(shape[1]-1)/2.0`. + :param detector_width: Optional, detector width in pixels. + If detector_width > slice_shape[1], the + projection data will be surrounded with zeros. + Using detector_width < slice_shape[1] might + result in a local tomography setup. + :param normalize: Optional, normalization. If set, the sinograms are + multiplied by the factor pi/(2*nprojs). + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by + clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly + slower) + """ + # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers + # if sys.platform.startswith('darwin'): # assuming no discrete GPU + # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet") + + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + profile=profile) + self.shape = slice_shape + self.axis_pos = axis_position + self.angles = angles + self.dwidth = detector_width + self.normalize = normalize + + # Default values + if self.axis_pos is None: + self.axis_pos = (self.shape[1] - 1) / 2. + if self.dwidth is None: + self.dwidth = self.shape[1] + if not(np.iterable(self.angles)): + if self.angles is None: + self.nprojs = self.shape[0] + else: + self.nprojs = self.angles + self.angles = np.linspace(start=0, + stop=np.pi, + num=self.nprojs, + endpoint=False).astype(dtype=np.float32) + else: + self.nprojs = len(self.angles) + self.offset_x = -np.float32((self.shape[1]-1)/2. - self.axis_pos) # TODO: custom + self.offset_y = -np.float32((self.shape[0]-1)/2. - self.axis_pos) # TODO: custom + # Reset axis_pos once offset are computed + self.axis_pos0 = np.float((self.shape[1]-1)/2.) + + # Workgroup, ndrange and shared size + self.dimgrid_x = _idivup(self.dwidth, 16) + self.dimgrid_y = _idivup(self.nprojs, 16) + self._dimrecx = np.int32(self.dimgrid_x * 16) + self._dimrecy = np.int32(self.dimgrid_y * 16) + self.local_mem = 16 * 7 * _sizeof(np.float32) + self.wg = (16, 16) + self.ndrange = ( + int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1 + int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1 + ) + + self.is_cpu = False + if self.device.type == "CPU": + self.is_cpu = True + + # Allocate memory + self.buffers = [ + BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE), + BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY), + BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY), + BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY), + BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY), + ] + self.add_to_cl_mem( + { + "d_axis_corrections": parray.zeros(self.queue, + self.nprojs, np.float32) + } + ) + self._tmp_extended_img = np.zeros((self.shape[0]+2, self.shape[1]+2), + dtype=np.float32) + if self.is_cpu: + self.allocate_slice() + else: + self.allocate_textures() + self.allocate_buffers() + self._ex_sino = np.zeros((self._dimrecy, self._dimrecx), + dtype=np.float32) + if self.is_cpu: + self.cl_mem["d_slice"].fill(0.) + # enqueue_fill_buffer has issues if opencl 1.2 is not present + #~ pyopencl.enqueue_fill_buffer( + #~ self.queue, + #~ self.cl_mem["d_slice"], + #~ np.float32(0), + #~ 0, + #~ self._tmp_extended_img.size * _sizeof(np.float32) + #~ ) + # Precomputations + self.compute_angles() + self.proj_precomputations() + self.cl_mem["d_axis_corrections"].fill(0.) + # enqueue_fill_buffer has issues if opencl 1.2 is not present + #~ pyopencl.enqueue_fill_buffer( + #~ self.queue, + #~ self.cl_mem["d_axis_corrections"], + #~ np.float32(0), + #~ 0, + #~ self.nprojs*_sizeof(np.float32) + #~ ) + # Shorthands + self._d_sino = self.cl_mem["_d_sino"] + + OpenclProcessing.compile_kernels(self, self.kernel_files) + # check that workgroup can actually be (16, 16) + self.check_workgroup_size("forward_kernel_cpu") + + def compute_angles(self): + angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs + angles2[:self.nprojs] = np.copy(self.angles) + angles2[self.nprojs:] = angles2[self.nprojs-1] + self.angles2 = angles2 + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2) + + def allocate_slice(self): + self.add_to_cl_mem({"d_slice": parray.zeros(self.queue, (self.shape[1]+2, self.shape[1]+2), np.float32)}) + + def allocate_textures(self): + self.d_image_tex = pyopencl.Image( + self.ctx, + mf.READ_ONLY | mf.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, + pyopencl.channel_type.FLOAT + ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T), + ) + + def transfer_to_texture(self, image): + image2 = image + if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32): + image2 = np.ascontiguousarray(image) + if self.is_cpu: + # TODO: create NoneEvent + return self.transfer_to_slice(image2) + #~ return pyopencl.enqueue_copy( + #~ self.queue, + #~ self.cl_mem["d_slice"].data, + #~ image2, + #~ origin=(1, 1), + #~ region=image.shape[::-1] + #~ ) + else: + return pyopencl.enqueue_copy( + self.queue, + self.d_image_tex, + image2, + origin=(1, 1), + region=image.shape[::-1] + ) + + def transfer_device_to_texture(self, d_image): + if self.is_cpu: + # TODO this copy should not be necessary + return self.cpy2d_to_slice(d_image) + else: + return pyopencl.enqueue_copy( + self.queue, + self.d_image_tex, + d_image, + offset=0, + origin=(1, 1), + region=(int(self.shape[1]), int(self.shape[0]))#self.shape[::-1] # pyopencl <= 2015.2 + ) + + def transfer_to_slice(self, image): + image2 = np.zeros((image.shape[0]+2, image.shape[1]+2), dtype=np.float32) + image2[1:-1, 1:-1] = image.astype(np.float32) + self.cl_mem["d_slice"].set(image2) + + def proj_precomputations(self): + beginPos = np.zeros((2, self._dimrecy), dtype=np.int32) + strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32) + strideLine = np.zeros((2, self._dimrecy), dtype=np.int32) + cos_angles = np.cos(self.angles2) + sin_angles = np.sin(self.angles2) + dimslice = self.shape[1] + + M1 = np.abs(cos_angles) > 0.70710678 + M1b = np.logical_not(M1) + M2 = cos_angles > 0 + M2b = np.logical_not(M2) + M3 = sin_angles > 0 + M3b = np.logical_not(M3) + case1 = M1 * M2 + case2 = M1 * M2b + case3 = M1b * M3 + case4 = M1b * M3b + + beginPos[0][case1] = 0 + beginPos[1][case1] = 0 + strideJoseph[0][case1] = 1 + strideJoseph[1][case1] = 0 + strideLine[0][case1] = 0 + strideLine[1][case1] = 1 + + beginPos[0][case2] = dimslice-1 + beginPos[1][case2] = dimslice-1 + strideJoseph[0][case2] = -1 + strideJoseph[1][case2] = 0 + strideLine[0][case2] = 0 + strideLine[1][case2] = -1 + + beginPos[0][case3] = dimslice-1 + beginPos[1][case3] = 0 + strideJoseph[0][case3] = 0 + strideJoseph[1][case3] = 1 + strideLine[0][case3] = -1 + strideLine[1][case3] = 0 + + beginPos[0][case4] = 0 + beginPos[1][case4] = dimslice-1 + strideJoseph[0][case4] = 0 + strideJoseph[1][case4] = -1 + strideLine[0][case4] = 1 + strideLine[1][case4] = 0 + + # For debug purpose + #~ self.beginPos = beginPos + #~ self.strideJoseph = strideJoseph + #~ self.strideLine = strideLine + # + + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos) + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph) + pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine) + + def _get_local_mem(self): + return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes + + def cpy2d_to_sino(self, dst): + ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2 + sino_shape_ocl = np.int32(ndrange) + wg = None + kernel_args = ( + dst.data, + self._d_sino, + np.int32(self.dwidth), + np.int32(self._dimrecx), + np.int32((0, 0)), + np.int32((0, 0)), + sino_shape_ocl + ) + return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) + + def cpy2d_to_slice(self, src): + """ + copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2) + """ + ndrange = (int(self.shape[1]), int(self.shape[0])) #self.shape[::-1] # pyopencl < 2015.2 + wg = None + slice_shape_ocl = np.int32(ndrange) + kernel_args = ( + self.cl_mem["d_slice"].data, + src, + np.int32(self.shape[1]+2), + np.int32(self.shape[1]), + np.int32((1, 1)), + np.int32((0, 0)), + slice_shape_ocl + ) + return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) + + def projection(self, image=None, dst=None): + """Perform the projection on an input image + + :param image: Image to project + :return: A sinogram + """ + events = [] + with self.sem: + if image is not None: + assert image.ndim == 2, "Treat only 2D images" + assert image.shape[0] == self.shape[0], "image shape is OK" + assert image.shape[1] == self.shape[1], "image shape is OK" + if not(self.is_cpu): + self.transfer_to_texture(image) + slice_ref = self.d_image_tex + else: + self.transfer_to_slice(image) + slice_ref = self.cl_mem["d_slice"].data + else: + if self.is_cpu: + slice_ref = self.cl_mem["d_slice"].data + else: + slice_ref = self.d_image_tex + + kernel_args = ( + self._d_sino, + slice_ref, + np.int32(self.shape[1]), + np.int32(self.dwidth), + self.cl_mem["d_angles"], + np.float32(self.axis_pos0), + self.cl_mem["d_axis_corrections"].data, # TODO custom + self.cl_mem["d_beginPos"], + self.cl_mem["d_strideJoseph"], + self.cl_mem["d_strideLine"], + np.int32(self.nprojs), + self._dimrecx, + self._dimrecy, + self.offset_x, + self.offset_y, + np.int32(1), # josephnoclip, 1 by default + np.int32(self.normalize) + ) + + # Call the kernel + if self.is_cpu: + event_pj = self.kernels.forward_kernel_cpu( + self.queue, + self.ndrange, + self.wg, + *kernel_args + ) + else: + event_pj = self.kernels.forward_kernel( + self.queue, + self.ndrange, + self.wg, + *kernel_args + ) + events.append(EventDescription("projection", event_pj)) + if dst is None: + self._ex_sino[:] = 0 + ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino) + events.append(EventDescription("copy D->H result", ev)) + ev.wait() + res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth]) + else: + ev = self.cpy2d_to_sino(dst) + events.append(EventDescription("copy D->D result", ev)) + ev.wait() + res = dst + # /with self.sem + if self.profile: + self.events += events + #~ res = self._ex_sino + return res + + __call__ = projection |