1 files changed, 419 insertions, 0 deletions
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
new file mode 100644
index 0000000..0ebe9bc
--- /dev/null
+++ b/silx/opencl/projection.py
@@ -0,0 +1,419 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for tomographic projector on the GPU"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["A. Mirone, P. Paleo"]
+__license__ = "MIT"
+__date__ = "26/06/2017"
+
+import logging
+import numpy as np
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+from .backprojection import _sizeof, _idivup
+
+if pyopencl:
+    mf = pyopencl.mem_flags
+    import pyopencl.array as parray
+else:
+    raise ImportError("pyopencl is not installed")
+logger = logging.getLogger(__name__)
+
+
+class Projection(OpenclProcessing):
+    """
+    A class for performing a tomographic projection (Radon Transform) using
+    OpenCL
+    """
+    kernel_files = ["proj.cl", "array_utils.cl"]
+
+    def __init__(self, slice_shape, angles, axis_position=None,
+                 detector_width=None, normalize=False, ctx=None,
+                 devicetype="all", platformid=None, deviceid=None,
+                 profile=False
+                 ):
+        """Constructor of the OpenCL projector.
+
+        :param slice_shape: shape of the slice: (num_rows, num_columns).
+        :param angles: Either an integer number of angles, or a list of custom
+                       angles values in radian.
+        :param axis_position: Optional, axis position. Default is
+                              `(shape[1]-1)/2.0`.
+        :param detector_width: Optional, detector width in pixels.
+                               If detector_width > slice_shape[1], the
+                               projection data will be surrounded with zeros.
+                               Using detector_width < slice_shape[1] might
+                               result in a local tomography setup.
+        :param normalize: Optional, normalization. If set, the sinograms are
+                          multiplied by the factor pi/(2*nprojs).
+        :param ctx: actual working context, left to None for automatic
+                    initialization from device type or platformid/deviceid
+        :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+        :param platformid: integer with the platform_identifier, as given by
+                           clinfo
+        :param deviceid: Integer with the device identifier, as given by clinfo
+        :param profile: switch on profiling to be able to profile at the kernel
+                        level, store profiling elements (makes code slightly
+                        slower)
+        """
+        # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers
+        # if sys.platform.startswith('darwin'): # assuming no discrete GPU
+        #    raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
+
+        OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+                                  platformid=platformid, deviceid=deviceid,
+                                  profile=profile)
+        self.shape = slice_shape
+        self.axis_pos = axis_position
+        self.angles = angles
+        self.dwidth = detector_width
+        self.normalize = normalize
+
+        # Default values
+        if self.axis_pos is None:
+            self.axis_pos = (self.shape[1] - 1) / 2.
+        if self.dwidth is None:
+            self.dwidth = self.shape[1]
+        if not(np.iterable(self.angles)):
+            if self.angles is None:
+                self.nprojs = self.shape[0]
+            else:
+                self.nprojs = self.angles
+            self.angles = np.linspace(start=0,
+                                      stop=np.pi,
+                                      num=self.nprojs,
+                                      endpoint=False).astype(dtype=np.float32)
+        else:
+            self.nprojs = len(self.angles)
+        self.offset_x = -np.float32((self.shape[1]-1)/2. - self.axis_pos) # TODO: custom
+        self.offset_y = -np.float32((self.shape[0]-1)/2. - self.axis_pos) # TODO: custom
+        # Reset axis_pos once offset are computed
+        self.axis_pos0 = np.float((self.shape[1]-1)/2.)
+
+        # Workgroup, ndrange and shared size
+        self.dimgrid_x = _idivup(self.dwidth, 16)
+        self.dimgrid_y = _idivup(self.nprojs, 16)
+        self._dimrecx = np.int32(self.dimgrid_x * 16)
+        self._dimrecy = np.int32(self.dimgrid_y * 16)
+        self.local_mem = 16 * 7 * _sizeof(np.float32)
+        self.wg = (16, 16)
+        self.ndrange = (
+            int(self.dimgrid_x) * self.wg[0],  # int(): pyopencl <= 2015.1
+            int(self.dimgrid_y) * self.wg[1]   # int(): pyopencl <= 2015.1
+        )
+
+        self.is_cpu = False
+        if self.device.type == "CPU":
+            self.is_cpu = True
+
+        # Allocate memory
+        self.buffers = [
+            BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE),
+            BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY),
+            BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+            BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+            BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+        ]
+        self.add_to_cl_mem(
+            {
+                "d_axis_corrections": parray.zeros(self.queue,
+                                                   self.nprojs, np.float32)
+            }
+        )
+        self._tmp_extended_img = np.zeros((self.shape[0]+2, self.shape[1]+2),
+                                          dtype=np.float32)
+        if self.is_cpu:
+            self.allocate_slice()
+        else:
+            self.allocate_textures()
+        self.allocate_buffers()
+        self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
+                                 dtype=np.float32)
+        if self.is_cpu:
+            self.cl_mem["d_slice"].fill(0.)
+            # enqueue_fill_buffer has issues if opencl 1.2 is not present
+            #~ pyopencl.enqueue_fill_buffer(
+                #~ self.queue,
+                #~ self.cl_mem["d_slice"],
+                #~ np.float32(0),
+                #~ 0,
+                #~ self._tmp_extended_img.size * _sizeof(np.float32)
+            #~ )
+        # Precomputations
+        self.compute_angles()
+        self.proj_precomputations()
+        self.cl_mem["d_axis_corrections"].fill(0.)
+        # enqueue_fill_buffer has issues if opencl 1.2 is not present
+        #~ pyopencl.enqueue_fill_buffer(
+                                    #~ self.queue,
+                                    #~ self.cl_mem["d_axis_corrections"],
+                                    #~ np.float32(0),
+                                    #~ 0,
+                                    #~ self.nprojs*_sizeof(np.float32)
+                                    #~ )
+        # Shorthands
+        self._d_sino = self.cl_mem["_d_sino"]
+
+        OpenclProcessing.compile_kernels(self, self.kernel_files)
+        # check that workgroup can actually be (16, 16)
+        self.check_workgroup_size("forward_kernel_cpu")
+
+    def compute_angles(self):
+        angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs
+        angles2[:self.nprojs] = np.copy(self.angles)
+        angles2[self.nprojs:] = angles2[self.nprojs-1]
+        self.angles2 = angles2
+        pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
+
+    def allocate_slice(self):
+            self.add_to_cl_mem({"d_slice": parray.zeros(self.queue, (self.shape[1]+2, self.shape[1]+2), np.float32)})
+
+    def allocate_textures(self):
+        self.d_image_tex = pyopencl.Image(
+                self.ctx,
+                mf.READ_ONLY | mf.USE_HOST_PTR,
+                pyopencl.ImageFormat(
+                    pyopencl.channel_order.INTENSITY,
+                    pyopencl.channel_type.FLOAT
+                ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
+            )
+
+    def transfer_to_texture(self, image):
+        image2 = image
+        if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
+            image2 = np.ascontiguousarray(image)
+        if self.is_cpu:
+            # TODO: create NoneEvent
+            return self.transfer_to_slice(image2)
+            #~ return pyopencl.enqueue_copy(
+                        #~ self.queue,
+                        #~ self.cl_mem["d_slice"].data,
+                        #~ image2,
+                        #~ origin=(1, 1),
+                        #~ region=image.shape[::-1]
+                        #~ )
+        else:
+            return pyopencl.enqueue_copy(
+                       self.queue,
+                       self.d_image_tex,
+                       image2,
+                       origin=(1, 1),
+                       region=image.shape[::-1]
+                   )
+
+    def transfer_device_to_texture(self, d_image):
+        if self.is_cpu:
+            # TODO this copy should not be necessary
+            return self.cpy2d_to_slice(d_image)
+        else:
+            return pyopencl.enqueue_copy(
+                self.queue,
+                self.d_image_tex,
+                d_image,
+                offset=0,
+                origin=(1, 1),
+                region=(int(self.shape[1]), int(self.shape[0]))#self.shape[::-1] # pyopencl <= 2015.2
+            )
+
+    def transfer_to_slice(self, image):
+        image2 = np.zeros((image.shape[0]+2, image.shape[1]+2), dtype=np.float32)
+        image2[1:-1, 1:-1] = image.astype(np.float32)
+        self.cl_mem["d_slice"].set(image2)
+
+    def proj_precomputations(self):
+        beginPos = np.zeros((2, self._dimrecy), dtype=np.int32)
+        strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32)
+        strideLine = np.zeros((2, self._dimrecy), dtype=np.int32)
+        cos_angles = np.cos(self.angles2)
+        sin_angles = np.sin(self.angles2)
+        dimslice = self.shape[1]
+
+        M1 = np.abs(cos_angles) > 0.70710678
+        M1b = np.logical_not(M1)
+        M2 = cos_angles > 0
+        M2b = np.logical_not(M2)
+        M3 = sin_angles > 0
+        M3b = np.logical_not(M3)
+        case1 = M1 * M2
+        case2 = M1 * M2b
+        case3 = M1b * M3
+        case4 = M1b * M3b
+
+        beginPos[0][case1] = 0
+        beginPos[1][case1] = 0
+        strideJoseph[0][case1] = 1
+        strideJoseph[1][case1] = 0
+        strideLine[0][case1] = 0
+        strideLine[1][case1] = 1
+
+        beginPos[0][case2] = dimslice-1
+        beginPos[1][case2] = dimslice-1
+        strideJoseph[0][case2] = -1
+        strideJoseph[1][case2] = 0
+        strideLine[0][case2] = 0
+        strideLine[1][case2] = -1
+
+        beginPos[0][case3] = dimslice-1
+        beginPos[1][case3] = 0
+        strideJoseph[0][case3] = 0
+        strideJoseph[1][case3] = 1
+        strideLine[0][case3] = -1
+        strideLine[1][case3] = 0
+
+        beginPos[0][case4] = 0
+        beginPos[1][case4] = dimslice-1
+        strideJoseph[0][case4] = 0
+        strideJoseph[1][case4] = -1
+        strideLine[0][case4] = 1
+        strideLine[1][case4] = 0
+
+        # For debug purpose
+        #~ self.beginPos = beginPos
+        #~ self.strideJoseph = strideJoseph
+        #~ self.strideLine = strideLine
+        #
+
+        pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos)
+        pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph)
+        pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine)
+
+    def _get_local_mem(self):
+        return pyopencl.LocalMemory(self.local_mem)  # constant for all image sizes
+
+    def cpy2d_to_sino(self, dst):
+        ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2
+        sino_shape_ocl = np.int32(ndrange)
+        wg = None
+        kernel_args = (
+            dst.data,
+            self._d_sino,
+            np.int32(self.dwidth),
+            np.int32(self._dimrecx),
+            np.int32((0, 0)),
+            np.int32((0, 0)),
+            sino_shape_ocl
+        )
+        return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+    def cpy2d_to_slice(self, src):
+        """
+        copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2)
+        """
+        ndrange = (int(self.shape[1]), int(self.shape[0])) #self.shape[::-1] # pyopencl < 2015.2
+        wg = None
+        slice_shape_ocl = np.int32(ndrange)
+        kernel_args = (
+            self.cl_mem["d_slice"].data,
+            src,
+            np.int32(self.shape[1]+2),
+            np.int32(self.shape[1]),
+            np.int32((1, 1)),
+            np.int32((0, 0)),
+            slice_shape_ocl
+        )
+        return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+    def projection(self, image=None, dst=None):
+        """Perform the projection on an input image
+
+        :param image: Image to project
+        :return: A sinogram
+        """
+        events = []
+        with self.sem:
+            if image is not None:
+                assert image.ndim == 2, "Treat only 2D images"
+                assert image.shape[0] == self.shape[0], "image shape is OK"
+                assert image.shape[1] == self.shape[1], "image shape is OK"
+                if not(self.is_cpu):
+                    self.transfer_to_texture(image)
+                    slice_ref = self.d_image_tex
+                else:
+                    self.transfer_to_slice(image)
+                    slice_ref = self.cl_mem["d_slice"].data
+            else:
+                if self.is_cpu:
+                    slice_ref = self.cl_mem["d_slice"].data
+                else:
+                    slice_ref = self.d_image_tex
+
+            kernel_args = (
+                self._d_sino,
+                slice_ref,
+                np.int32(self.shape[1]),
+                np.int32(self.dwidth),
+                self.cl_mem["d_angles"],
+                np.float32(self.axis_pos0),
+                self.cl_mem["d_axis_corrections"].data,  # TODO custom
+                self.cl_mem["d_beginPos"],
+                self.cl_mem["d_strideJoseph"],
+                self.cl_mem["d_strideLine"],
+                np.int32(self.nprojs),
+                self._dimrecx,
+                self._dimrecy,
+                self.offset_x,
+                self.offset_y,
+                np.int32(1),  # josephnoclip, 1 by default
+                np.int32(self.normalize)
+            )
+
+            # Call the kernel
+            if self.is_cpu:
+                event_pj = self.kernels.forward_kernel_cpu(
+                    self.queue,
+                    self.ndrange,
+                    self.wg,
+                    *kernel_args
+                )
+            else:
+                event_pj = self.kernels.forward_kernel(
+                    self.queue,
+                    self.ndrange,
+                    self.wg,
+                    *kernel_args
+                )
+            events.append(EventDescription("projection", event_pj))
+            if dst is None:
+                self._ex_sino[:] = 0
+                ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino)
+                events.append(EventDescription("copy D->H result", ev))
+                ev.wait()
+                res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth])
+            else:
+                ev = self.cpy2d_to_sino(dst)
+                events.append(EventDescription("copy D->D result", ev))
+                ev.wait()
+                res = dst
+        # /with self.sem
+        if self.profile:
+            self.events += events
+        #~ res = self._ex_sino
+        return res
+
+    __call__ = projection