summaryrefslogtreecommitdiff
path: root/silx/opencl/projection.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/opencl/projection.py')
-rw-r--r--silx/opencl/projection.py419
1 files changed, 419 insertions, 0 deletions
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
new file mode 100644
index 0000000..0ebe9bc
--- /dev/null
+++ b/silx/opencl/projection.py
@@ -0,0 +1,419 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for tomographic projector on the GPU"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["A. Mirone, P. Paleo"]
+__license__ = "MIT"
+__date__ = "26/06/2017"
+
+import logging
+import numpy as np
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+from .backprojection import _sizeof, _idivup
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+ import pyopencl.array as parray
+else:
+ raise ImportError("pyopencl is not installed")
+logger = logging.getLogger(__name__)
+
+
+class Projection(OpenclProcessing):
+ """
+ A class for performing a tomographic projection (Radon Transform) using
+ OpenCL
+ """
+ kernel_files = ["proj.cl", "array_utils.cl"]
+
+ def __init__(self, slice_shape, angles, axis_position=None,
+ detector_width=None, normalize=False, ctx=None,
+ devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+ """Constructor of the OpenCL projector.
+
+ :param slice_shape: shape of the slice: (num_rows, num_columns).
+ :param angles: Either an integer number of angles, or a list of custom
+ angles values in radian.
+ :param axis_position: Optional, axis position. Default is
+ `(shape[1]-1)/2.0`.
+ :param detector_width: Optional, detector width in pixels.
+ If detector_width > slice_shape[1], the
+ projection data will be surrounded with zeros.
+ Using detector_width < slice_shape[1] might
+ result in a local tomography setup.
+ :param normalize: Optional, normalization. If set, the sinograms are
+ multiplied by the factor pi/(2*nprojs).
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ """
+ # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers
+ # if sys.platform.startswith('darwin'): # assuming no discrete GPU
+ # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
+
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+ self.shape = slice_shape
+ self.axis_pos = axis_position
+ self.angles = angles
+ self.dwidth = detector_width
+ self.normalize = normalize
+
+ # Default values
+ if self.axis_pos is None:
+ self.axis_pos = (self.shape[1] - 1) / 2.
+ if self.dwidth is None:
+ self.dwidth = self.shape[1]
+ if not(np.iterable(self.angles)):
+ if self.angles is None:
+ self.nprojs = self.shape[0]
+ else:
+ self.nprojs = self.angles
+ self.angles = np.linspace(start=0,
+ stop=np.pi,
+ num=self.nprojs,
+ endpoint=False).astype(dtype=np.float32)
+ else:
+ self.nprojs = len(self.angles)
+ self.offset_x = -np.float32((self.shape[1]-1)/2. - self.axis_pos) # TODO: custom
+ self.offset_y = -np.float32((self.shape[0]-1)/2. - self.axis_pos) # TODO: custom
+ # Reset axis_pos once offset are computed
+ self.axis_pos0 = np.float((self.shape[1]-1)/2.)
+
+ # Workgroup, ndrange and shared size
+ self.dimgrid_x = _idivup(self.dwidth, 16)
+ self.dimgrid_y = _idivup(self.nprojs, 16)
+ self._dimrecx = np.int32(self.dimgrid_x * 16)
+ self._dimrecy = np.int32(self.dimgrid_y * 16)
+ self.local_mem = 16 * 7 * _sizeof(np.float32)
+ self.wg = (16, 16)
+ self.ndrange = (
+ int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1
+ int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1
+ )
+
+ self.is_cpu = False
+ if self.device.type == "CPU":
+ self.is_cpu = True
+
+ # Allocate memory
+ self.buffers = [
+ BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE),
+ BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY),
+ BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ ]
+ self.add_to_cl_mem(
+ {
+ "d_axis_corrections": parray.zeros(self.queue,
+ self.nprojs, np.float32)
+ }
+ )
+ self._tmp_extended_img = np.zeros((self.shape[0]+2, self.shape[1]+2),
+ dtype=np.float32)
+ if self.is_cpu:
+ self.allocate_slice()
+ else:
+ self.allocate_textures()
+ self.allocate_buffers()
+ self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
+ dtype=np.float32)
+ if self.is_cpu:
+ self.cl_mem["d_slice"].fill(0.)
+ # enqueue_fill_buffer has issues if opencl 1.2 is not present
+ #~ pyopencl.enqueue_fill_buffer(
+ #~ self.queue,
+ #~ self.cl_mem["d_slice"],
+ #~ np.float32(0),
+ #~ 0,
+ #~ self._tmp_extended_img.size * _sizeof(np.float32)
+ #~ )
+ # Precomputations
+ self.compute_angles()
+ self.proj_precomputations()
+ self.cl_mem["d_axis_corrections"].fill(0.)
+ # enqueue_fill_buffer has issues if opencl 1.2 is not present
+ #~ pyopencl.enqueue_fill_buffer(
+ #~ self.queue,
+ #~ self.cl_mem["d_axis_corrections"],
+ #~ np.float32(0),
+ #~ 0,
+ #~ self.nprojs*_sizeof(np.float32)
+ #~ )
+ # Shorthands
+ self._d_sino = self.cl_mem["_d_sino"]
+
+ OpenclProcessing.compile_kernels(self, self.kernel_files)
+ # check that workgroup can actually be (16, 16)
+ self.check_workgroup_size("forward_kernel_cpu")
+
+ def compute_angles(self):
+ angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs
+ angles2[:self.nprojs] = np.copy(self.angles)
+ angles2[self.nprojs:] = angles2[self.nprojs-1]
+ self.angles2 = angles2
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
+
+ def allocate_slice(self):
+ self.add_to_cl_mem({"d_slice": parray.zeros(self.queue, (self.shape[1]+2, self.shape[1]+2), np.float32)})
+
+ def allocate_textures(self):
+ self.d_image_tex = pyopencl.Image(
+ self.ctx,
+ mf.READ_ONLY | mf.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY,
+ pyopencl.channel_type.FLOAT
+ ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
+ )
+
+ def transfer_to_texture(self, image):
+ image2 = image
+ if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
+ image2 = np.ascontiguousarray(image)
+ if self.is_cpu:
+ # TODO: create NoneEvent
+ return self.transfer_to_slice(image2)
+ #~ return pyopencl.enqueue_copy(
+ #~ self.queue,
+ #~ self.cl_mem["d_slice"].data,
+ #~ image2,
+ #~ origin=(1, 1),
+ #~ region=image.shape[::-1]
+ #~ )
+ else:
+ return pyopencl.enqueue_copy(
+ self.queue,
+ self.d_image_tex,
+ image2,
+ origin=(1, 1),
+ region=image.shape[::-1]
+ )
+
+ def transfer_device_to_texture(self, d_image):
+ if self.is_cpu:
+ # TODO this copy should not be necessary
+ return self.cpy2d_to_slice(d_image)
+ else:
+ return pyopencl.enqueue_copy(
+ self.queue,
+ self.d_image_tex,
+ d_image,
+ offset=0,
+ origin=(1, 1),
+ region=(int(self.shape[1]), int(self.shape[0]))#self.shape[::-1] # pyopencl <= 2015.2
+ )
+
+ def transfer_to_slice(self, image):
+ image2 = np.zeros((image.shape[0]+2, image.shape[1]+2), dtype=np.float32)
+ image2[1:-1, 1:-1] = image.astype(np.float32)
+ self.cl_mem["d_slice"].set(image2)
+
+ def proj_precomputations(self):
+ beginPos = np.zeros((2, self._dimrecy), dtype=np.int32)
+ strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32)
+ strideLine = np.zeros((2, self._dimrecy), dtype=np.int32)
+ cos_angles = np.cos(self.angles2)
+ sin_angles = np.sin(self.angles2)
+ dimslice = self.shape[1]
+
+ M1 = np.abs(cos_angles) > 0.70710678
+ M1b = np.logical_not(M1)
+ M2 = cos_angles > 0
+ M2b = np.logical_not(M2)
+ M3 = sin_angles > 0
+ M3b = np.logical_not(M3)
+ case1 = M1 * M2
+ case2 = M1 * M2b
+ case3 = M1b * M3
+ case4 = M1b * M3b
+
+ beginPos[0][case1] = 0
+ beginPos[1][case1] = 0
+ strideJoseph[0][case1] = 1
+ strideJoseph[1][case1] = 0
+ strideLine[0][case1] = 0
+ strideLine[1][case1] = 1
+
+ beginPos[0][case2] = dimslice-1
+ beginPos[1][case2] = dimslice-1
+ strideJoseph[0][case2] = -1
+ strideJoseph[1][case2] = 0
+ strideLine[0][case2] = 0
+ strideLine[1][case2] = -1
+
+ beginPos[0][case3] = dimslice-1
+ beginPos[1][case3] = 0
+ strideJoseph[0][case3] = 0
+ strideJoseph[1][case3] = 1
+ strideLine[0][case3] = -1
+ strideLine[1][case3] = 0
+
+ beginPos[0][case4] = 0
+ beginPos[1][case4] = dimslice-1
+ strideJoseph[0][case4] = 0
+ strideJoseph[1][case4] = -1
+ strideLine[0][case4] = 1
+ strideLine[1][case4] = 0
+
+ # For debug purpose
+ #~ self.beginPos = beginPos
+ #~ self.strideJoseph = strideJoseph
+ #~ self.strideLine = strideLine
+ #
+
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine)
+
+ def _get_local_mem(self):
+ return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
+
+ def cpy2d_to_sino(self, dst):
+ ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2
+ sino_shape_ocl = np.int32(ndrange)
+ wg = None
+ kernel_args = (
+ dst.data,
+ self._d_sino,
+ np.int32(self.dwidth),
+ np.int32(self._dimrecx),
+ np.int32((0, 0)),
+ np.int32((0, 0)),
+ sino_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def cpy2d_to_slice(self, src):
+ """
+ copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2)
+ """
+ ndrange = (int(self.shape[1]), int(self.shape[0])) #self.shape[::-1] # pyopencl < 2015.2
+ wg = None
+ slice_shape_ocl = np.int32(ndrange)
+ kernel_args = (
+ self.cl_mem["d_slice"].data,
+ src,
+ np.int32(self.shape[1]+2),
+ np.int32(self.shape[1]),
+ np.int32((1, 1)),
+ np.int32((0, 0)),
+ slice_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def projection(self, image=None, dst=None):
+ """Perform the projection on an input image
+
+ :param image: Image to project
+ :return: A sinogram
+ """
+ events = []
+ with self.sem:
+ if image is not None:
+ assert image.ndim == 2, "Treat only 2D images"
+ assert image.shape[0] == self.shape[0], "image shape is OK"
+ assert image.shape[1] == self.shape[1], "image shape is OK"
+ if not(self.is_cpu):
+ self.transfer_to_texture(image)
+ slice_ref = self.d_image_tex
+ else:
+ self.transfer_to_slice(image)
+ slice_ref = self.cl_mem["d_slice"].data
+ else:
+ if self.is_cpu:
+ slice_ref = self.cl_mem["d_slice"].data
+ else:
+ slice_ref = self.d_image_tex
+
+ kernel_args = (
+ self._d_sino,
+ slice_ref,
+ np.int32(self.shape[1]),
+ np.int32(self.dwidth),
+ self.cl_mem["d_angles"],
+ np.float32(self.axis_pos0),
+ self.cl_mem["d_axis_corrections"].data, # TODO custom
+ self.cl_mem["d_beginPos"],
+ self.cl_mem["d_strideJoseph"],
+ self.cl_mem["d_strideLine"],
+ np.int32(self.nprojs),
+ self._dimrecx,
+ self._dimrecy,
+ self.offset_x,
+ self.offset_y,
+ np.int32(1), # josephnoclip, 1 by default
+ np.int32(self.normalize)
+ )
+
+ # Call the kernel
+ if self.is_cpu:
+ event_pj = self.kernels.forward_kernel_cpu(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *kernel_args
+ )
+ else:
+ event_pj = self.kernels.forward_kernel(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *kernel_args
+ )
+ events.append(EventDescription("projection", event_pj))
+ if dst is None:
+ self._ex_sino[:] = 0
+ ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino)
+ events.append(EventDescription("copy D->H result", ev))
+ ev.wait()
+ res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth])
+ else:
+ ev = self.cpy2d_to_sino(dst)
+ events.append(EventDescription("copy D->D result", ev))
+ ev.wait()
+ res = dst
+ # /with self.sem
+ if self.profile:
+ self.events += events
+ #~ res = self._ex_sino
+ return res
+
+ __call__ = projection