diff options
Diffstat (limited to 'silx/opencl/backprojection.py')
-rw-r--r-- | silx/opencl/backprojection.py | 467 |
1 files changed, 188 insertions, 279 deletions
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py index ca7244e..c257872 100644 --- a/silx/opencl/backprojection.py +++ b/silx/opencl/backprojection.py @@ -29,37 +29,30 @@ from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["A. Mirone, P. Paleo"] __license__ = "MIT" -__date__ = "19/01/2018" +__date__ = "25/01/2019" import logging -import numpy +import numpy as np from .common import pyopencl from .processing import EventDescription, OpenclProcessing, BufferDescription -from .utils import nextpower as nextpow2 +from .sinofilter import SinoFilter +from .sinofilter import fourier_filter as fourier_filter_ +from ..utils.deprecation import deprecated if pyopencl: mf = pyopencl.mem_flags import pyopencl.array as parray else: - raise ImportError("pyopencl is not installed") + raise ImportError("Please install pyopencl in order to use opencl backprojection") logger = logging.getLogger(__name__) -# put in .common ? -try: - from pyfft.cl import Plan as pyfft_Plan - _has_pyfft = True -except ImportError: - _has_pyfft = False -# For silx v0.6 we disable the use FFT on GPU. -_has_pyfft = False - def _sizeof(Type): """ return the size (in bytes) of a scalar type, like the C behavior """ - return numpy.dtype(Type).itemsize + return np.dtype(Type).itemsize def _idivup(a, b): @@ -69,53 +62,14 @@ def _idivup(a, b): return (a + (b - 1)) // b -def fourier_filter(sino, filter_=None, fft_size=None): - """Simple numpy based implementation of fourier space filter - - :param sino: of shape shape = (num_projs, num_bins) - :param filter: filter function to apply in fourier space - :fft_size: size on which perform the fft. May be larger than the sino array - :return: filtered sinogram - """ - assert sino.ndim == 2 - num_projs, num_bins = sino.shape - if fft_size is None: - fft_size = nextpow2(num_bins * 2 - 1) - else: - assert fft_size >= num_bins - if fft_size == num_bins: - sino_zeropadded = sino.astype(numpy.float32) - else: - sino_zeropadded = numpy.zeros((num_projs, fft_size), - dtype=numpy.complex64) - sino_zeropadded[:, :num_bins] = sino.astype(numpy.float32) - - if filter_ is None: - h = numpy.zeros(fft_size, dtype=numpy.float32) - L2 = fft_size // 2 + 1 - h[0] = 1 / 4. - j = numpy.linspace(1, L2, L2 // 2, False) - h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2) - h[L2:] = numpy.copy(h[1:L2 - 1][::-1]) - filter_ = numpy.fft.fft(h).astype(numpy.complex64) - - # Linear convolution - sino_f = numpy.fft.fft(sino, fft_size) - sino_f = sino_f * filter_ - sino_filtered = numpy.fft.ifft(sino_f)[:, :num_bins].real - # Send the filtered sinogram to device - return numpy.ascontiguousarray(sino_filtered.real, dtype=numpy.float32) - - class Backprojection(OpenclProcessing): """A class for performing the backprojection using OpenCL""" kernel_files = ["backproj.cl", "array_utils.cl"] - if _has_pyfft: - kernel_files.append("backproj_helper.cl") def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, filter_name=None, ctx=None, devicetype="all", - platformid=None, deviceid=None, profile=False): + platformid=None, deviceid=None, profile=False, + extra_options=None): """Constructor of the OpenCL (filtered) backprojection :param sino_shape: shape of the sinogram. The sinogram is in the format @@ -139,6 +93,8 @@ class Backprojection(OpenclProcessing): :param profile: switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) + :param extra_options: Advanced extra options in the form of a dict. + Current options are: cutoff, use_numpy_fft """ # OS X enforces a workgroup size of 1 when the kernel has # synchronization barriers if sys.platform.startswith('darwin'): @@ -148,10 +104,32 @@ class Backprojection(OpenclProcessing): OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, platformid=platformid, deviceid=deviceid, profile=profile) - self.shape = sino_shape - self.num_bins = numpy.int32(sino_shape[1]) - self.num_projs = numpy.int32(sino_shape[0]) + self._init_geometry(sino_shape, slice_shape, angles, axis_position, + extra_options) + self._allocate_memory() + self._compute_angles() + self._init_kernels() + self._init_filter(filter_name) + + def _init_geometry(self, sino_shape, slice_shape, angles, axis_position, + extra_options): + """Geometry Initialization + + :param sino_shape: shape of the sinogram. The sinogram is in the format + (n_b, n_a) where n_b is the number of detector bins + and n_a is the number of angles. + :param slice_shape: shape of the reconstructed slice. By + default, it is a square slice where the dimension + is the "x dimension" of the sinogram (number of + bins). + :param angles: list of projection angles in radian. + :param axis_position: axis position + :param dict extra_options: Advanced extra options + """ + self.shape = sino_shape + self.num_bins = np.int32(sino_shape[1]) + self.num_projs = np.int32(sino_shape[0]) self.angles = angles if slice_shape is None: self.slice_shape = (self.num_bins, self.num_bins) @@ -161,152 +139,161 @@ class Backprojection(OpenclProcessing): _idivup(self.slice_shape[0], 32) * 32, _idivup(self.slice_shape[1], 32) * 32 ) - self.slice = numpy.zeros(self.dimrec_shape, dtype=numpy.float32) - self.filter_name = filter_name if filter_name else "Ram-Lak" if axis_position: - self.axis_pos = numpy.float32(axis_position) + self.axis_pos = np.float32(axis_position) else: - self.axis_pos = numpy.float32((sino_shape[1] - 1.) / 2) + self.axis_pos = np.float32((sino_shape[1] - 1.) / 2) self.axis_array = None # TODO: add axis correction front-end + self._init_extra_options(extra_options) + def _init_extra_options(self, extra_options): + """Backprojection extra option initialization + + :param dict extra_options: Advanced extra options + """ + self.extra_options = { + "cutoff": 1., + "use_numpy_fft": False, + } + if extra_options is not None: + self.extra_options.update(extra_options) + + def _allocate_memory(self): + # Host memory + self.slice = np.zeros(self.dimrec_shape, dtype=np.float32) self.is_cpu = False if self.device.type == "CPU": self.is_cpu = True - self.compute_fft_plans() + # Device memory self.buffers = [ - BufferDescription("_d_slice", numpy.prod(self.dimrec_shape), numpy.float32, mf.READ_WRITE), - BufferDescription("d_sino", self.num_projs * self.num_bins, numpy.float32, mf.READ_WRITE), # before transferring to texture (if available) - BufferDescription("d_cos", self.num_projs, numpy.float32, mf.READ_ONLY), - BufferDescription("d_sin", self.num_projs, numpy.float32, mf.READ_ONLY), - BufferDescription("d_axes", self.num_projs, numpy.float32, mf.READ_ONLY), - ] - self.allocate_buffers() - if not(self.is_cpu): - self.allocate_textures() - self.compute_filter() - if self.pyfft_plan: - self.add_to_cl_mem({ - "d_filter": self.d_filter, - "d_sino_z": self.d_sino_z - }) + BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE), + BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available) + BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY), + BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY), + BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY), + ] + self.allocate_buffers(use_array=True) self.d_sino = self.cl_mem["d_sino"] # shorthand - self.compute_angles() - self.local_mem = 256 * 3 * _sizeof(numpy.float32) # constant for all image sizes + # Texture memory (if relevant) + if not(self.is_cpu): + self._allocate_textures() + + # Local memory + self.local_mem = 256 * 3 * _sizeof(np.float32) # constant for all image sizes + + def _compute_angles(self): + if self.angles is None: + self.angles = np.linspace(0, np.pi, self.num_projs, False) + h_cos = np.cos(self.angles).astype(np.float32) + h_sin = np.sin(self.angles).astype(np.float32) + self.cl_mem["d_cos"][:] = h_cos[:] + self.cl_mem["d_sin"][:] = h_sin[:] + if self.axis_array: + self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:] + else: + self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos + + def _init_kernels(self): OpenclProcessing.compile_kernels(self, self.kernel_files) # check that workgroup can actually be (16, 16) self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel") # Workgroup and ndrange sizes are always the same self.wg = (16, 16) self.ndrange = ( - _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], # int(): pyopencl <= 2015.1 - _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] # int(): pyopencl <= 2015.1 + _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], + _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] ) - - def compute_angles(self): - if self.angles is None: - self.angles = numpy.linspace(0, numpy.pi, self.num_projs, False) - h_cos = numpy.cos(self.angles).astype(numpy.float32) - h_sin = numpy.sin(self.angles).astype(numpy.float32) - pyopencl.enqueue_copy(self.queue, self.cl_mem["d_cos"], h_cos) - pyopencl.enqueue_copy(self.queue, self.cl_mem["d_sin"], h_sin) - if self.axis_array: - pyopencl.enqueue_copy(self.queue, - self.cl_mem["d_axes"], - self.axis_array.astype(numpy.float32)) + # Prepare arguments for the kernel call + if self.is_cpu: + d_sino_ref = self.d_sino.data else: - pyopencl.enqueue_copy(self.queue, - self.cl_mem["d_axes"], - numpy.ones(self.num_projs, dtype=numpy.float32) * self.axis_pos) + d_sino_ref = self.d_sino_tex + self._backproj_kernel_args = ( + # num of projections (int32) + self.num_projs, + # num of bins (int32) + self.num_bins, + # axis position (float32) + self.axis_pos, + # d_slice (__global float32*) + self.cl_mem["_d_slice"].data, + # d_sino (__read_only image2d_t or float*) + d_sino_ref, + # gpu_offset_x (float32) # TODO custom ? + -np.float32((self.num_bins - 1) / 2. - self.axis_pos), + # gpu_offset_y (float32) # TODO custom ? + -np.float32((self.num_bins - 1) / 2. - self.axis_pos), + # d_cos (__global float32*) + self.cl_mem["d_cos"].data, + # d_sin (__global float32*) + self.cl_mem["d_sin"].data, + # d_axis (__global float32*) + self.cl_mem["d_axes"].data, + # shared mem (__local float32*) + self._get_local_mem() + ) - def allocate_textures(self): + def _allocate_textures(self): """ Allocate the texture for the sinogram. """ self.d_sino_tex = pyopencl.Image( - self.ctx, - mf.READ_ONLY | mf.USE_HOST_PTR, - pyopencl.ImageFormat( - pyopencl.channel_order.INTENSITY, - pyopencl.channel_type.FLOAT - ), - hostbuf=numpy.zeros(self.shape[::-1], dtype=numpy.float32) - ) - - def compute_fft_plans(self): - """ - If pyfft is installed, prepare a batched 1D FFT plan for the filtering - of FBP + self.ctx, + mf.READ_ONLY | mf.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, + pyopencl.channel_type.FLOAT + ), + hostbuf=np.zeros(self.shape[::-1], dtype=np.float32) + ) - """ - self.fft_size = nextpow2(self.num_bins * 2 - 1) - if _has_pyfft: - logger.debug("pyfft is available. Computing FFT plans...") - # batched 1D transform - self.pyfft_plan = pyfft_Plan(self.fft_size, queue=self.queue, - wait_for_finish=True) - self.d_sino_z = parray.zeros(self.queue, - (self.num_projs, self.fft_size), - dtype=numpy.complex64) - logger.debug("... done") - else: - logger.debug("pyfft not available, using numpy.fft") - self.pyfft_plan = None - # TODO: fall-back to fftw if present ? + def _init_filter(self, filter_name): + """Filter initialization - def compute_filter(self): - """ - Compute the filter for FBP + :param str filter_name: filter name """ - if self.filter_name == "Ram-Lak": - L = self.fft_size - h = numpy.zeros(L, dtype=numpy.float32) - L2 = L // 2 + 1 - h[0] = 1 / 4. - j = numpy.linspace(1, L2, L2 // 2, False) - h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2) - h[L2:] = numpy.copy(h[1:L2 - 1][::-1]) - else: - # TODO: other filters - raise ValueError("Filter %s is not available" % self.filter_name) - self.filter = h - if self.pyfft_plan: - self.d_filter = parray.to_device(self.queue, h.astype(numpy.complex64)) - self.pyfft_plan.execute(self.d_filter.data) - else: - self.filter = numpy.fft.fft(h).astype(numpy.complex64) - self.d_filter = None + self.filter_name = filter_name or "ram-lak" + self.sino_filter = SinoFilter( + self.shape, + ctx=self.ctx, + filter_name=self.filter_name, + extra_options=self.extra_options, + ) def _get_local_mem(self): return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes - def cpy2d_to_slice(self, dst): - ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0])) # pyopencl < 2015.2 - slice_shape_ocl = numpy.int32(ndrange) + def _cpy2d_to_slice(self, dst): + ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0])) + slice_shape_ocl = np.int32(ndrange) wg = None kernel_args = ( dst.data, - self.cl_mem["_d_slice"], - numpy.int32(self.slice_shape[1]), - numpy.int32(self.dimrec_shape[1]), - numpy.int32((0, 0)), - numpy.int32((0, 0)), + self.cl_mem["_d_slice"].data, + np.int32(self.slice_shape[1]), + np.int32(self.dimrec_shape[1]), + np.int32((0, 0)), + np.int32((0, 0)), slice_shape_ocl ) return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) - def transfer_to_texture(self, sino): + def _transfer_to_texture(self, sino): + if isinstance(sino, parray.Array): + return self._transfer_device_to_texture(sino) sino2 = sino - if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == numpy.float32): - sino2 = numpy.ascontiguousarray(sino, dtype=numpy.float32) + if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32): + sino2 = np.ascontiguousarray(sino, dtype=np.float32) if self.is_cpu: ev = pyopencl.enqueue_copy( self.queue, - self.d_sino, + self.d_sino.data, sino2 ) what = "transfer filtered sino H->D buffer" + ev.wait() else: ev = pyopencl.enqueue_copy( self.queue, @@ -318,21 +305,22 @@ class Backprojection(OpenclProcessing): what = "transfer filtered sino H->D texture" return EventDescription(what, ev) - def transfer_device_to_texture(self, d_sino): + def _transfer_device_to_texture(self, d_sino): if self.is_cpu: if id(self.d_sino) == id(d_sino): return ev = pyopencl.enqueue_copy( self.queue, - self.d_sino, + self.d_sino.data, d_sino ) what = "transfer filtered sino D->D buffer" + ev.wait() else: ev = pyopencl.enqueue_copy( self.queue, self.d_sino_tex, - d_sino, + d_sino.data, offset=0, origin=(0, 0), region=self.shape[::-1] @@ -340,65 +328,35 @@ class Backprojection(OpenclProcessing): what = "transfer filtered sino D->D texture" return EventDescription(what, ev) - def backprojection(self, sino=None, dst=None): + def backprojection(self, sino, output=None): """Perform the backprojection on an input sinogram - :param sino: sinogram. If provided, it returns the plain backprojection. - :param dst: destination (pyopencl.Array). If provided, the result will be written in this array. + :param sino: sinogram. + :param output: optional, output slice. + If provided, the result will be written in this array. :return: backprojection of sinogram """ events = [] with self.sem: - - if sino is not None: # assuming numpy.ndarray - events.append(self.transfer_to_texture(sino)) - # Prepare arguments for the kernel call - if self.is_cpu: - d_sino_ref = self.d_sino - else: - d_sino_ref = self.d_sino_tex - kernel_args = ( - self.num_projs, # num of projections (int32) - self.num_bins, # num of bins (int32) - self.axis_pos, # axis position (float32) - self.cl_mem["_d_slice"], # d_slice (__global float32*) - d_sino_ref, # d_sino (__read_only image2d_t or float*) - numpy.float32(0), # gpu_offset_x (float32) - numpy.float32(0), # gpu_offset_y (float32) - self.cl_mem["d_cos"], # d_cos (__global float32*) - self.cl_mem["d_sin"], # d_sin (__global float32*) - self.cl_mem["d_axes"], # d_axis (__global float32*) - self._get_local_mem() # shared mem (__local float32*) - ) - # Call the kernel + events.append(self._transfer_to_texture(sino)) + # Call the backprojection kernel if self.is_cpu: kernel_to_call = self.kernels.backproj_cpu_kernel else: kernel_to_call = self.kernels.backproj_kernel - event_bpj = kernel_to_call( + kernel_to_call( self.queue, self.ndrange, self.wg, - *kernel_args + *self._backproj_kernel_args ) - if dst is None: - self.slice[:] = 0 - events.append(EventDescription("backprojection", event_bpj)) - ev = pyopencl.enqueue_copy(self.queue, self.slice, - self.cl_mem["_d_slice"]) - events.append(EventDescription("copy D->H result", ev)) - ev.wait() - res = numpy.copy(self.slice) - if self.dimrec_shape[0] > self.slice_shape[0] or self.dimrec_shape[1] > self.slice_shape[1]: - res = res[:self.slice_shape[0], :self.slice_shape[1]] - # if the slice is backprojected onto a bigger grid - if self.slice_shape[1] > self.num_bins: - res = res[:self.slice_shape[0], :self.slice_shape[1]] + # Return + if output is None: + res = self.cl_mem["_d_slice"].get() + res = res[:self.slice_shape[0], :self.slice_shape[1]] else: - ev = self.cpy2d_to_slice(dst) - events.append(EventDescription("copy D->D result", ev)) - ev.wait() - res = dst + res = output + self._cpy2d_to_slice(output) # /with self.sem if self.profile: @@ -406,83 +364,34 @@ class Backprojection(OpenclProcessing): return res - def filter_projections(self, sino, rescale=True): - """ - Performs the FBP on a given sinogram. - - :param sinogram: sinogram to (filter-)backproject - :param rescale: if True (default), the sinogram is multiplied with - (pi/n_projs) - """ - if sino.shape[0] != self.num_projs or sino.shape[1] != self.num_bins: - raise ValueError("Expected sinogram with (projs, bins) = (%d, %d)" % (self.num_projs, self.num_bins)) - if rescale: - sino = sino * numpy.pi / self.num_projs - events = [] - # if pyfft is available, all can be done on the device - if self.d_filter is not None: - - # Zero-pad the sinogram. - # TODO: this can be done on GPU with a "Memcpy2D": - # cl.enqueue_copy(queue, dst, src, host_origin=(0,0), buffer_origin=(0,0), region=shape, host_pitches=(sino.shape[1],), buffer_pitches=(self.fft_size,)) - # However it does not work properly, and raises an error for pyopencl < 2017.1 - sino_zeropadded = numpy.zeros((sino.shape[0], self.fft_size), dtype=numpy.complex64) - sino_zeropadded[:, :self.num_bins] = sino.astype(numpy.float32) - sino_zeropadded = numpy.ascontiguousarray(sino_zeropadded, dtype=numpy.complex64) - with self.sem: - # send to GPU - ev = pyopencl.enqueue_copy(self.queue, self.d_sino_z.data, sino_zeropadded) - events.append(EventDescription("Send sino H->D", ev)) - - # FFT (in-place) - self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs) - - # Multiply (complex-wise) with the the filter - ev = self.kernels.mult(self.queue, - tuple(int(i) for i in self.d_sino_z.shape[::-1]), - None, - self.d_sino_z.data, - self.d_filter.data, - numpy.int32(self.fft_size), - self.num_projs - ) - events.append(EventDescription("complex 2D-1D multiplication", ev)) - # Inverse FFT (in-place) - self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs, inverse=True) - # Copy the real part of d_sino_z[:, :self.num_bins] (complex64) to d_sino (float32) - ev = self.kernels.cpy2d_c2r(self.queue, self.shape[::-1], None, - self.d_sino, - self.d_sino_z.data, - self.num_bins, - self.num_projs, - numpy.int32(self.fft_size) - ) - events.append(EventDescription("conversion from complex padded sinogram to sinogram", ev)) - # debug -# ev.wait() -# h_sino = numpy.zeros(sino.shape, dtype=numpy.float32) -# ev = pyopencl.enqueue_copy(self.queue, h_sino, self.d_sino) -# ev.wait() -# numpy.save("/tmp/filtered_sinogram_%s.npy" % self.ctx.devices[0].platform.name.split()[0], h_sino) - events.append(self.transfer_device_to_texture(self.d_sino)) - # ------ - else: # no pyfft - sino_filtered = fourier_filter(sino, filter_=self.filter, fft_size=self.fft_size) - with self.sem: - events.append(self.transfer_to_texture(sino_filtered)) - if self.profile: - self.events += events - - def filtered_backprojection(self, sino): + def filtered_backprojection(self, sino, output=None): """ Compute the filtered backprojection (FBP) on a sinogram. - :param sino: sinogram (`numpy.ndarray`) in the format (projections, - bins) + :param sino: sinogram (`np.ndarray` or `pyopencl.array.Array`) + with the shape (n_projections, n_bins) + :param output: output (`np.ndarray` or `pyopencl.array.Array`). + If nothing is provided, a new numpy array is returned. """ - - self.filter_projections(sino) - res = self.backprojection() + # Filter + self.sino_filter(sino, output=self.d_sino) + # Backproject + res = self.backprojection(self.d_sino, output=output) return res __call__ = filtered_backprojection + + + # ------------------- + # - Compatibility - + # ------------------- + + @deprecated(replacement="Backprojection.sino_filter", since_version="0.10") + def filter_projections(self, sino, rescale=True): + self.sino_filter(sino, output=self.d_sino) + + + +def fourier_filter(sino, filter_=None, fft_size=None): + return fourier_filter_(sino, filter_=filter_, fft_size=fft_size) + |