#!/usr/bin/env python # coding: utf-8 # /*########################################################################## # # Copyright (c) 2016 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # ###########################################################################*/ """Module for (filtered) backprojection on the GPU""" from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["A. Mirone, P. Paleo"] __license__ = "MIT" __date__ = "25/01/2019" import logging import numpy as np from .common import pyopencl from .processing import EventDescription, OpenclProcessing, BufferDescription from .sinofilter import SinoFilter from .sinofilter import fourier_filter as fourier_filter_ from ..utils.deprecation import deprecated if pyopencl: mf = pyopencl.mem_flags import pyopencl.array as parray else: raise ImportError("Please install pyopencl in order to use opencl backprojection") logger = logging.getLogger(__name__) def _sizeof(Type): """ return the size (in bytes) of a scalar type, like the C behavior """ return np.dtype(Type).itemsize def _idivup(a, b): """ return the integer division, plus one if `a` is not a multiple of `b` """ return (a + (b - 1)) // b class Backprojection(OpenclProcessing): """A class for performing the backprojection using OpenCL""" kernel_files = ["backproj.cl", "array_utils.cl"] def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None, filter_name=None, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False, extra_options=None): """Constructor of the OpenCL (filtered) backprojection :param sino_shape: shape of the sinogram. The sinogram is in the format (n_b, n_a) where n_b is the number of detector bins and n_a is the number of angles. :param slice_shape: Optional, shape of the reconstructed slice. By default, it is a square slice where the dimension is the "x dimension" of the sinogram (number of bins). :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`. :param angles: Optional, a list of custom angles in radian. :param filter_name: Optional, name of the filter for FBP. Default is the Ram-Lak filter. :param ctx: actual working context, left to None for automatic initialization from device type or platformid/deviceid :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" :param platformid: integer with the platform_identifier, as given by clinfo :param deviceid: Integer with the device identifier, as given by clinfo :param profile: switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) :param extra_options: Advanced extra options in the form of a dict. Current options are: cutoff, use_numpy_fft """ # OS X enforces a workgroup size of 1 when the kernel has # synchronization barriers if sys.platform.startswith('darwin'): # assuming no discrete GPU # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet") OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, platformid=platformid, deviceid=deviceid, profile=profile) self._init_geometry(sino_shape, slice_shape, angles, axis_position, extra_options) self._allocate_memory() self._compute_angles() self._init_kernels() self._init_filter(filter_name) def _init_geometry(self, sino_shape, slice_shape, angles, axis_position, extra_options): """Geometry Initialization :param sino_shape: shape of the sinogram. The sinogram is in the format (n_b, n_a) where n_b is the number of detector bins and n_a is the number of angles. :param slice_shape: shape of the reconstructed slice. By default, it is a square slice where the dimension is the "x dimension" of the sinogram (number of bins). :param angles: list of projection angles in radian. :param axis_position: axis position :param dict extra_options: Advanced extra options """ self.shape = sino_shape self.num_bins = np.int32(sino_shape[1]) self.num_projs = np.int32(sino_shape[0]) self.angles = angles if slice_shape is None: self.slice_shape = (self.num_bins, self.num_bins) else: self.slice_shape = slice_shape self.dimrec_shape = ( _idivup(self.slice_shape[0], 32) * 32, _idivup(self.slice_shape[1], 32) * 32 ) if axis_position: self.axis_pos = np.float32(axis_position) else: self.axis_pos = np.float32((sino_shape[1] - 1.) / 2) self.axis_array = None # TODO: add axis correction front-end self._init_extra_options(extra_options) def _init_extra_options(self, extra_options): """Backprojection extra option initialization :param dict extra_options: Advanced extra options """ self.extra_options = { "cutoff": 1., "use_numpy_fft": False, # It is axis_pos - (num_bins-1)/2 in PyHST "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2., "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2. } if extra_options is not None: self.extra_options.update(extra_options) def _allocate_memory(self): # Host memory self.slice = np.zeros(self.dimrec_shape, dtype=np.float32) self.is_cpu = False if self.device.type == "CPU": self.is_cpu = True # Device memory self.buffers = [ BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE), BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available) BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY), BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY), BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY), ] self.allocate_buffers(use_array=True) self.d_sino = self.cl_mem["d_sino"] # shorthand # Texture memory (if relevant) if not(self.is_cpu): self._allocate_textures() # Local memory self.local_mem = 256 * 3 * _sizeof(np.float32) # constant for all image sizes def _compute_angles(self): if self.angles is None: self.angles = np.linspace(0, np.pi, self.num_projs, False) h_cos = np.cos(self.angles).astype(np.float32) h_sin = np.sin(self.angles).astype(np.float32) self.cl_mem["d_cos"][:] = h_cos[:] self.cl_mem["d_sin"][:] = h_sin[:] if self.axis_array: self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:] else: self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos def _init_kernels(self): OpenclProcessing.compile_kernels(self, self.kernel_files) # check that workgroup can actually be (16, 16) self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel") # Workgroup and ndrange sizes are always the same self.wg = (16, 16) self.ndrange = ( _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] ) # Prepare arguments for the kernel call if self.is_cpu: d_sino_ref = self.d_sino.data else: d_sino_ref = self.d_sino_tex self._backproj_kernel_args = ( # num of projections (int32) self.num_projs, # num of bins (int32) self.num_bins, # axis position (float32) self.axis_pos, # d_slice (__global float32*) self.cl_mem["_d_slice"].data, # d_sino (__read_only image2d_t or float*) d_sino_ref, # gpu_offset_x (float32)  np.float32(self.extra_options["gpu_offset_x"]), # gpu_offset_y (float32) np.float32(self.extra_options["gpu_offset_y"]), # d_cos (__global float32*) self.cl_mem["d_cos"].data, # d_sin (__global float32*) self.cl_mem["d_sin"].data, # d_axis (__global float32*) self.cl_mem["d_axes"].data, # shared mem (__local float32*) self._get_local_mem() ) def _allocate_textures(self): """ Allocate the texture for the sinogram. """ self.d_sino_tex = pyopencl.Image( self.ctx, mf.READ_ONLY | mf.USE_HOST_PTR, pyopencl.ImageFormat( pyopencl.channel_order.INTENSITY, pyopencl.channel_type.FLOAT ), hostbuf=np.zeros(self.shape[::-1], dtype=np.float32) ) def _init_filter(self, filter_name): """Filter initialization :param str filter_name: filter name """ self.filter_name = filter_name or "ram-lak" self.sino_filter = SinoFilter( self.shape, ctx=self.ctx, filter_name=self.filter_name, extra_options=self.extra_options, ) def _get_local_mem(self): return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes def _cpy2d_to_slice(self, dst): ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0])) slice_shape_ocl = np.int32(ndrange) wg = None kernel_args = ( dst.data, self.cl_mem["_d_slice"].data, np.int32(self.slice_shape[1]), np.int32(self.dimrec_shape[1]), np.int32((0, 0)), np.int32((0, 0)), slice_shape_ocl ) return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args) def _transfer_to_texture(self, sino): if isinstance(sino, parray.Array): return self._transfer_device_to_texture(sino) sino2 = sino if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32): sino2 = np.ascontiguousarray(sino, dtype=np.float32) if self.is_cpu: ev = pyopencl.enqueue_copy( self.queue, self.d_sino.data, sino2 ) what = "transfer filtered sino H->D buffer" ev.wait() else: ev = pyopencl.enqueue_copy( self.queue, self.d_sino_tex, sino2, origin=(0, 0), region=self.shape[::-1] ) what = "transfer filtered sino H->D texture" return EventDescription(what, ev) def _transfer_device_to_texture(self, d_sino): if self.is_cpu: if id(self.d_sino) == id(d_sino): return ev = pyopencl.enqueue_copy( self.queue, self.d_sino.data, d_sino ) what = "transfer filtered sino D->D buffer" ev.wait() else: ev = pyopencl.enqueue_copy( self.queue, self.d_sino_tex, d_sino.data, offset=0, origin=(0, 0), region=self.shape[::-1] ) what = "transfer filtered sino D->D texture" return EventDescription(what, ev) def backprojection(self, sino, output=None): """Perform the backprojection on an input sinogram :param sino: sinogram. :param output: optional, output slice. If provided, the result will be written in this array. :return: backprojection of sinogram """ events = [] with self.sem: events.append(self._transfer_to_texture(sino)) # Call the backprojection kernel if self.is_cpu: kernel_to_call = self.kernels.backproj_cpu_kernel else: kernel_to_call = self.kernels.backproj_kernel kernel_to_call( self.queue, self.ndrange, self.wg, *self._backproj_kernel_args ) # Return if output is None: res = self.cl_mem["_d_slice"].get() res = res[:self.slice_shape[0], :self.slice_shape[1]] else: res = output self._cpy2d_to_slice(output) # /with self.sem if self.profile: self.events += events return res def filtered_backprojection(self, sino, output=None): """ Compute the filtered backprojection (FBP) on a sinogram. :param sino: sinogram (`np.ndarray` or `pyopencl.array.Array`) with the shape (n_projections, n_bins) :param output: output (`np.ndarray` or `pyopencl.array.Array`). If nothing is provided, a new numpy array is returned. """ # Filter self.sino_filter(sino, output=self.d_sino) # Backproject res = self.backprojection(self.d_sino, output=output) return res __call__ = filtered_backprojection # ------------------- # - Compatibility - # ------------------- @deprecated(replacement="Backprojection.sino_filter", since_version="0.10") def filter_projections(self, sino, rescale=True): self.sino_filter(sino, output=self.d_sino) def fourier_filter(sino, filter_=None, fft_size=None): return fourier_filter_(sino, filter_=filter_, fft_size=fft_size)