summaryrefslogtreecommitdiff
path: root/silx/opencl
diff options
context:
space:
mode:
Diffstat (limited to 'silx/opencl')
-rw-r--r--silx/opencl/__init__.py2
-rw-r--r--silx/opencl/backprojection.py488
-rw-r--r--silx/opencl/common.py29
-rw-r--r--silx/opencl/linalg.py218
-rw-r--r--silx/opencl/medfilt.py8
-rw-r--r--silx/opencl/processing.py61
-rw-r--r--silx/opencl/projection.py419
-rw-r--r--silx/opencl/reconstruction.py381
-rw-r--r--silx/opencl/setup.py9
-rw-r--r--silx/opencl/test/__init__.py21
-rw-r--r--silx/opencl/test/test_array_utils.py161
-rw-r--r--silx/opencl/test/test_backprojection.py165
-rw-r--r--silx/opencl/test/test_linalg.py215
-rw-r--r--silx/opencl/test/test_projection.py139
-rw-r--r--silx/opencl/utils.py71
15 files changed, 2325 insertions, 62 deletions
diff --git a/silx/opencl/__init__.py b/silx/opencl/__init__.py
index b970338..353b74b 100644
--- a/silx/opencl/__init__.py
+++ b/silx/opencl/__init__.py
@@ -40,7 +40,7 @@ __status__ = "stable"
import logging
-logger = logging.getLogger("silx.opencl")
+logger = logging.getLogger(__name__)
from .common import *
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py
new file mode 100644
index 0000000..15a03b9
--- /dev/null
+++ b/silx/opencl/backprojection.py
@@ -0,0 +1,488 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for (filtered) backprojection on the GPU"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["A. Mirone, P. Paleo"]
+__license__ = "MIT"
+__date__ = "05/10/2017"
+
+import logging
+import numpy
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+from .utils import nextpower as nextpow2
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+ import pyopencl.array as parray
+else:
+ raise ImportError("pyopencl is not installed")
+logger = logging.getLogger(__name__)
+
+# put in .common ?
+try:
+ from pyfft.cl import Plan as pyfft_Plan
+ _has_pyfft = True
+except ImportError:
+ _has_pyfft = False
+# For silx v0.6 we disable the use FFT on GPU.
+_has_pyfft = False
+
+
+def _sizeof(Type):
+ """
+ return the size (in bytes) of a scalar type, like the C behavior
+ """
+ return numpy.dtype(Type).itemsize
+
+
+def _idivup(a, b):
+ """
+ return the integer division, plus one if `a` is not a multiple of `b`
+ """
+ return (a + (b - 1)) // b
+
+
+def fourier_filter(sino, filter_=None, fft_size=None):
+ """Simple numpy based implementation of fourier space filter
+
+ :param sino: of shape shape = (num_projs, num_bins)
+ :param filter: filter function to apply in fourier space
+ :fft_size: size on which perform the fft. May be larger than the sino array
+ :return: filtered sinogram
+ """
+ assert sino.ndim == 2
+ num_projs, num_bins = sino.shape
+ if fft_size is None:
+ fft_size = nextpow2(num_bins * 2 - 1)
+ else:
+ assert fft_size >= num_bins
+ if fft_size == num_bins:
+ sino_zeropadded = sino.astype(numpy.float32)
+ else:
+ sino_zeropadded = numpy.zeros((num_projs, fft_size),
+ dtype=numpy.complex64)
+ sino_zeropadded[:, :num_bins] = sino.astype(numpy.float32)
+
+ if filter_ is None:
+ h = numpy.zeros(fft_size, dtype=numpy.float32)
+ L2 = fft_size // 2 + 1
+ h[0] = 1 / 4.
+ j = numpy.linspace(1, L2, L2 // 2, False)
+ h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2)
+ h[L2:] = numpy.copy(h[1:L2 - 1][::-1])
+ filter_ = numpy.fft.fft(h).astype(numpy.complex64)
+
+ # Linear convolution
+ sino_f = numpy.fft.fft(sino, fft_size)
+ sino_f = sino_f * filter_
+ sino_filtered = numpy.fft.ifft(sino_f)[:, :num_bins].real
+ # Send the filtered sinogram to device
+ return numpy.ascontiguousarray(sino_filtered.real, dtype=numpy.float32)
+
+
+class Backprojection(OpenclProcessing):
+ """A class for performing the backprojection using OpenCL"""
+ kernel_files = ["backproj.cl", "array_utils.cl"]
+ if _has_pyfft:
+ kernel_files.append("backproj_helper.cl")
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None,
+ angles=None, filter_name=None, ctx=None, devicetype="all",
+ platformid=None, deviceid=None, profile=False):
+ """Constructor of the OpenCL (filtered) backprojection
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_b, n_a) where n_b is the number of detector bins
+ and n_a is the number of angles.
+ :param slice_shape: Optional, shape of the reconstructed slice. By
+ default, it is a square slice where the dimension
+ is the "x dimension" of the sinogram (number of
+ bins).
+ :param axis_position: Optional, axis position. Default is
+ `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param filter_name: Optional, name of the filter for FBP. Default is
+ the Ram-Lak filter.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ """
+ # OS X enforces a workgroup size of 1 when the kernel has
+ # synchronization barriers if sys.platform.startswith('darwin'):
+ # assuming no discrete GPU
+ # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
+
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+ self.shape = sino_shape
+
+ self.num_bins = numpy.int32(sino_shape[1])
+ self.num_projs = numpy.int32(sino_shape[0])
+ self.angles = angles
+ if slice_shape is None:
+ self.slice_shape = (self.num_bins, self.num_bins)
+ else:
+ self.slice_shape = slice_shape
+ self.dimrec_shape = (
+ _idivup(self.slice_shape[0], 32) * 32,
+ _idivup(self.slice_shape[1], 32) * 32
+ )
+ self.slice = numpy.zeros(self.dimrec_shape, dtype=numpy.float32)
+ self.filter_name = filter_name if filter_name else "Ram-Lak"
+ if axis_position:
+ self.axis_pos = numpy.float32(axis_position)
+ else:
+ self.axis_pos = numpy.float32((sino_shape[1] - 1.) / 2)
+ self.axis_array = None # TODO: add axis correction front-end
+
+ self.is_cpu = False
+ if self.device.type == "CPU":
+ self.is_cpu = True
+
+ self.compute_fft_plans()
+ self.buffers = [
+ BufferDescription("_d_slice", numpy.prod(self.dimrec_shape), numpy.float32, mf.READ_WRITE),
+ BufferDescription("d_sino", self.num_projs * self.num_bins, numpy.float32, mf.READ_WRITE), # before transferring to texture (if available)
+ BufferDescription("d_cos", self.num_projs, numpy.float32, mf.READ_ONLY),
+ BufferDescription("d_sin", self.num_projs, numpy.float32, mf.READ_ONLY),
+ BufferDescription("d_axes", self.num_projs, numpy.float32, mf.READ_ONLY),
+ ]
+ self.allocate_buffers()
+ if not(self.is_cpu):
+ self.allocate_textures()
+ self.compute_filter()
+ if self.pyfft_plan:
+ self.add_to_cl_mem({
+ "d_filter": self.d_filter,
+ "d_sino_z": self.d_sino_z
+ })
+ self.d_sino = self.cl_mem["d_sino"] # shorthand
+ self.compute_angles()
+
+ self.local_mem = 256 * 3 * _sizeof(numpy.float32) # constant for all image sizes
+ OpenclProcessing.compile_kernels(self, self.kernel_files)
+ # check that workgroup can actually be (16, 16)
+ self.check_workgroup_size("backproj_cpu_kernel")
+ # Workgroup and ndrange sizes are always the same
+ self.wg = (16, 16)
+ self.ndrange = (
+ _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0], # int(): pyopencl <= 2015.1
+ _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] # int(): pyopencl <= 2015.1
+ )
+
+ def compute_angles(self):
+ if self.angles is None:
+ self.angles = numpy.linspace(0, numpy.pi, self.num_projs, False)
+ h_cos = numpy.cos(self.angles).astype(numpy.float32)
+ h_sin = numpy.sin(self.angles).astype(numpy.float32)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_cos"], h_cos)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_sin"], h_sin)
+ if self.axis_array:
+ pyopencl.enqueue_copy(self.queue,
+ self.cl_mem["d_axes"],
+ self.axis_array.astype(numpy.float32))
+ else:
+ pyopencl.enqueue_copy(self.queue,
+ self.cl_mem["d_axes"],
+ numpy.ones(self.num_projs, dtype=numpy.float32) * self.axis_pos)
+
+ def allocate_textures(self):
+ """
+ Allocate the texture for the sinogram.
+ """
+ self.d_sino_tex = pyopencl.Image(
+ self.ctx,
+ mf.READ_ONLY | mf.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY,
+ pyopencl.channel_type.FLOAT
+ ),
+ hostbuf=numpy.zeros(self.shape[::-1], dtype=numpy.float32)
+ )
+
+ def compute_fft_plans(self):
+ """
+ If pyfft is installed, prepare a batched 1D FFT plan for the filtering
+ of FBP
+
+ """
+ self.fft_size = nextpow2(self.num_bins * 2 - 1)
+ if _has_pyfft:
+ logger.debug("pyfft is available. Computing FFT plans...")
+ # batched 1D transform
+ self.pyfft_plan = pyfft_Plan(self.fft_size, queue=self.queue,
+ wait_for_finish=True)
+ self.d_sino_z = parray.zeros(self.queue,
+ (self.num_projs, self.fft_size),
+ dtype=numpy.complex64)
+ logger.debug("... done")
+ else:
+ logger.debug("pyfft not available, using numpy.fft")
+ self.pyfft_plan = None
+ # TODO: fall-back to fftw if present ?
+
+ def compute_filter(self):
+ """
+ Compute the filter for FBP
+ """
+ if self.filter_name == "Ram-Lak":
+ L = self.fft_size
+ h = numpy.zeros(L, dtype=numpy.float32)
+ L2 = L // 2 + 1
+ h[0] = 1 / 4.
+ j = numpy.linspace(1, L2, L2 // 2, False)
+ h[1:L2:2] = -1. / (numpy.pi ** 2 * j ** 2)
+ h[L2:] = numpy.copy(h[1:L2 - 1][::-1])
+ else:
+ # TODO: other filters
+ raise ValueError("Filter %s is not available" % self.filter_name)
+ self.filter = h
+ if self.pyfft_plan:
+ self.d_filter = parray.to_device(self.queue, h.astype(numpy.complex64))
+ self.pyfft_plan.execute(self.d_filter.data)
+ else:
+ self.filter = numpy.fft.fft(h).astype(numpy.complex64)
+ self.d_filter = None
+
+ def _get_local_mem(self):
+ return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
+
+ def cpy2d_to_slice(self, dst):
+ ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0])) # pyopencl < 2015.2
+ slice_shape_ocl = numpy.int32(ndrange)
+ wg = None
+ kernel_args = (
+ dst.data,
+ self.cl_mem["_d_slice"],
+ numpy.int32(self.slice_shape[1]),
+ numpy.int32(self.dimrec_shape[1]),
+ numpy.int32((0, 0)),
+ numpy.int32((0, 0)),
+ slice_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def transfer_to_texture(self, sino):
+ sino2 = sino
+ if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == numpy.float32):
+ sino2 = numpy.ascontiguousarray(sino, dtype=numpy.float32)
+ if self.is_cpu:
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino,
+ sino2
+ )
+ what = "transfer filtered sino H->D buffer"
+ else:
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino_tex,
+ sino2,
+ origin=(0, 0),
+ region=self.shape[::-1]
+ )
+ what = "transfer filtered sino H->D texture"
+ return EventDescription(what, ev)
+
+ def transfer_device_to_texture(self, d_sino):
+ if self.is_cpu:
+ if id(self.d_sino) == id(d_sino):
+ return
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino,
+ d_sino
+ )
+ what = "transfer filtered sino D->D buffer"
+ else:
+ ev = pyopencl.enqueue_copy(
+ self.queue,
+ self.d_sino_tex,
+ d_sino,
+ offset=0,
+ origin=(0, 0),
+ region=self.shape[::-1]
+ )
+ what = "transfer filtered sino D->D texture"
+ return EventDescription(what, ev)
+
+ def backprojection(self, sino=None, dst=None):
+ """Perform the backprojection on an input sinogram
+
+ :param sino: sinogram. If provided, it returns the plain backprojection.
+ :param dst: destination (pyopencl.Array). If provided, the result will be written in this array.
+ :return: backprojection of sinogram
+ """
+ events = []
+ with self.sem:
+
+ if sino is not None: # assuming numpy.ndarray
+ events.append(self.transfer_to_texture(sino))
+ # Prepare arguments for the kernel call
+ if self.is_cpu:
+ d_sino_ref = self.d_sino
+ else:
+ d_sino_ref = self.d_sino_tex
+ kernel_args = (
+ self.num_projs, # num of projections (int32)
+ self.num_bins, # num of bins (int32)
+ self.axis_pos, # axis position (float32)
+ self.cl_mem["_d_slice"], # d_slice (__global float32*)
+ d_sino_ref, # d_sino (__read_only image2d_t or float*)
+ numpy.float32(0), # gpu_offset_x (float32)
+ numpy.float32(0), # gpu_offset_y (float32)
+ self.cl_mem["d_cos"], # d_cos (__global float32*)
+ self.cl_mem["d_sin"], # d_sin (__global float32*)
+ self.cl_mem["d_axes"], # d_axis (__global float32*)
+ self._get_local_mem() # shared mem (__local float32*)
+ )
+ # Call the kernel
+ if self.is_cpu:
+ kernel_to_call = self.kernels.backproj_cpu_kernel
+ else:
+ kernel_to_call = self.kernels.backproj_kernel
+ event_bpj = kernel_to_call(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *kernel_args
+ )
+ if dst is None:
+ self.slice[:] = 0
+ events.append(EventDescription("backprojection", event_bpj))
+ ev = pyopencl.enqueue_copy(self.queue, self.slice,
+ self.cl_mem["_d_slice"])
+ events.append(EventDescription("copy D->H result", ev))
+ ev.wait()
+ res = numpy.copy(self.slice)
+ if self.dimrec_shape[0] > self.slice_shape[0] or self.dimrec_shape[1] > self.slice_shape[1]:
+ res = res[:self.slice_shape[0], :self.slice_shape[1]]
+ # if the slice is backprojected onto a bigger grid
+ if self.slice_shape[1] > self.num_bins:
+ res = res[:self.slice_shape[0], :self.slice_shape[1]]
+ else:
+ ev = self.cpy2d_to_slice(dst)
+ events.append(EventDescription("copy D->D result", ev))
+ ev.wait()
+ res = dst
+
+ # /with self.sem
+ if self.profile:
+ self.events += events
+
+ return res
+
+ def filter_projections(self, sino, rescale=True):
+ """
+ Performs the FBP on a given sinogram.
+
+ :param sinogram: sinogram to (filter-)backproject
+ :param rescale: if True (default), the sinogram is multiplied with
+ (pi/n_projs)
+ """
+ if sino.shape[0] != self.num_projs or sino.shape[1] != self.num_bins:
+ raise ValueError("Expected sinogram with (projs, bins) = (%d, %d)" % (self.num_projs, self.num_bins))
+ if rescale:
+ sino = sino * numpy.pi / self.num_projs
+ events = []
+ # if pyfft is available, all can be done on the device
+ if self.d_filter is not None:
+
+ # Zero-pad the sinogram.
+ # TODO: this can be done on GPU with a "Memcpy2D":
+ # cl.enqueue_copy(queue, dst, src, host_origin=(0,0), buffer_origin=(0,0), region=shape, host_pitches=(sino.shape[1],), buffer_pitches=(self.fft_size,))
+ # However it does not work properly, and raises an error for pyopencl < 2017.1
+ sino_zeropadded = numpy.zeros((sino.shape[0], self.fft_size), dtype=numpy.complex64)
+ sino_zeropadded[:, :self.num_bins] = sino.astype(numpy.float32)
+ sino_zeropadded = numpy.ascontiguousarray(sino_zeropadded, dtype=numpy.complex64)
+ with self.sem:
+ # send to GPU
+ ev = pyopencl.enqueue_copy(self.queue, self.d_sino_z.data, sino_zeropadded)
+ events.append(EventDescription("Send sino H->D", ev))
+
+ # FFT (in-place)
+ self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs)
+
+ # Multiply (complex-wise) with the the filter
+ ev = self.kernels.mult(self.queue,
+ tuple(int(i) for i in self.d_sino_z.shape[::-1]),
+ None,
+ self.d_sino_z.data,
+ self.d_filter.data,
+ numpy.int32(self.fft_size),
+ self.num_projs
+ )
+ events.append(EventDescription("complex 2D-1D multiplication", ev))
+ # Inverse FFT (in-place)
+ self.pyfft_plan.execute(self.d_sino_z.data, batch=self.num_projs, inverse=True)
+ # Copy the real part of d_sino_z[:, :self.num_bins] (complex64) to d_sino (float32)
+ ev = self.kernels.cpy2d_c2r(self.queue, self.shape[::-1], None,
+ self.d_sino,
+ self.d_sino_z.data,
+ self.num_bins,
+ self.num_projs,
+ numpy.int32(self.fft_size)
+ )
+ events.append(EventDescription("conversion from complex padded sinogram to sinogram", ev))
+ # debug
+# ev.wait()
+# h_sino = numpy.zeros(sino.shape, dtype=numpy.float32)
+# ev = pyopencl.enqueue_copy(self.queue, h_sino, self.d_sino)
+# ev.wait()
+# numpy.save("/tmp/filtered_sinogram_%s.npy" % self.ctx.devices[0].platform.name.split()[0], h_sino)
+ events.append(self.transfer_device_to_texture(self.d_sino))
+ # ------
+ else: # no pyfft
+ sino_filtered = fourier_filter(sino, filter_=self.filter, fft_size=self.fft_size)
+ with self.sem:
+ events.append(self.transfer_to_texture(sino_filtered))
+ if self.profile:
+ self.events += events
+
+ def filtered_backprojection(self, sino):
+ """
+ Compute the filtered backprojection (FBP) on a sinogram.
+
+ :param sino: sinogram (`numpy.ndarray`) in the format (projections,
+ bins)
+ """
+
+ self.filter_projections(sino)
+ res = self.backprojection()
+ return res
+
+ __call__ = filtered_backprojection
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index fcb4efa..ebf50c7 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "15/03/2017"
+__date__ = "05/10/2017"
__status__ = "stable"
__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
"measure_workgroup_size", "kernel_workgroup_size"]
@@ -47,7 +47,7 @@ import numpy
from .utils import get_opencl_code
-logger = logging.getLogger("silx.opencl")
+logger = logging.getLogger(__name__)
if os.environ.get("SILX_OPENCL") in ["0", "False"]:
@@ -59,14 +59,17 @@ else:
except ImportError:
logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl")
pyopencl = None
- class mf(object):
- WRITE_ONLY = 1
- READ_ONLY = 1
- READ_WRITE = 1
else:
import pyopencl.array as array
mf = pyopencl.mem_flags
+if pyopencl is None:
+ # Define default mem flags
+ class mf(object):
+ WRITE_ONLY = 1
+ READ_ONLY = 1
+ READ_WRITE = 1
+
FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
"CPU": 4, # CPU, at least intel's have 4 operation per cycle
@@ -362,6 +365,8 @@ class OpenCL(object):
:param memory: minimum amount of memory (int)
:param extensions: list of extensions to be present
:param best: shall we look for the
+ :returns: A tuple of plateform ID and device ID, else None if nothing
+ found
"""
if extensions is None:
extensions = []
@@ -391,6 +396,9 @@ class OpenCL(object):
if best_found:
return best_found[0], best_found[1]
+ # Nothing found
+ return None
+
def create_context(self, devicetype="ALL", useFp64=False, platformid=None,
deviceid=None, cached=True):
"""
@@ -411,14 +419,17 @@ class OpenCL(object):
if (platformid is not None) and (deviceid is not None):
platformid = int(platformid)
deviceid = int(deviceid)
+ elif "PYOPENCL_CTX" in os.environ:
+ pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")]
+ pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0
+ platformid, deviceid = pyopencl_ctx
else:
if useFp64:
ids = ocl.select_device(type=devicetype, extensions=["cl_khr_int64_base_atomics"])
else:
ids = ocl.select_device(dtype=devicetype)
if ids:
- platformid = ids[0]
- deviceid = ids[1]
+ platformid, deviceid = ids
if (platformid is not None) and (deviceid is not None):
if (platformid, deviceid) in self.context_cache:
ctx = self.context_cache[(platformid, deviceid)]
@@ -444,6 +455,7 @@ class OpenCL(object):
platform_id = pyopencl.get_platforms().index(oplat)
return self.platforms[platform_id].devices[device_id]
+
if pyopencl:
ocl = OpenCL()
if ocl.nb_devices == 0:
@@ -558,4 +570,3 @@ def kernel_workgroup_size(program, kernel):
device = program.devices[0]
query_wg = pyopencl.kernel_work_group_info.WORK_GROUP_SIZE
return kernel.get_work_group_info(query_wg, device)
-
diff --git a/silx/opencl/linalg.py b/silx/opencl/linalg.py
new file mode 100644
index 0000000..fba3e9c
--- /dev/null
+++ b/silx/opencl/linalg.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for basic linear algebra in OpenCL"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "10/08/2017"
+
+import numpy as np
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing
+
+import pyopencl.array as parray
+cl = pyopencl
+
+
+class LinAlg(OpenclProcessing):
+
+ kernel_files = ["linalg.cl"]
+
+ def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False):
+ """
+ Create a "Linear Algebra" plan for a given image shape.
+
+ :param shape: shape of the image (num_rows, num_columns)
+ :param do_checks (optional): if True, memory and data type checks are performed when possible.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+
+ """
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ self.d_gradient = parray.zeros(self.queue, shape, np.complex64)
+ self.d_image = parray.zeros(self.queue, shape, np.float32)
+ self.add_to_cl_mem({
+ "d_gradient": self.d_gradient,
+ "d_image": self.d_image
+ })
+
+ self.wg2D = None
+ self.shape = shape
+ self.ndrange2D = (
+ int(self.shape[1]),
+ int(self.shape[0])
+ )
+ self.do_checks = bool(do_checks)
+ OpenclProcessing.compile_kernels(self, self.kernel_files)
+
+ @staticmethod
+ def check_array(array, dtype, shape, arg_name):
+ if array.shape != shape or array.dtype != dtype:
+ raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype)))
+
+ def get_data_references(self, src, dst, default_src_ref, default_dst_ref):
+ """
+ From various types of src and dst arrays,
+ returns the references to the underlying data (Buffer) that will be used by the OpenCL kernels.
+ # TODO documentation
+
+ This function will make a copy host->device if the input is on host (eg. numpy array)
+ """
+ if dst is not None:
+ if isinstance(dst, cl.array.Array):
+ dst_ref = dst.data
+ elif isinstance(dst, cl.Buffer):
+ dst_ref = dst
+ else:
+ raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer")
+ else:
+ dst_ref = default_dst_ref
+
+ if isinstance(src, cl.array.Array):
+ src_ref = src.data
+ elif isinstance(src, cl.Buffer):
+ src_ref = src
+ else: # assuming numpy.ndarray
+ evt = cl.enqueue_copy(self.queue, default_src_ref, src)
+ self.events.append(EventDescription("copy H->D", evt))
+ src_ref = default_src_ref
+ return src_ref, dst_ref
+
+ def gradient(self, image, dst=None, return_to_host=False):
+ """
+ Compute the spatial gradient of an image.
+ The gradient is computed with first-order difference (not central difference).
+
+ :param image: image to compute the gradient from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
+ :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type.
+ :param return_to_host: optional, set to True if you want the result to be transferred back to host.
+
+ if dst is provided, it should be of type numpy.complex64 !
+ """
+ n_y, n_x = np.int32(self.shape)
+ if self.do_checks:
+ self.check_array(image, np.float32, self.shape, "image")
+ if dst is not None:
+ self.check_array(dst, np.complex64, self.shape, "dst")
+ img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data)
+
+ # Prepare the kernel call
+ kernel_args = [
+ img_ref,
+ grad_ref,
+ n_x,
+ n_y
+ ]
+ # Call the gradient kernel
+ evt = self.kernels.kern_gradient2D(
+ self.queue,
+ self.ndrange2D,
+ self.wg2D,
+ *kernel_args
+ )
+ self.events.append(EventDescription("gradient2D", evt))
+ # TODO: should the wait be done in any case ?
+ # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
+ if dst is None:
+ evt.wait()
+
+ if return_to_host:
+ if dst is not None:
+ res_tmp = self.d_gradient.get()
+ else:
+ res_tmp = np.zeros(self.shape, dtype=np.complex64)
+ cl.enqueue_copy(self.queue, res_tmp, grad_ref)
+ res = np.zeros((2,) + self.shape, dtype=np.float32)
+ res[0] = np.copy(res_tmp.real)
+ res[1] = np.copy(res_tmp.imag)
+ return res
+ else:
+ return dst
+
+ def divergence(self, gradient, dst=None, return_to_host=False):
+ """
+ Compute the spatial divergence of an image.
+ The divergence is designed to be the (negative) adjoint of the gradient.
+
+ :param gradient: gradient-like array to compute the divergence from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
+ :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type.
+ :param return_to_host: optional, set to True if you want the result to be transferred back to host.
+
+ if dst is provided, it should be of type numpy.complex64 !
+ """
+ n_y, n_x = np.int32(self.shape)
+ # numpy.ndarray gradients are expected to be (2, n_y, n_x)
+ if isinstance(gradient, np.ndarray):
+ gradient2 = np.zeros(self.shape, dtype=np.complex64)
+ gradient2.real = np.copy(gradient[0])
+ gradient2.imag = np.copy(gradient[1])
+ gradient = gradient2
+ elif self.do_checks:
+ self.check_array(gradient, np.complex64, self.shape, "gradient")
+ if dst is not None:
+ self.check_array(dst, np.float32, self.shape, "dst")
+ grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data)
+
+ # Prepare the kernel call
+ kernel_args = [
+ grad_ref,
+ img_ref,
+ n_x,
+ n_y
+ ]
+ # Call the gradient kernel
+ evt = self.kernels.kern_divergence2D(
+ self.queue,
+ self.ndrange2D,
+ self.wg2D,
+ *kernel_args
+ )
+ self.events.append(EventDescription("divergence2D", evt))
+ # TODO: should the wait be done in any case ?
+ # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
+ if dst is None:
+ evt.wait()
+
+ if return_to_host:
+ if dst is not None:
+ res = self.d_image.get()
+ else:
+ res = np.zeros(self.shape, dtype=np.float32)
+ cl.enqueue_copy(self.queue, res, img_ref)
+ return res
+ else:
+ return dst
diff --git a/silx/opencl/medfilt.py b/silx/opencl/medfilt.py
index 90cd49a..d4e425b 100644
--- a/silx/opencl/medfilt.py
+++ b/silx/opencl/medfilt.py
@@ -36,7 +36,7 @@ from __future__ import absolute_import, print_function, with_statement, division
__author__ = "Jerome Kieffer"
__license__ = "MIT"
-__date__ = "15/03/2017"
+__date__ = "12/09/2017"
__copyright__ = "2012-2017, ESRF, Grenoble"
__contact__ = "jerome.kieffer@esrf.fr"
@@ -51,7 +51,7 @@ if pyopencl:
mf = pyopencl.mem_flags
else:
raise ImportError("pyopencl is not installed")
-logger = logging.getLogger("silx.opencl.medfilt")
+logger = logging.getLogger(__name__)
class MedianFilter2D(OpenclProcessing):
@@ -202,7 +202,7 @@ class MedianFilter2D(OpenclProcessing):
kwargs["width"] = numpy.int32(image.shape[1])
# for k, v in kwargs.items():
# print("%s: %s (%s)" % (k, v, type(v)))
- mf2d = self.program.medfilt2d(self.queue,
+ mf2d = self.kernels.medfilt2d(self.queue,
(wg, image.shape[1]),
(wg, 1), *list(kwargs.values()))
events.append(EventDescription("median filter 2d", mf2d))
@@ -264,6 +264,6 @@ class _MedFilt2d(object):
new_shape = numpy.maximum(numpy.array(cls.median_filter.shape), shape)
ctx = cls.median_filter.ctx
cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx)
- return cls.median_filter.medfilt2d(image)
+ return cls.median_filter.medfilt2d(image, kernel_size=kernel_size)
medfilt2d = _MedFilt2d.medfilt2d
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index ca46701..1997a55 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -4,7 +4,7 @@
# Project: S I L X project
# https://github.com/silx-kit/silx
#
-# Copyright (C) European Synchrotron Radiation Facility, Grenoble, France
+# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
#
# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
#
@@ -41,7 +41,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "03/02/2017"
+__date__ = "03/10/2017"
__status__ = "stable"
@@ -51,14 +51,34 @@ import gc
from collections import namedtuple
import numpy
import threading
-from .common import ocl, pyopencl, release_cl_buffers
+from .common import ocl, pyopencl, release_cl_buffers, kernel_workgroup_size
from .utils import concatenate_cl_kernel
BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
EventDescription = namedtuple("EventDescription", ["name", "event"])
-logger = logging.getLogger("pyFAI.opencl.processing")
+logger = logging.getLogger(__name__)
+
+
+class KernelContainer(object):
+ """Those object holds a copy of all kernels accessible as attributes"""
+
+ def __init__(self, program):
+ """Constructor of the class
+
+ :param program: the OpenCL program as generated by PyOpenCL
+ """
+ for kernel in program.all_kernels():
+ self.__setattr__(kernel.function_name, kernel)
+
+ def get_kernels(self):
+ "return the dictionary with all kernels"
+ return self.__dict__.copy()
+
+ def get_kernel(self, name):
+ "get a kernel from its name"
+ return self.__dict__.get(name)
class OpenclProcessing(object):
@@ -85,9 +105,10 @@ class OpenclProcessing(object):
:param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
:param platformid: integer with the platform_identifier, as given by clinfo
:param deviceid: Integer with the device identifier, as given by clinfo
- :param block_size: preferred workgroup size, may vary depending on the outpcome of the compilation
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
+ :param block_size: preferred workgroup size, may vary depending on the
+ out come of the compilation
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly slower)
"""
self.sem = threading.Semaphore()
self.profile = None
@@ -108,6 +129,7 @@ class OpenclProcessing(object):
self.set_profiling(profile)
self.block_size = block_size
self.program = None
+ self.kernels = None
def __del__(self):
"""Destructor: release all buffers and programs
@@ -145,8 +167,8 @@ class OpenclProcessing(object):
ualloc = 0
for buf in buffers:
ualloc += numpy.dtype(buf.dtype).itemsize * buf.size
- logger.info("%.3fMB are needed on device which has %.3fMB",
- ualloc / 1.0e6, self.device.memory / 1.0e6)
+ logger.info("%.3fMB are needed on device: %s, which has %.3fMB",
+ ualloc / 1.0e6, self.device, self.device.memory / 1.0e6)
if ualloc >= self.device.memory:
raise MemoryError("Fatal error in allocate_buffers. Not enough "
@@ -157,13 +179,29 @@ class OpenclProcessing(object):
try:
for buf in buffers:
size = numpy.dtype(buf.dtype).itemsize * buf.size
- mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, size)
+ mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size))
except pyopencl.MemoryError as error:
release_cl_buffers(mem)
raise MemoryError(error)
self.cl_mem.update(mem)
+ def add_to_cl_mem(self, parrays):
+ """
+ Add pyopencl.array, which are allocated by pyopencl, to self.cl_mem.
+ This should be used before calling allocate_buffers().
+
+ :param parrays: a dictionary of `pyopencl.array.Array` or `pyopencl.Buffer`
+ """
+ mem = self.cl_mem
+ for name, parr in parrays.items():
+ mem[name] = parr
+ self.cl_mem.update(mem)
+
+ def check_workgroup_size(self, kernel_name):
+ kernel = self.kernels.get_kernel(kernel_name)
+ self.compiletime_workgroup_size = kernel_workgroup_size(self.program, kernel)
+
def free_buffers(self):
"""free all device.memory allocated on the device
"""
@@ -199,12 +237,15 @@ class OpenclProcessing(object):
self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options)
except (pyopencl.MemoryError, pyopencl.LogicError) as error:
raise MemoryError(error)
+ else:
+ self.kernels = KernelContainer(self.program)
def free_kernels(self):
"""Free all kernels
"""
for kernel in self.cl_kernel_args:
self.cl_kernel_args[kernel] = []
+ self.kernels = None
self.program = None
def set_profiling(self, value=True):
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
new file mode 100644
index 0000000..0ebe9bc
--- /dev/null
+++ b/silx/opencl/projection.py
@@ -0,0 +1,419 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for tomographic projector on the GPU"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["A. Mirone, P. Paleo"]
+__license__ = "MIT"
+__date__ = "26/06/2017"
+
+import logging
+import numpy as np
+
+from .common import pyopencl
+from .processing import EventDescription, OpenclProcessing, BufferDescription
+from .backprojection import _sizeof, _idivup
+
+if pyopencl:
+ mf = pyopencl.mem_flags
+ import pyopencl.array as parray
+else:
+ raise ImportError("pyopencl is not installed")
+logger = logging.getLogger(__name__)
+
+
+class Projection(OpenclProcessing):
+ """
+ A class for performing a tomographic projection (Radon Transform) using
+ OpenCL
+ """
+ kernel_files = ["proj.cl", "array_utils.cl"]
+
+ def __init__(self, slice_shape, angles, axis_position=None,
+ detector_width=None, normalize=False, ctx=None,
+ devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+ """Constructor of the OpenCL projector.
+
+ :param slice_shape: shape of the slice: (num_rows, num_columns).
+ :param angles: Either an integer number of angles, or a list of custom
+ angles values in radian.
+ :param axis_position: Optional, axis position. Default is
+ `(shape[1]-1)/2.0`.
+ :param detector_width: Optional, detector width in pixels.
+ If detector_width > slice_shape[1], the
+ projection data will be surrounded with zeros.
+ Using detector_width < slice_shape[1] might
+ result in a local tomography setup.
+ :param normalize: Optional, normalization. If set, the sinograms are
+ multiplied by the factor pi/(2*nprojs).
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by
+ clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly
+ slower)
+ """
+ # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers
+ # if sys.platform.startswith('darwin'): # assuming no discrete GPU
+ # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
+
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+ self.shape = slice_shape
+ self.axis_pos = axis_position
+ self.angles = angles
+ self.dwidth = detector_width
+ self.normalize = normalize
+
+ # Default values
+ if self.axis_pos is None:
+ self.axis_pos = (self.shape[1] - 1) / 2.
+ if self.dwidth is None:
+ self.dwidth = self.shape[1]
+ if not(np.iterable(self.angles)):
+ if self.angles is None:
+ self.nprojs = self.shape[0]
+ else:
+ self.nprojs = self.angles
+ self.angles = np.linspace(start=0,
+ stop=np.pi,
+ num=self.nprojs,
+ endpoint=False).astype(dtype=np.float32)
+ else:
+ self.nprojs = len(self.angles)
+ self.offset_x = -np.float32((self.shape[1]-1)/2. - self.axis_pos) # TODO: custom
+ self.offset_y = -np.float32((self.shape[0]-1)/2. - self.axis_pos) # TODO: custom
+ # Reset axis_pos once offset are computed
+ self.axis_pos0 = np.float((self.shape[1]-1)/2.)
+
+ # Workgroup, ndrange and shared size
+ self.dimgrid_x = _idivup(self.dwidth, 16)
+ self.dimgrid_y = _idivup(self.nprojs, 16)
+ self._dimrecx = np.int32(self.dimgrid_x * 16)
+ self._dimrecy = np.int32(self.dimgrid_y * 16)
+ self.local_mem = 16 * 7 * _sizeof(np.float32)
+ self.wg = (16, 16)
+ self.ndrange = (
+ int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1
+ int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1
+ )
+
+ self.is_cpu = False
+ if self.device.type == "CPU":
+ self.is_cpu = True
+
+ # Allocate memory
+ self.buffers = [
+ BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE),
+ BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY),
+ BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
+ ]
+ self.add_to_cl_mem(
+ {
+ "d_axis_corrections": parray.zeros(self.queue,
+ self.nprojs, np.float32)
+ }
+ )
+ self._tmp_extended_img = np.zeros((self.shape[0]+2, self.shape[1]+2),
+ dtype=np.float32)
+ if self.is_cpu:
+ self.allocate_slice()
+ else:
+ self.allocate_textures()
+ self.allocate_buffers()
+ self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
+ dtype=np.float32)
+ if self.is_cpu:
+ self.cl_mem["d_slice"].fill(0.)
+ # enqueue_fill_buffer has issues if opencl 1.2 is not present
+ #~ pyopencl.enqueue_fill_buffer(
+ #~ self.queue,
+ #~ self.cl_mem["d_slice"],
+ #~ np.float32(0),
+ #~ 0,
+ #~ self._tmp_extended_img.size * _sizeof(np.float32)
+ #~ )
+ # Precomputations
+ self.compute_angles()
+ self.proj_precomputations()
+ self.cl_mem["d_axis_corrections"].fill(0.)
+ # enqueue_fill_buffer has issues if opencl 1.2 is not present
+ #~ pyopencl.enqueue_fill_buffer(
+ #~ self.queue,
+ #~ self.cl_mem["d_axis_corrections"],
+ #~ np.float32(0),
+ #~ 0,
+ #~ self.nprojs*_sizeof(np.float32)
+ #~ )
+ # Shorthands
+ self._d_sino = self.cl_mem["_d_sino"]
+
+ OpenclProcessing.compile_kernels(self, self.kernel_files)
+ # check that workgroup can actually be (16, 16)
+ self.check_workgroup_size("forward_kernel_cpu")
+
+ def compute_angles(self):
+ angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs
+ angles2[:self.nprojs] = np.copy(self.angles)
+ angles2[self.nprojs:] = angles2[self.nprojs-1]
+ self.angles2 = angles2
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
+
+ def allocate_slice(self):
+ self.add_to_cl_mem({"d_slice": parray.zeros(self.queue, (self.shape[1]+2, self.shape[1]+2), np.float32)})
+
+ def allocate_textures(self):
+ self.d_image_tex = pyopencl.Image(
+ self.ctx,
+ mf.READ_ONLY | mf.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY,
+ pyopencl.channel_type.FLOAT
+ ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
+ )
+
+ def transfer_to_texture(self, image):
+ image2 = image
+ if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
+ image2 = np.ascontiguousarray(image)
+ if self.is_cpu:
+ # TODO: create NoneEvent
+ return self.transfer_to_slice(image2)
+ #~ return pyopencl.enqueue_copy(
+ #~ self.queue,
+ #~ self.cl_mem["d_slice"].data,
+ #~ image2,
+ #~ origin=(1, 1),
+ #~ region=image.shape[::-1]
+ #~ )
+ else:
+ return pyopencl.enqueue_copy(
+ self.queue,
+ self.d_image_tex,
+ image2,
+ origin=(1, 1),
+ region=image.shape[::-1]
+ )
+
+ def transfer_device_to_texture(self, d_image):
+ if self.is_cpu:
+ # TODO this copy should not be necessary
+ return self.cpy2d_to_slice(d_image)
+ else:
+ return pyopencl.enqueue_copy(
+ self.queue,
+ self.d_image_tex,
+ d_image,
+ offset=0,
+ origin=(1, 1),
+ region=(int(self.shape[1]), int(self.shape[0]))#self.shape[::-1] # pyopencl <= 2015.2
+ )
+
+ def transfer_to_slice(self, image):
+ image2 = np.zeros((image.shape[0]+2, image.shape[1]+2), dtype=np.float32)
+ image2[1:-1, 1:-1] = image.astype(np.float32)
+ self.cl_mem["d_slice"].set(image2)
+
+ def proj_precomputations(self):
+ beginPos = np.zeros((2, self._dimrecy), dtype=np.int32)
+ strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32)
+ strideLine = np.zeros((2, self._dimrecy), dtype=np.int32)
+ cos_angles = np.cos(self.angles2)
+ sin_angles = np.sin(self.angles2)
+ dimslice = self.shape[1]
+
+ M1 = np.abs(cos_angles) > 0.70710678
+ M1b = np.logical_not(M1)
+ M2 = cos_angles > 0
+ M2b = np.logical_not(M2)
+ M3 = sin_angles > 0
+ M3b = np.logical_not(M3)
+ case1 = M1 * M2
+ case2 = M1 * M2b
+ case3 = M1b * M3
+ case4 = M1b * M3b
+
+ beginPos[0][case1] = 0
+ beginPos[1][case1] = 0
+ strideJoseph[0][case1] = 1
+ strideJoseph[1][case1] = 0
+ strideLine[0][case1] = 0
+ strideLine[1][case1] = 1
+
+ beginPos[0][case2] = dimslice-1
+ beginPos[1][case2] = dimslice-1
+ strideJoseph[0][case2] = -1
+ strideJoseph[1][case2] = 0
+ strideLine[0][case2] = 0
+ strideLine[1][case2] = -1
+
+ beginPos[0][case3] = dimslice-1
+ beginPos[1][case3] = 0
+ strideJoseph[0][case3] = 0
+ strideJoseph[1][case3] = 1
+ strideLine[0][case3] = -1
+ strideLine[1][case3] = 0
+
+ beginPos[0][case4] = 0
+ beginPos[1][case4] = dimslice-1
+ strideJoseph[0][case4] = 0
+ strideJoseph[1][case4] = -1
+ strideLine[0][case4] = 1
+ strideLine[1][case4] = 0
+
+ # For debug purpose
+ #~ self.beginPos = beginPos
+ #~ self.strideJoseph = strideJoseph
+ #~ self.strideLine = strideLine
+ #
+
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph)
+ pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine)
+
+ def _get_local_mem(self):
+ return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
+
+ def cpy2d_to_sino(self, dst):
+ ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2
+ sino_shape_ocl = np.int32(ndrange)
+ wg = None
+ kernel_args = (
+ dst.data,
+ self._d_sino,
+ np.int32(self.dwidth),
+ np.int32(self._dimrecx),
+ np.int32((0, 0)),
+ np.int32((0, 0)),
+ sino_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def cpy2d_to_slice(self, src):
+ """
+ copy a Nx * Ny slice to self.d_slice which is (Nx+2)*(Ny+2)
+ """
+ ndrange = (int(self.shape[1]), int(self.shape[0])) #self.shape[::-1] # pyopencl < 2015.2
+ wg = None
+ slice_shape_ocl = np.int32(ndrange)
+ kernel_args = (
+ self.cl_mem["d_slice"].data,
+ src,
+ np.int32(self.shape[1]+2),
+ np.int32(self.shape[1]),
+ np.int32((1, 1)),
+ np.int32((0, 0)),
+ slice_shape_ocl
+ )
+ return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
+
+ def projection(self, image=None, dst=None):
+ """Perform the projection on an input image
+
+ :param image: Image to project
+ :return: A sinogram
+ """
+ events = []
+ with self.sem:
+ if image is not None:
+ assert image.ndim == 2, "Treat only 2D images"
+ assert image.shape[0] == self.shape[0], "image shape is OK"
+ assert image.shape[1] == self.shape[1], "image shape is OK"
+ if not(self.is_cpu):
+ self.transfer_to_texture(image)
+ slice_ref = self.d_image_tex
+ else:
+ self.transfer_to_slice(image)
+ slice_ref = self.cl_mem["d_slice"].data
+ else:
+ if self.is_cpu:
+ slice_ref = self.cl_mem["d_slice"].data
+ else:
+ slice_ref = self.d_image_tex
+
+ kernel_args = (
+ self._d_sino,
+ slice_ref,
+ np.int32(self.shape[1]),
+ np.int32(self.dwidth),
+ self.cl_mem["d_angles"],
+ np.float32(self.axis_pos0),
+ self.cl_mem["d_axis_corrections"].data, # TODO custom
+ self.cl_mem["d_beginPos"],
+ self.cl_mem["d_strideJoseph"],
+ self.cl_mem["d_strideLine"],
+ np.int32(self.nprojs),
+ self._dimrecx,
+ self._dimrecy,
+ self.offset_x,
+ self.offset_y,
+ np.int32(1), # josephnoclip, 1 by default
+ np.int32(self.normalize)
+ )
+
+ # Call the kernel
+ if self.is_cpu:
+ event_pj = self.kernels.forward_kernel_cpu(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *kernel_args
+ )
+ else:
+ event_pj = self.kernels.forward_kernel(
+ self.queue,
+ self.ndrange,
+ self.wg,
+ *kernel_args
+ )
+ events.append(EventDescription("projection", event_pj))
+ if dst is None:
+ self._ex_sino[:] = 0
+ ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino)
+ events.append(EventDescription("copy D->H result", ev))
+ ev.wait()
+ res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth])
+ else:
+ ev = self.cpy2d_to_sino(dst)
+ events.append(EventDescription("copy D->D result", ev))
+ ev.wait()
+ res = dst
+ # /with self.sem
+ if self.profile:
+ self.events += events
+ #~ res = self._ex_sino
+ return res
+
+ __call__ = projection
diff --git a/silx/opencl/reconstruction.py b/silx/opencl/reconstruction.py
new file mode 100644
index 0000000..cb68680
--- /dev/null
+++ b/silx/opencl/reconstruction.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Module for tomographic reconstruction algorithms"""
+
+from __future__ import absolute_import, print_function, with_statement, division
+
+__authors__ = ["P. Paleo"]
+__license__ = "MIT"
+__date__ = "19/09/2017"
+
+import logging
+import numpy as np
+
+from .common import pyopencl
+from .processing import OpenclProcessing
+from .backprojection import Backprojection
+from .projection import Projection
+from .linalg import LinAlg
+
+import pyopencl.array as parray
+from pyopencl.elementwise import ElementwiseKernel
+logger = logging.getLogger(__name__)
+
+cl = pyopencl
+
+
+class ReconstructionAlgorithm(OpenclProcessing):
+ """
+ A parent class for all iterative tomographic reconstruction algorithms
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_b, n_a) where n_b is the number of detector bins and
+ n_a is the number of angles.
+ :param slice_shape: Optional, shape of the reconstructed slice.
+ By default, it is a square slice where the dimension
+ is the "x dimension" of the sinogram (number of bins).
+ :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+ """
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
+ platformid=platformid, deviceid=deviceid,
+ profile=profile)
+
+ # Create a backprojector
+ self.backprojector = Backprojection(
+ sino_shape,
+ slice_shape=slice_shape,
+ axis_position=axis_position,
+ angles=angles,
+ ctx=self.ctx,
+ profile=profile
+ )
+ # Create a projector
+ self.projector = Projection(
+ self.backprojector.slice_shape,
+ self.backprojector.angles,
+ axis_position=axis_position,
+ detector_width=self.backprojector.num_bins,
+ normalize=False,
+ ctx=self.ctx,
+ profile=profile
+ )
+ self.sino_shape = sino_shape
+ self.is_cpu = self.backprojector.is_cpu
+ # Arrays
+ self.d_data = parray.zeros(self.queue, sino_shape, dtype=np.float32)
+ self.d_sino = parray.zeros_like(self.d_data)
+ self.d_x = parray.zeros(self.queue,
+ self.backprojector.slice_shape,
+ dtype=np.float32)
+ self.d_x_old = parray.zeros_like(self.d_x)
+
+ self.add_to_cl_mem({
+ "d_data": self.d_data,
+ "d_sino": self.d_sino,
+ "d_x": self.d_x,
+ "d_x_old": self.d_x_old,
+ })
+
+ def proj(self, d_slice, d_sino):
+ """
+ Project d_slice to d_sino
+ """
+ self.projector.transfer_device_to_texture(d_slice.data) #.wait()
+ self.projector.projection(dst=d_sino)
+
+ def backproj(self, d_sino, d_slice):
+ """
+ Backproject d_sino to d_slice
+ """
+ self.backprojector.transfer_device_to_texture(d_sino.data) #.wait()
+ self.backprojector.backprojection(dst=d_slice)
+
+
+class SIRT(ReconstructionAlgorithm):
+ """
+ A class for the SIRT algorithm
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_b, n_a) where n_b is the number of detector bins and
+ n_a is the number of angles.
+ :param slice_shape: Optional, shape of the reconstructed slice.
+ By default, it is a square slice where the dimension is
+ the "x dimension" of the sinogram (number of bins).
+ :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel level,
+ store profiling elements (makes code slightly slower)
+
+ .. warning:: This is a beta version of the SIRT algorithm. Reconstruction
+ fails for at least on CPU (Xeon E3-1245 v5) using the AMD opencl
+ implementation.
+ """
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+
+ ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
+ axis_position=axis_position, angles=angles,
+ ctx=ctx, devicetype=devicetype, platformid=platformid,
+ deviceid=deviceid, profile=profile)
+ self.compute_preconditioners()
+
+ def compute_preconditioners(self):
+ """
+ Create a diagonal preconditioner for the projection and backprojection
+ operator.
+ Each term of the diagonal is the sum of the projector/backprojector
+ along rows [1], i.e the projection/backprojection of an array of ones.
+
+ [1] Jens Gregor and Thomas Benson,
+ Computational Analysis and Improvement of SIRT,
+ IEEE transactions on medical imaging, vol. 27, no. 7, 2008
+ """
+
+ # r_{i,i} = 1/(sum_j a_{i,j})
+ slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
+ R = 1./self.projector.projection(slice_ones) # could be all done on GPU, but I want extra checks
+ R[np.logical_not(np.isfinite(R))] = 1. # In the case where the rotation axis is excentred
+ self.d_R = parray.to_device(self.queue, R)
+ # c_{j,j} = 1/(sum_i a_{i,j})
+ sino_ones = np.ones(self.sino_shape, dtype=np.float32)
+ C = 1./self.backprojector.backprojection(sino_ones)
+ C[np.logical_not(np.isfinite(C))] = 1. # In the case where the rotation axis is excentred
+ self.d_C = parray.to_device(self.queue, C)
+
+ self.add_to_cl_mem({
+ "d_R": self.d_R,
+ "d_C": self.d_C
+ })
+
+ # TODO: compute and possibly return the residual
+ def run(self, data, n_it):
+ """
+ Run n_it iterations of the SIRT algorithm.
+ """
+ cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
+
+ d_x_old = self.d_x_old
+ d_x = self.d_x
+ d_R = self.d_R
+ d_C = self.d_C
+ d_sino = self.d_sino
+ d_x *= 0
+
+ for k in range(n_it):
+ d_x_old[:] = d_x[:]
+ # x{k+1} = x{k} - C A^T R (A x{k} - b)
+ self.proj(d_x, d_sino)
+ d_sino -= self.d_data
+ d_sino *= d_R
+ if self.is_cpu:
+ # This sync is necessary when using CPU, while it is not for GPU
+ d_sino.finish()
+ self.backproj(d_sino, d_x)
+ d_x *= -d_C
+ d_x += d_x_old
+ if self.is_cpu:
+ # This sync is necessary when using CPU, while it is not for GPU
+ d_x.finish()
+
+ return d_x
+
+ __call__ = run
+
+
+class TV(ReconstructionAlgorithm):
+ """
+ A class for reconstruction with Total Variation regularization using the
+ Chambolle-Pock TV reconstruction algorithm.
+
+ :param sino_shape: shape of the sinogram. The sinogram is in the format
+ (n_b, n_a) where n_b is the number of detector bins and
+ n_a is the number of angles.
+ :param slice_shape: Optional, shape of the reconstructed slice. By default,
+ it is a square slice where the dimension is the
+ "x dimension" of the sinogram (number of bins).
+ :param axis_position: Optional, axis position. Default is
+ `(shape[1]-1)/2.0`.
+ :param angles: Optional, a list of custom angles in radian.
+ :param ctx: actual working context, left to None for automatic
+ initialization from device type or platformid/deviceid
+ :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
+ :param platformid: integer with the platform_identifier, as given by clinfo
+ :param deviceid: Integer with the device identifier, as given by clinfo
+ :param profile: switch on profiling to be able to profile at the kernel
+ level, store profiling elements (makes code slightly slower)
+
+ .. warning:: This is a beta version of the Chambolle-Pock TV algorithm.
+ Reconstruction fails for at least on CPU (Xeon E3-1245 v5) using
+ the AMD opencl implementation.
+ """
+
+ def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
+ ctx=None, devicetype="all", platformid=None, deviceid=None,
+ profile=False
+ ):
+ ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
+ axis_position=axis_position, angles=angles,
+ ctx=ctx, devicetype=devicetype, platformid=platformid,
+ deviceid=deviceid, profile=profile)
+ self.compute_preconditioners()
+
+ # Create a LinAlg instance
+ self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx)
+ # Positivity constraint
+ self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a", "a[i] = max(a[i], 0.0f);")
+ # Projection onto the L-infinity ball of radius Lambda
+ self.elwise_proj_linf = ElementwiseKernel(
+ self.ctx,
+ "float2* a, float Lambda",
+ "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);",
+ "elwise_proj_linf"
+ )
+ # Additional arrays
+ self.linalg.gradient(self.d_x)
+ self.d_p = parray.zeros_like(self.linalg.cl_mem["d_gradient"])
+ self.d_q = parray.zeros_like(self.d_data)
+ self.d_g = self.linalg.d_image
+ self.d_tmp = parray.zeros_like(self.d_x)
+ self.add_to_cl_mem({
+ "d_p": self.d_p,
+ "d_q": self.d_q,
+ "d_tmp": self.d_tmp,
+ })
+
+ self.theta = 1.0
+
+ def compute_preconditioners(self):
+ """
+ Create a diagonal preconditioner for the projection and backprojection
+ operator.
+ Each term of the diagonal is the sum of the projector/backprojector
+ along rows [2],
+ i.e the projection/backprojection of an array of ones.
+
+ [2] T. Pock, A. Chambolle,
+ Diagonal preconditioning for first order primal-dual algorithms in
+ convex optimization,
+ International Conference on Computer Vision, 2011
+ """
+
+ # Compute the diagonal preconditioner "Sigma"
+ slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
+ Sigma_k = 1./self.projector.projection(slice_ones)
+ Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1.
+ self.d_Sigma_k = parray.to_device(self.queue, Sigma_k)
+ self.d_Sigma_kp1 = self.d_Sigma_k + 1 # TODO: memory vs computation
+ self.Sigma_grad = 1/2.0 # For discrete gradient, sum|D_i,j| = 2 along lines or cols
+
+ # Compute the diagonal preconditioner "Tau"
+ sino_ones = np.ones(self.sino_shape, dtype=np.float32)
+ C = self.backprojector.backprojection(sino_ones)
+ Tau = 1./(C + 2.)
+ self.d_Tau = parray.to_device(self.queue, Tau)
+
+ self.add_to_cl_mem({
+ "d_Sigma_k": self.d_Sigma_k,
+ "d_Sigma_kp1": self.d_Sigma_kp1,
+ "d_Tau": self.d_Tau
+ })
+
+ def run(self, data, n_it, Lambda, pos_constraint=False):
+ """
+ Run n_it iterations of the TV-regularized reconstruction,
+ with the regularization parameter Lambda.
+ """
+ cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
+
+ d_x = self.d_x
+ d_x_old = self.d_x_old
+ d_tmp = self.d_tmp
+ d_sino = self.d_sino
+ d_p = self.d_p
+ d_q = self.d_q
+ d_g = self.d_g
+
+ d_x *= 0
+ d_p *= 0
+ d_q *= 0
+
+ for k in range(0, n_it):
+ # Update primal variables
+ d_x_old[:] = d_x[:]
+ #~ x = x + Tau*div(p) - Tau*Kadj(q)
+ self.backproj(d_q, d_tmp)
+ self.linalg.divergence(d_p)
+ # TODO: this in less than three ops (one kernel ?)
+ d_g -= d_tmp # d_g -> L.d_image
+ d_g *= self.d_Tau
+ d_x += d_g
+
+ if pos_constraint:
+ self.elwise_clamp(d_x)
+
+ # Update dual variables
+ #~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda)
+ d_tmp[:] = d_x[:]
+ # FIXME: mul_add is out of place, put an equivalent thing in linalg...
+ #~ d_tmp.mul_add(1 + theta, d_x_old, -theta)
+ d_tmp *= 1+self.theta
+ d_tmp -= self.theta*d_x_old
+ self.linalg.gradient(d_tmp)
+ # TODO: out of place mul_add
+ #~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad)
+ self.linalg.cl_mem["d_gradient"] *= self.Sigma_grad
+ d_p += self.linalg.cl_mem["d_gradient"]
+ self.elwise_proj_linf(d_p, Lambda)
+
+ #~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k)
+ self.proj(d_tmp, d_sino)
+ # TODO: this in less instructions
+ d_sino -= self.d_data
+ d_sino *= self.d_Sigma_k
+ d_q += d_sino
+ d_q /= self.d_Sigma_kp1
+ return d_x
+
+ __call__ = run
diff --git a/silx/opencl/setup.py b/silx/opencl/setup.py
index d0181cd..a2ae244 100644
--- a/silx/opencl/setup.py
+++ b/silx/opencl/setup.py
@@ -1,6 +1,6 @@
# coding: utf-8
#
-# Copyright (C) 2016 European Synchrotron Radiation Facility
+# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -27,14 +27,17 @@ __contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
__authors__ = ["J. Kieffer"]
-__date__ = "08/09/2016"
+__date__ = "16/08/2017"
+import os.path
from numpy.distutils.misc_util import Configuration
def configuration(parent_package='', top_path=None):
config = Configuration('opencl', parent_package, top_path)
- config.add_subpackage('sift')
+ path = os.path.dirname(os.path.abspath(__file__))
+ if os.path.exists(os.path.join(path, 'sift')):
+ config.add_subpackage('sift')
config.add_subpackage('test')
return config
diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py
index 24aa06e..f6aadcd 100644
--- a/silx/opencl/test/__init__.py
+++ b/silx/opencl/test/__init__.py
@@ -24,18 +24,31 @@
__authors__ = ["J. Kieffer"]
__license__ = "MIT"
-__date__ = "15/03/2017"
+__date__ = "01/09/2017"
+import os
import unittest
from . import test_addition
from . import test_medfilt
-from ..sift import test as test_sift
-
+from . import test_backprojection
+from . import test_projection
+from . import test_linalg
+from . import test_array_utils
def suite():
test_suite = unittest.TestSuite()
test_suite.addTests(test_addition.suite())
test_suite.addTests(test_medfilt.suite())
- test_suite.addTests(test_sift.suite())
+ test_suite.addTests(test_backprojection.suite())
+ test_suite.addTests(test_projection.suite())
+ test_suite.addTests(test_linalg.suite())
+ test_suite.addTests(test_array_utils.suite())
+
+ # Allow to remove sift from the project
+ test_base_dir = os.path.dirname(__file__)
+ sift_dir = os.path.join(test_base_dir, "..", "sift")
+ if os.path.exists(sift_dir):
+ from ..sift import test as test_sift
+ test_suite.addTests(test_sift.suite())
return test_suite
diff --git a/silx/opencl/test/test_array_utils.py b/silx/opencl/test/test_array_utils.py
new file mode 100644
index 0000000..833d828
--- /dev/null
+++ b/silx/opencl/test/test_array_utils.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the OpenCL array_utils"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "14/06/2017"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ import pyopencl as cl
+ import pyopencl.array as parray
+ from .. import linalg
+from ..utils import get_opencl_code
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+try:
+ from scipy.ndimage.filters import laplace
+ _has_scipy = True
+except ImportError:
+ _has_scipy = False
+
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestCpy2d(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.ctx = ocl.create_context()
+ if logger.getEffectiveLevel() <= logging.INFO:
+ self.PROFILE = True
+ self.queue = cl.CommandQueue(
+ self.ctx,
+ properties=cl.command_queue_properties.PROFILING_ENABLE)
+ else:
+ self.PROFILE = False
+ self.queue = cl.CommandQueue(self.ctx)
+ self.allocate_arrays()
+ self.program = cl.Program(self.ctx, get_opencl_code("array_utils")).build()
+
+ def allocate_arrays(self):
+ """
+ Allocate various types of arrays for the tests
+ """
+ self.prng_state = np.random.get_state()
+ # Generate arrays of random shape
+ self.shape1 = np.random.randint(20, high=512, size=(2,))
+ self.shape2 = np.random.randint(20, high=512, size=(2,))
+ self.array1 = np.random.rand(*self.shape1).astype(np.float32)
+ self.array2 = np.random.rand(*self.shape2).astype(np.float32)
+ self.d_array1 = parray.to_device(self.queue, self.array1)
+ self.d_array2 = parray.to_device(self.queue, self.array2)
+ # Generate random offsets
+ offset1_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
+ offset1_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
+ offset2_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
+ offset2_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
+ self.offset1 = (offset1_y, offset1_x)
+ self.offset2 = (offset2_y, offset2_x)
+ # Compute the size of the rectangle to transfer
+ size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1)
+ size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1)
+ self.transfer_shape = (size_y, size_x)
+
+ def tearDown(self):
+ self.array1 = None
+ self.array2 = None
+ self.d_array1.data.release()
+ self.d_array2.data.release()
+ self.d_array1 = None
+ self.d_array2 = None
+ self.ctx = None
+ self.queue = None
+
+ def compare(self, result, reference):
+ errmax = np.max(np.abs(result - reference))
+ logger.info("Max error = %e" % (errmax))
+ self.assertTrue(errmax == 0, str("Max error is too high"))#. PRNG state was %s" % str(self.prng_state)))
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_cpy2d(self):
+ """
+ Test rectangular transfer of self.d_array1 to self.d_array2
+ """
+ # Reference
+ o1 = self.offset1
+ o2 = self.offset2
+ T = self.transfer_shape
+ logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s,
+ (N2_y, N2_x) = %s:
+ array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" %
+ (
+ str(self.shape1), str(self.shape2),
+ o2[0], o2[0] + T[0],
+ o2[1], o2[1] + T[1],
+ o1[0], o1[0] + T[0],
+ o1[1], o1[1] + T[1]
+ )
+ )
+ self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]]
+ kernel_args = (
+ self.d_array2.data,
+ self.d_array1.data,
+ np.int32(self.shape2[1]),
+ np.int32(self.shape1[1]),
+ np.int32(self.offset2[::-1]),
+ np.int32(self.offset1[::-1]),
+ np.int32(self.transfer_shape[::-1])
+ )
+ wg = None
+ ndrange = self.transfer_shape[::-1]
+ self.program.cpy2d(self.queue, ndrange, wg, *kernel_args)
+ res = self.d_array2.get()
+ self.compare(res, self.array2)
+
+
+def suite():
+ testSuite = unittest.TestSuite()
+ testSuite.addTest(TestCpy2d("test_cpy2d"))
+ return testSuite
+
+if __name__ == '__main__':
+ unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py
new file mode 100644
index 0000000..342bd2f
--- /dev/null
+++ b/silx/opencl/test/test_backprojection.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the filtered backprojection module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "05/10/2017"
+
+
+import time
+import logging
+import numpy
+import unittest
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ from .. import backprojection
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+
+
+def generate_coords(img_shp, center=None):
+ """
+ Return two 2D arrays containing the indexes of an image.
+ The zero is at the center of the image.
+ """
+ l_r, l_c = float(img_shp[0]), float(img_shp[1])
+ R, C = numpy.mgrid[:l_r, :l_c]
+ if center is None:
+ center0, center1 = l_r / 2., l_c / 2.
+ else:
+ center0, center1 = center
+ R = R + 0.5 - center0
+ C = C + 0.5 - center1
+ return R, C
+
+
+def clip_circle(img, center=None, radius=None):
+ """
+ Puts zeros outside the inscribed circle of the image support.
+ """
+ R, C = generate_coords(img.shape, center)
+ M = R * R + C * C
+ res = numpy.zeros_like(img)
+ if radius is None:
+ radius = img.shape[0] / 2. - 1
+ mask = M < radius * radius
+ res[mask] = img[mask]
+ return res
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestFBP(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ # ~ if sys.platform.startswith('darwin'):
+ # ~ self.skipTest("Backprojection is not implemented on CPU for OS X yet")
+ self.getfiles()
+ self.fbp = backprojection.Backprojection(self.sino.shape, profile=True)
+ if self.fbp.compiletime_workgroup_size < 16:
+ self.skipTest("Current implementation of OpenCL backprojection is not supported on this platform yet")
+
+ def tearDown(self):
+ self.sino = None
+# self.fbp.log_profile()
+ self.fbp = None
+
+ def getfiles(self):
+ # load sinogram of 512x512 MRI phantom
+ self.sino = numpy.load(utilstest.getfile("sino500.npz"))["data"]
+ # load reconstruction made with ASTRA FBP (with filter designed in spatial domain)
+ self.reference_rec = numpy.load(utilstest.getfile("rec_astra_500.npz"))["data"]
+
+ def measure(self):
+ "Common measurement of timings"
+ t1 = time.time()
+ try:
+ result = self.fbp.filtered_backprojection(self.sino)
+ except RuntimeError as msg:
+ logger.error(msg)
+ return
+ t2 = time.time()
+ return t2 - t1, result
+
+ def compare(self, res):
+ """
+ Compare a result with the reference reconstruction.
+ Only the valid reconstruction zone (inscribed circle) is taken into
+ account
+ """
+ res_clipped = clip_circle(res)
+ ref_clipped = clip_circle(self.reference_rec)
+ delta = abs(res_clipped - ref_clipped)
+ bad = delta > 1
+# numpy.save("/tmp/bad.npy", bad.astype(int))
+ logger.debug("Absolute difference: %s with %s outlier pixels out of %s", delta.max(), bad.sum(), numpy.prod(bad.shape))
+ return delta.max()
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_fbp(self):
+ """
+ tests FBP
+ """
+ # Test single reconstruction
+ # --------------------------
+ t, res = self.measure()
+ if t is None:
+ logger.info("test_fp: skipped")
+ else:
+ logger.info("test_backproj: time = %.3fs" % t)
+ err = self.compare(res)
+ msg = str("Max error = %e" % err)
+ logger.info(msg)
+ # TODO: cannot do better than 1e0 ?
+ # The plain backprojection was much better, so it must be an issue in the filtering process
+ self.assertTrue(err < 1., "Max error is too high")
+ # Test multiple reconstructions
+ # -----------------------------
+ res0 = numpy.copy(res)
+ for i in range(10):
+ res = self.fbp.filtered_backprojection(self.sino)
+ errmax = numpy.max(numpy.abs(res - res0))
+ self.assertTrue(errmax < 1.e-6, "Max error is too high")
+
+
+def suite():
+ testSuite = unittest.TestSuite()
+ testSuite.addTest(TestFBP("test_fbp"))
+ return testSuite
+
+
+if __name__ == '__main__':
+ unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_linalg.py b/silx/opencl/test/test_linalg.py
new file mode 100644
index 0000000..7d03983
--- /dev/null
+++ b/silx/opencl/test/test_linalg.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the linalg module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "14/06/2017"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ import pyopencl as cl
+ import pyopencl.array as parray
+ from .. import linalg
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+try:
+ from scipy.ndimage.filters import laplace
+ _has_scipy = True
+except ImportError:
+ _has_scipy = False
+
+
+# TODO move this function in math or image ?
+def gradient(img):
+ '''
+ Compute the gradient of an image as a numpy array
+ Code from https://github.com/emmanuelle/tomo-tv/
+ '''
+ shape = [img.ndim, ] + list(img.shape)
+ gradient = np.zeros(shape, dtype=img.dtype)
+ slice_all = [0, slice(None, -1),]
+ for d in range(img.ndim):
+ gradient[slice_all] = np.diff(img, axis=d)
+ slice_all[0] = d + 1
+ slice_all.insert(1, slice(None))
+ return gradient
+
+
+# TODO move this function in math or image ?
+def divergence(grad):
+ '''
+ Compute the divergence of a gradient
+ Code from https://github.com/emmanuelle/tomo-tv/
+ '''
+ res = np.zeros(grad.shape[1:])
+ for d in range(grad.shape[0]):
+ this_grad = np.rollaxis(grad[d], d)
+ this_res = np.rollaxis(res, d)
+ this_res[:-1] += this_grad[:-1]
+ this_res[1:-1] -= this_grad[:-2]
+ this_res[-1] -= this_grad[-2]
+ return res
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestLinAlg(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ self.getfiles()
+ self.la = linalg.LinAlg(self.image.shape)
+ self.allocate_arrays()
+
+ def allocate_arrays(self):
+ """
+ Allocate various types of arrays for the tests
+ """
+ # numpy images
+ self.grad = np.zeros(self.image.shape, dtype=np.complex64)
+ self.grad2 = np.zeros((2,) + self.image.shape, dtype=np.float32)
+ self.grad_ref = gradient(self.image)
+ self.div_ref = divergence(self.grad_ref)
+ self.image2 = np.zeros_like(self.image)
+ # Device images
+ self.gradient_parray = parray.zeros(self.la.queue, self.image.shape, np.complex64)
+ # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
+ # but platforms not suporting openCL 1.2 have a problem with enqueue_fill_buffer,
+ # so we use the parray "fill" utility
+ self.gradient_buffer = self.gradient_parray.data
+ # Do the same for image
+ self.image_parray = parray.to_device(self.la.queue, self.image)
+ self.image_buffer = self.image_parray.data
+ # Refs
+ tmp = np.zeros(self.image.shape, dtype=np.complex64)
+ tmp.real = np.copy(self.grad_ref[0])
+ tmp.imag = np.copy(self.grad_ref[1])
+ self.grad_ref_parray = parray.to_device(self.la.queue, tmp)
+ self.grad_ref_buffer = self.grad_ref_parray.data
+
+ def tearDown(self):
+ self.image = None
+ self.image2 = None
+ self.grad = None
+ self.grad2 = None
+ self.grad_ref = None
+ self.div_ref = None
+ self.gradient_parray.data.release()
+ self.gradient_parray = None
+ self.gradient_buffer = None
+ self.image_parray.data.release()
+ self.image_parray = None
+ self.image_buffer = None
+ self.grad_ref_parray.data.release()
+ self.grad_ref_parray = None
+ self.grad_ref_buffer = None
+
+ def getfiles(self):
+ # load 512x512 MRI phantom - TODO include Lena or ascent once a .npz is available
+ self.image = np.load(utilstest.getfile("Brain512.npz"))["data"]
+
+ def compare(self, result, reference, abstol, name):
+ errmax = np.max(np.abs(result - reference))
+ logger.info("%s: Max error = %e" % (name, errmax))
+ self.assertTrue(errmax < abstol, str("%s: Max error is too high" % name))
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_gradient(self):
+ arrays = {
+ "numpy.ndarray": self.image,
+ "buffer": self.image_buffer,
+ "parray": self.image_parray
+ }
+ for desc, image in arrays.items():
+ # Test with dst on host (numpy.ndarray)
+ res = self.la.gradient(image, return_to_host=True)
+ self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc))
+ # Test with dst on device (pyopencl.Buffer)
+ self.la.gradient(image, dst=self.gradient_buffer)
+ cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer)
+ self.grad2[0] = self.grad.real
+ self.grad2[1] = self.grad.imag
+ self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc))
+ # Test with dst on device (pyopencl.Array)
+ self.la.gradient(image, dst=self.gradient_parray)
+ self.grad = self.gradient_parray.get()
+ self.grad2[0] = self.grad.real
+ self.grad2[1] = self.grad.imag
+ self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc))
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_divergence(self):
+ arrays = {
+ "numpy.ndarray": self.grad_ref,
+ "buffer": self.grad_ref_buffer,
+ "parray": self.grad_ref_parray
+ }
+ for desc, grad in arrays.items():
+ # Test with dst on host (numpy.ndarray)
+ res = self.la.divergence(grad, return_to_host=True)
+ self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc))
+ # Test with dst on device (pyopencl.Buffer)
+ self.la.divergence(grad, dst=self.image_buffer)
+ cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer)
+ self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc))
+ # Test with dst on device (pyopencl.Array)
+ self.la.divergence(grad, dst=self.image_parray)
+ self.image2 = self.image_parray.get()
+ self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc))
+
+ @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing")
+ def test_laplacian(self):
+ laplacian_ref = laplace(self.image)
+ # Laplacian = div(grad)
+ self.la.gradient(self.image)
+ laplacian_ocl = self.la.divergence(self.la.d_gradient, return_to_host=True)
+ self.compare(laplacian_ocl, laplacian_ref, 1e-6, "laplacian")
+
+
+def suite():
+ testSuite = unittest.TestSuite()
+ testSuite.addTest(TestLinAlg("test_gradient"))
+ testSuite.addTest(TestLinAlg("test_divergence"))
+ testSuite.addTest(TestLinAlg("test_laplacian"))
+ return testSuite
+
+
+if __name__ == '__main__':
+ unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_projection.py b/silx/opencl/test/test_projection.py
new file mode 100644
index 0000000..c9a3a1c
--- /dev/null
+++ b/silx/opencl/test/test_projection.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+# coding: utf-8
+# /*##########################################################################
+#
+# Copyright (c) 2016 European Synchrotron Radiation Facility
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+# ###########################################################################*/
+"""Test of the forward projection module"""
+
+from __future__ import division, print_function
+
+__authors__ = ["Pierre paleo"]
+__license__ = "MIT"
+__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
+__date__ = "14/06/2017"
+
+
+import time
+import logging
+import numpy as np
+import unittest
+try:
+ import mako
+except ImportError:
+ mako = None
+from ..common import ocl
+if ocl:
+ from .. import projection
+from silx.test.utils import utilstest
+
+logger = logging.getLogger(__name__)
+
+
+
+@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
+class TestProj(unittest.TestCase):
+
+ def setUp(self):
+ if ocl is None:
+ return
+ #~ if sys.platform.startswith('darwin'):
+ #~ self.skipTest("Projection is not implemented on CPU for OS X yet")
+ self.getfiles()
+ n_angles = self.sino.shape[0]
+ self.proj = projection.Projection(self.phantom.shape, n_angles)
+ if self.proj.compiletime_workgroup_size < 16:
+ self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet")
+
+
+ def tearDown(self):
+ self.phantom = None
+ self.sino = None
+ self.proj = None
+
+
+ def getfiles(self):
+ # load 512x512 MRI phantom
+ self.phantom = np.load(utilstest.getfile("Brain512.npz"))["data"]
+ # load sinogram computed with PyHST
+ self.sino = np.load(utilstest.getfile("sino500_pyhst.npz"))["data"]
+
+
+ def measure(self):
+ "Common measurement of timings"
+ t1 = time.time()
+ try:
+ result = self.proj.projection(self.phantom)
+ except RuntimeError as msg:
+ logger.error(msg)
+ return
+ t2 = time.time()
+ return t2 - t1, result
+
+
+ def compare(self, res):
+ """
+ Compare a result with the reference reconstruction.
+ Only the valid reconstruction zone (inscribed circle) is taken into account
+ """
+ # Compare with the original phantom.
+ # TODO: compare a standard projection
+ ref = self.sino
+ return np.max(np.abs(res - ref))
+
+
+ @unittest.skipUnless(ocl and mako, "pyopencl is missing")
+ def test_proj(self):
+ """
+ tests Projection
+ """
+ # Test single reconstruction
+ # --------------------------
+ t, res = self.measure()
+ if t is None:
+ logger.info("test_proj: skipped")
+ else:
+ logger.info("test_proj: time = %.3fs" % t)
+ err = self.compare(res)
+ msg = str("Max error = %e" % err)
+ logger.info(msg)
+ # Interpolation differs at some lines, giving relative error of 10/50000
+ self.assertTrue(err < 20., "Max error is too high")
+ # Test multiple reconstructions
+ # -----------------------------
+ res0 = np.copy(res)
+ for i in range(10):
+ res = self.proj.projection(self.phantom)
+ errmax = np.max(np.abs(res - res0))
+ self.assertTrue(errmax < 1.e-6, "Max error is too high")
+
+
+
+def suite():
+ testSuite = unittest.TestSuite()
+ testSuite.addTest(TestProj("test_proj"))
+ return testSuite
+
+
+
+if __name__ == '__main__':
+ unittest.main(defaultTest="suite")
diff --git a/silx/opencl/utils.py b/silx/opencl/utils.py
index 58becb0..f558c1b 100644
--- a/silx/opencl/utils.py
+++ b/silx/opencl/utils.py
@@ -1,30 +1,30 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
+# /*##########################################################################
+# Copyright (C) 2017 European Synchrotron Radiation Facility
#
-# Project: Sift implementation in Python + OpenCL
-# https://github.com/silx-kit/silx
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
+# ############################################################################*/
+"""
+Project: Sift implementation in Python + OpenCL
+ https://github.com/silx-kit/silx
+"""
from __future__ import division
@@ -32,12 +32,12 @@ __authors__ = ["Jérôme Kieffer", "Pierre Paleo"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "15/03/2017"
-__status__ = "Production"
+__date__ = "06/09/2017"
+__status__ = "Production"
import os
import numpy
-from ..resources import resource_filename
+from .. import resources
from math import log, ceil
@@ -77,14 +77,22 @@ def sizeof(shape, dtype="uint8"):
return cnt * itemsize
-def get_cl_file(filename):
- """get the full path of a openCL file
+def get_cl_file(resource):
+ """get the full path of a openCL resource file
+
+ The resource name can be prefixed by the name of a resource directory. For
+ example "silx:foo.png" identify the resource "foo.png" from the resource
+ directory "silx".
+ See also :func:`silx.resources.register_resource_directory`.
+ :param str resource: Resource name. File name contained if the `opencl`
+ directory of the resources.
:return: the full path of the openCL source file
"""
- if not filename.endswith(".cl"):
- filename += ".cl"
- return resource_filename(os.path.join("opencl", filename))
+ if not resource.endswith(".cl"):
+ resource += ".cl"
+ return resources._resource_filename(resource,
+ default_directory="opencl")
def read_cl_file(filename):
@@ -97,6 +105,7 @@ def read_cl_file(filename):
lines = [i for i in f.readlines() if not i.startswith("#include ")]
return "".join(lines)
+
get_opencl_code = read_cl_file
@@ -109,4 +118,4 @@ def concatenate_cl_kernel(filenames):
this method concatenates all the kernel from the list
"""
- return os.linesep.join(read_cl_file(fn) for fn in filenames)
+ return os.linesep.join(read_cl_file(fn) for fn in filenames)