diff options
Diffstat (limited to 'src/silx/opencl/image.py')
-rw-r--r-- | src/silx/opencl/image.py | 387 |
1 files changed, 387 insertions, 0 deletions
diff --git a/src/silx/opencl/image.py b/src/silx/opencl/image.py new file mode 100644 index 0000000..65e2d5e --- /dev/null +++ b/src/silx/opencl/image.py @@ -0,0 +1,387 @@ +# -*- coding: utf-8 -*- +# +# Project: silx +# https://github.com/silx-kit/silx +# +# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# . +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# . +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +"""A general purpose library for manipulating 2D images in 1 or 3 colors + +""" +from __future__ import absolute_import, print_function, with_statement, division + + +__author__ = "Jerome Kieffer" +__license__ = "MIT" +__date__ = "12/02/2018" +__copyright__ = "2012-2017, ESRF, Grenoble" +__contact__ = "jerome.kieffer@esrf.fr" + +import os +import logging +import numpy +from collections import OrderedDict +from math import floor, ceil, sqrt, log + +from .common import pyopencl, kernel_workgroup_size +from .processing import EventDescription, OpenclProcessing, BufferDescription + +if pyopencl: + mf = pyopencl.mem_flags +logger = logging.getLogger(__name__) + + +class ImageProcessing(OpenclProcessing): + + kernel_files = ["cast", "map", "max_min", "histogram"] + + converter = {numpy.dtype(numpy.uint8): "u8_to_float", + numpy.dtype(numpy.int8): "s8_to_float", + numpy.dtype(numpy.uint16): "u16_to_float", + numpy.dtype(numpy.int16): "s16_to_float", + numpy.dtype(numpy.uint32): "u32_to_float", + numpy.dtype(numpy.int32): "s32_to_float", + } + + def __init__(self, shape=None, ncolors=1, template=None, + ctx=None, devicetype="all", platformid=None, deviceid=None, + block_size=None, memory=None, profile=False): + """Constructor of the ImageProcessing class + + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param block_size: preferred workgroup size, may vary depending on the + out come of the compilation + :param memory: minimum memory available on device + :param profile: switch on profiling to be able to profile at the kernel + level, store profiling elements (makes code slightly slower) + """ + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + block_size=block_size, memory=memory, profile=profile) + if template is not None: + shape = template.shape + if len(shape) > 2: + self.ncolors = shape[2] + self.shape = shape[:2] + else: + self.ncolors = 1 + self.shape = shape + else: + self.ncolors = ncolors + self.shape = shape + assert shape is not None + self.buffer_shape = self.shape if self.ncolors == 1 else self.shape + (self.ncolors,) + kernel_files = [os.path.join("image", i) for i in self.kernel_files] + self.compile_kernels(kernel_files, + compile_options="-DNB_COLOR=%i" % self.ncolors) + if self.ncolors == 1: + img_shape = self.shape + else: + img_shape = self.shape + (self.ncolors,) + + buffers = [BufferDescription("image0_d", img_shape, numpy.float32, None), + BufferDescription("image1_d", img_shape, numpy.float32, None), + BufferDescription("image2_d", img_shape, numpy.float32, None), + BufferDescription("max_min_d", 2, numpy.float32, None), + BufferDescription("cnt_d", 1, numpy.int32, None), ] + # Temporary buffer for max-min reduction + self.wg_red = kernel_workgroup_size(self.program, self.kernels.max_min_reduction_stage1) + if self.wg_red > 1: + self.wg_red = min(self.wg_red, + numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2))))) + tmp = BufferDescription("tmp_max_min_d", 2 * self.wg_red, numpy.float32, None) + buffers.append(tmp) + self.allocate_buffers(buffers, use_array=True) + self.cl_mem["cnt_d"].fill(0) + + def __repr__(self): + return "ImageProcessing for shape=%s, %i colors initalized on %s" % \ + (self.shape, self.ncolors, self.ctx.devices[0].name) + + def _get_in_out_buffers(self, img=None, copy=True, out=None, + out_dtype=None, out_size=None): + """Internal method used to select the proper buffers before processing. + + :param img: expects a numpy array or a pyopencl.array of dim 2 or 3 + :param copy: set to False to directly re-use a pyopencl array + :param out: provide an output buffer to store the result + :param out_dtype: enforce the type of the output buffer (optional) + :param out_size: enforce the size of the output buffer (optional) + :return: input_buffer, output_buffer + + Nota: this is not locked. + """ + events = [] + if out is not None and isinstance(out, pyopencl.array.Array): + if (out_size or out_dtype) is not None: + if out_size is not None: + assert out.size > out_size + if out_dtype is not None: + assert out_dtype == out.dtype + else: # assume it is same size and type as weoking buffer + assert out.shape == self.buffer_shape + assert out.dtype == numpy.float32 + out.finish() + output_array = out + else: + if out_dtype != numpy.float32 and out_size: + name = "%s_%s_d" % (numpy.dtype(out_dtype), out_size) + if name not in self.cl_mem: + output_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (out_size,), out_dtype) + else: + output_array = self.cl_mem[name] + else: + output_array = self.cl_mem["image2_d"] + + if img is None: + input_array = self.cl_mem["image1_d"] + if isinstance(img, pyopencl.array.Array): + if copy: + evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, img.data) + input_array = self.cl_mem["image1_d"] + events.append(EventDescription("copy D->D", evt)) + else: + img.finish() + input_array = img + evt = None + else: + # assume this is numpy + if img.dtype.itemsize > 4: + logger.warning("Casting to float32 on CPU") + evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img, numpy.float32)) + input_array = self.cl_mem["image1_d"] + events.append(EventDescription("cast+copy H->D", evt)) + else: + evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img)) + input_array = self.cl_mem["image1_d"] + events.append(EventDescription("copy H->D", evt)) + if self.profile: + self.events += events + return input_array, output_array + + def to_float(self, img, copy=True, out=None): + """ Takes any array and convert it to a float array for ease of processing. + + :param img: expects a numpy array or a pyopencl.array of dim 2 or 3 + :param copy: set to False to directly re-use a pyopencl array + :param out: provide an output buffer to store the result + """ + assert img.shape == self.buffer_shape + + events = [] + with self.sem: + input_array, output_array = self._get_in_out_buffers(img, copy, out) + if (img.dtype.itemsize > 4) or (img.dtype == numpy.float32): + # copy device -> device, already there as float32 + ev = pyopencl.enqueue_copy(self.queue, output_array.data, input_array.data) + events.append(EventDescription("copy D->D", ev)) + else: + # Cast to float: + name = self.converter[img.dtype] + kernel = self.kernels.get_kernel(name) + ev = kernel(self.queue, (self.shape[1], self.shape[0]), None, + input_array.data, output_array.data, + numpy.int32(self.shape[1]), numpy.int32(self.shape[0]) + ) + events.append(EventDescription("cast %s" % name, ev)) + + if self.profile: + self.events += events + if out is None: + res = output_array.get() + return res + else: + output_array.finish() + return output_array + + def normalize(self, img, mini=0.0, maxi=1.0, copy=True, out=None): + """Scale the intensity of the image so that the minimum is 0 and the + maximum is 1.0 (or any value suggested). + + :param img: numpy array or pyopencl array of dim 2 or 3 and of type float + :param mini: Expected minimum value + :param maxi: expected maxiumum value + :param copy: set to False to use directly the input buffer + :param out: provides an output buffer. prevents a copy D->H + + This uses a min/max reduction in two stages plus a map operation + """ + assert img.shape == self.buffer_shape + events = [] + with self.sem: + input_array, output_array = self._get_in_out_buffers(img, copy, out) + size = numpy.int32(numpy.prod(self.shape)) + if self.wg_red == 1: + # Probably on MacOS CPU WG==1 --> serial code. + kernel = self.kernels.get_kernel("max_min_serial") + evt = kernel(self.queue, (1,), (1,), + input_array.data, + size, + self.cl_mem["max_min_d"].data) + ed = EventDescription("max_min_serial", evt) + events.append(ed) + else: + stage1 = self.kernels.max_min_reduction_stage1 + stage2 = self.kernels.max_min_reduction_stage2 + local_mem = pyopencl.LocalMemory(int(self.wg_red * 8)) + k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),), + input_array.data, + self.cl_mem["tmp_max_min_d"].data, + size, + local_mem) + k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),), + self.cl_mem["tmp_max_min_d"].data, + self.cl_mem["max_min_d"].data, + local_mem) + + events += [EventDescription("max_min_stage1", k1), + EventDescription("max_min_stage2", k2)] + + evt = self.kernels.normalize_image(self.queue, (self.shape[1], self.shape[0]), None, + input_array.data, output_array.data, + numpy.int32(self.shape[1]), numpy.int32(self.shape[0]), + self.cl_mem["max_min_d"].data, + numpy.float32(mini), numpy.float32(maxi)) + events.append(EventDescription("normalize", evt)) + if self.profile: + self.events += events + + if out is None: + res = output_array.get() + return res + else: + output_array.finish() + return output_array + + def histogram(self, img=None, nbins=255, range=None, + log_scale=False, copy=True, out=None): + """Compute the histogram of a set of data. + + :param img: input image. If None, use the one already on the device + :param nbins: number of bins + :param range: the lower and upper range of the bins. If not provided, + range is simply ``(a.min(), a.max())``. Values outside the + range are ignored. The first element of the range must be + less than or equal to the second. + :param log_scale: perform the binning in lograrithmic scale. + Open to extension + :param copy: unset to directly use the input buffer without copy + :param out: use a provided array for offering the result + :return: histogram (size=nbins), edges (size=nbins+1) + API similar to numpy + """ + assert img.shape == self.buffer_shape + + input_array = self.to_float(img, copy=copy, out=self.cl_mem["image0_d"]) + events = [] + with self.sem: + input_array, output_array = self._get_in_out_buffers(input_array, copy=False, + out=out, + out_dtype=numpy.int32, + out_size=nbins) + + if range is None: + # measure actually the bounds + size = numpy.int32(numpy.prod(self.shape)) + if self.wg_red == 1: + # Probably on MacOS CPU WG==1 --> serial code. + kernel = self.kernels.get_kernel("max_min_serial") + + evt = kernel(self.queue, (1,), (1,), + input_array.data, + size, + self.cl_mem["max_min_d"].data) + events.append(EventDescription("max_min_serial", evt)) + else: + stage1 = self.kernels.max_min_reduction_stage1 + stage2 = self.kernels.max_min_reduction_stage2 + local_mem = pyopencl.LocalMemory(int(self.wg_red * 2 * numpy.dtype("float32").itemsize)) + k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),), + input_array.data, + self.cl_mem["tmp_max_min_d"].data, + size, + local_mem) + k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),), + self.cl_mem["tmp_max_min_d"].data, + self.cl_mem["max_min_d"].data, + local_mem) + + events += [EventDescription("max_min_stage1", k1), + EventDescription("max_min_stage2", k2)] + maxi, mini = self.cl_mem["max_min_d"].get() + else: + mini = numpy.float32(min(range)) + maxi = numpy.float32(max(range)) + device = self.ctx.devices[0] + nb_engines = device.max_compute_units + tmp_size = nb_engines * nbins + name = "tmp_int32_%s_d" % (tmp_size) + if name not in self.cl_mem: + tmp_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (tmp_size,), numpy.int32) + else: + tmp_array = self.cl_mem[name] + + edge_name = "tmp_float32_%s_d" % (nbins + 1) + if edge_name not in self.cl_mem: + edges_array = self.cl_mem[edge_name] = pyopencl.array.empty(self.queue, (nbins + 1,), numpy.float32) + else: + edges_array = self.cl_mem[edge_name] + + shared = pyopencl.LocalMemory(numpy.dtype(numpy.int32).itemsize * nbins) + + # Handle log-scale + if log_scale: + map_operation = numpy.int32(1) + else: + map_operation = numpy.int32(0) + kernel = self.kernels.get_kernel("histogram") + wg = min(device.max_work_group_size, + 1 << (int(ceil(log(nbins, 2)))), + self.kernels.max_workgroup_size(kernel)) + evt = kernel(self.queue, (wg * nb_engines,), (wg,), + input_array.data, + numpy.int32(input_array.size), + mini, + maxi, + map_operation, + output_array.data, + edges_array.data, + numpy.int32(nbins), + tmp_array.data, + self.cl_mem["cnt_d"].data, + shared) + events.append(EventDescription("histogram", evt)) + + if self.profile: + self.events += events + + if out is None: + res = output_array.get() + return res, edges_array.get() + else: + output_array.finish() + return output_array, edges_array |