diff options
Diffstat (limited to 'src/silx/opencl/statistics.py')
-rw-r--r-- | src/silx/opencl/statistics.py | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/src/silx/opencl/statistics.py b/src/silx/opencl/statistics.py new file mode 100644 index 0000000..a96ee33 --- /dev/null +++ b/src/silx/opencl/statistics.py @@ -0,0 +1,242 @@ +# -*- coding: utf-8 -*- +# +# Project: SILX +# https://github.com/silx-kit/silx +# +# Copyright (C) 2012-2019 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# . +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# . +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +"""A module for performing basic statistical analysis (min, max, mean, std) on +large data where numpy is not very efficient. +""" + +__author__ = "Jerome Kieffer" +__license__ = "MIT" +__date__ = "19/05/2021" +__copyright__ = "2012-2019, ESRF, Grenoble" +__contact__ = "jerome.kieffer@esrf.fr" + +import logging +import numpy +from collections import OrderedDict, namedtuple +from math import sqrt + +from .common import pyopencl +from .processing import EventDescription, OpenclProcessing, BufferDescription +from .utils import concatenate_cl_kernel + +if pyopencl: + mf = pyopencl.mem_flags + from pyopencl.reduction import ReductionKernel + try: + from pyopencl import cltypes + except ImportError: + v = pyopencl.array.vec() + float8 = v.float8 + else: + float8 = cltypes.float8 + +else: + raise ImportError("pyopencl is not installed") +logger = logging.getLogger(__name__) + +StatResults = namedtuple("StatResults", ["min", "max", "cnt", "sum", "mean", + "var", "std"]) +zero8 = "(float8)(FLT_MAX, -FLT_MAX, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)" +# min max cnt cnt_e sum sum_e var var_e + + +class Statistics(OpenclProcessing): + """A class for doing statistical analysis using OpenCL + + :param List[int] size: Shape of input data to treat + :param numpy.dtype dtype: Input data type + :param numpy.ndarray template: Data template to extract size & dtype + :param ctx: Actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param str devicetype: Type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param int platformid: Platform identifier as given by clinfo + :param int deviceid: Device identifier as given by clinfo + :param int block_size: + Preferred workgroup size, may vary depending on the outcome of the compilation + :param bool profile: + Switch on profiling to be able to profile at the kernel level, + store profiling elements (makes code slightly slower) + """ + buffers = [ + BufferDescription("raw", 1, numpy.float32, mf.READ_ONLY), + BufferDescription("converted", 1, numpy.float32, mf.READ_WRITE), + ] + kernel_files = ["preprocess.cl"] + mapping = {numpy.int8: "s8_to_float", + numpy.uint8: "u8_to_float", + numpy.int16: "s16_to_float", + numpy.uint16: "u16_to_float", + numpy.uint32: "u32_to_float", + numpy.int32: "s32_to_float"} + + def __init__(self, size=None, dtype=None, template=None, + ctx=None, devicetype="all", platformid=None, deviceid=None, + block_size=None, profile=False + ): + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + block_size=block_size, profile=profile) + self.size = size + self.dtype = dtype + if template is not None: + self.size = template.size + self.dtype = template.dtype + + self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags) + for i in self.__class__.buffers] + + self.allocate_buffers(use_array=True) + self.compile_kernels() + self.set_kernel_arguments() + + def set_kernel_arguments(self): + """Parametrize all kernel arguments""" + for val in self.mapping.values(): + self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("raw", "converted"))) + + def compile_kernels(self): + """Compile the kernel""" + OpenclProcessing.compile_kernels(self, + self.kernel_files, + "-D NIMAGE=%i" % self.size) + compiler_options = self.get_compiler_options(x87_volatile=True) + src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl")) + self.reduction_comp = ReductionKernel(self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options) + self.reduction_simple = ReductionKernel(self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics_simple(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options) + + if "cl_khr_fp64" in self.device.extensions: + self.reduction_double = ReductionKernel(self.ctx, + dtype_out=float8, + neutral=zero8, + map_expr="map_statistics(data, i)", + reduce_expr="reduce_statistics_double(a,b)", + arguments="__global float *data", + preamble=src, + options=compiler_options) + else: + logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device) + self.reduction_double = self.reduction_comp + + def send_buffer(self, data, dest): + """ + Send a numpy array to the device, including the cast on the device if + possible + + :param numpy.ndarray data: numpy array with data + :param dest: name of the buffer as registered in the class + """ + logger.info("send data to %s", dest) + dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0]) + events = [] + if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize): + copy_image = pyopencl.enqueue_copy(self.queue, + self.cl_mem[dest].data, + numpy.ascontiguousarray(data, dest_type)) + events.append(EventDescription("copy H->D %s" % dest, copy_image)) + else: + copy_image = pyopencl.enqueue_copy(self.queue, + self.cl_mem["raw"].data, + numpy.ascontiguousarray(data)) + kernel = getattr(self.program, self.mapping[data.dtype.type]) + cast_to_float = kernel(self.queue, + (self.size,), + None, + self.cl_mem["raw"].data, + self.cl_mem[dest].data) + events += [ + EventDescription("copy H->D raw", copy_image), + EventDescription(f"cast to float {dest}", cast_to_float) + ] + if self.profile: + self.events += events + return events + + def process(self, data, comp=True): + """Actually calculate the statics on the data + + :param numpy.ndarray data: numpy array with the image + :param comp: use Kahan compensated arithmetics for the calculation + :return: Statistics named tuple + :rtype: StatResults + """ + if data.ndim != 1: + data = data.ravel() + size = data.size + assert size <= self.size, "size is OK" + events = [] + if comp is True: + comp = "comp" + elif comp is False: + comp = "single" + else: + comp = comp.lower() + with self.sem: + self.send_buffer(data, "converted") + if comp in ("single", "fp32", "float32"): + reduction = self.reduction_simple + elif comp in ("double", "fp64", "float64"): + reduction = self.reduction_double + else: + reduction = self.reduction_comp + res_d, evt = reduction(self.cl_mem["converted"][:self.size], + queue=self.queue, + return_event=True) + events.append(EventDescription(f"statistical reduction {comp}", evt)) + if self.profile: + self.events += events + res_h = res_d.get() + min_ = 1.0 * res_h["s0"] + max_ = 1.0 * res_h["s1"] + count = 1.0 * res_h["s2"] + res_h["s3"] + sum_ = 1.0 * res_h["s4"] + res_h["s5"] + m2 = 1.0 * res_h["s6"] + res_h["s7"] + var = m2 / (count - 1.0) + res = StatResults(min_, + max_, + count, + sum_, + sum_ / count, + var, + sqrt(var)) + return res + + __call__ = process |