diff options
Diffstat (limited to 'silx/opencl')
-rw-r--r-- | silx/opencl/__init__.py | 46 | ||||
-rw-r--r-- | silx/opencl/common.py | 561 | ||||
-rw-r--r-- | silx/opencl/medfilt.py | 269 | ||||
-rw-r--r-- | silx/opencl/processing.py | 275 | ||||
-rw-r--r-- | silx/opencl/setup.py | 44 | ||||
-rw-r--r-- | silx/opencl/test/__init__.py | 41 | ||||
-rw-r--r-- | silx/opencl/test/test_addition.py | 135 | ||||
-rw-r--r-- | silx/opencl/test/test_medfilt.py | 175 | ||||
-rw-r--r-- | silx/opencl/utils.py | 112 |
9 files changed, 1658 insertions, 0 deletions
diff --git a/silx/opencl/__init__.py b/silx/opencl/__init__.py new file mode 100644 index 0000000..b970338 --- /dev/null +++ b/silx/opencl/__init__.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Project: S I L X project +# https://github.com/silx-kit/silx +# +# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +__author__ = "Jerome Kieffer" +__contact__ = "Jerome.Kieffer@ESRF.eu" +__license__ = "MIT" +__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "15/03/2017" +__status__ = "stable" + +import logging + + +logger = logging.getLogger("silx.opencl") + + +from .common import * diff --git a/silx/opencl/common.py b/silx/opencl/common.py new file mode 100644 index 0000000..fcb4efa --- /dev/null +++ b/silx/opencl/common.py @@ -0,0 +1,561 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Project: S I L X project +# https://github.com/silx-kit/silx +# +# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +__author__ = "Jerome Kieffer" +__contact__ = "Jerome.Kieffer@ESRF.eu" +__license__ = "MIT" +__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "15/03/2017" +__status__ = "stable" +__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers", + "measure_workgroup_size", "kernel_workgroup_size"] + +import os +import logging + +import numpy + +from .utils import get_opencl_code + + +logger = logging.getLogger("silx.opencl") + + +if os.environ.get("SILX_OPENCL") in ["0", "False"]: + logger.warning("Use of OpenCL has been disables from environment variable: SILX_OPENCL=0") + pyopencl = None +else: + try: + import pyopencl + except ImportError: + logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl") + pyopencl = None + class mf(object): + WRITE_ONLY = 1 + READ_ONLY = 1 + READ_WRITE = 1 + else: + import pyopencl.array as array + mf = pyopencl.mem_flags + + +FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ... + "CPU": 4, # CPU, at least intel's have 4 operation per cycle + "ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core + +# Sources : https://en.wikipedia.org/wiki/CUDA +NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed ! + (1, 1): 24, # Measured on G98 [Quadro NVS 295] + (1, 2): 24, # Guessed ! + (1, 3): 24, # measured on a GT285 (GT200) + (2, 0): 64, # Measured on a 580 (GF110) + (2, 1): 96, # Measured on Quadro2000 GF106GL + (3, 0): 384, # Guessed! + (3, 5): 384, # Measured on K20 + (3, 7): 384, # K80: Guessed! + (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU + (5, 2): 256, # Titan-X + (5, 3): 256, # TX1 + (6, 0): 128, # GP100 + (6, 1): 128, # GP104 + (6, 2): 128, # ? + (7, 0): 256, # Volta ? + (7, 1): 256, # Volta ? + } + +AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops + + +class Device(object): + """ + Simple class that contains the structure of an OpenCL device + """ + def __init__(self, name="None", dtype=None, version=None, driver_version=None, + extensions="", memory=None, available=None, + cores=None, frequency=None, flop_core=None, idx=0, workgroup=1): + """ + Simple container with some important data for the OpenCL device description. + + :param name: name of the device + :param dtype: device type: CPU/GPU/ACC... + :param version: driver version + :param driver_version: + :param extensions: List of opencl extensions + :param memory: maximum memory available on the device + :param available: is the device deactivated or not + :param cores: number of SM/cores + :param frequency: frequency of the device + :param flop_core: Flopating Point operation per core per cycle + :param idx: index of the device within the platform + :param workgroup: max workgroup size + """ + self.name = name.strip() + self.type = dtype + self.version = version + self.driver_version = driver_version + self.extensions = extensions.split() + self.memory = memory + self.available = available + self.cores = cores + self.frequency = frequency + self.id = idx + self.max_work_group_size = workgroup + if not flop_core: + flop_core = FLOP_PER_CORE.get(dtype, 1) + if cores and frequency: + self.flops = cores * frequency * flop_core + else: + self.flops = flop_core + + def __repr__(self): + return "%s" % self.name + + def pretty_print(self): + """ + Complete device description + + :return: string + """ + lst = ["Name\t\t:\t%s" % self.name, + "Type\t\t:\t%s" % self.type, + "Memory\t\t:\t%.3f MB" % (self.memory / 2.0 ** 20), + "Cores\t\t:\t%s CU" % self.cores, + "Frequency\t:\t%s MHz" % self.frequency, + "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.), + "Version\t\t:\t%s" % self.version, + "Available\t:\t%s" % self.available] + return os.linesep.join(lst) + + +class Platform(object): + """ + Simple class that contains the structure of an OpenCL platform + """ + def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0): + """ + Class containing all descriptions of a platform and all devices description within that platform. + + :param name: platform name + :param vendor: name of the brand/vendor + :param version: + :param extensions: list of the extension provided by the platform to all of its devices + :param idx: index of the platform + """ + self.name = name.strip() + self.vendor = vendor.strip() + self.version = version + self.extensions = extensions.split() + self.devices = [] + self.id = idx + + def __repr__(self): + return "%s" % self.name + + def add_device(self, device): + """ + Add new device to the platform + + :param device: Device instance + """ + self.devices.append(device) + + def get_device(self, key): + """ + Return a device according to key + + :param key: identifier for a device, either it's id (int) or it's name + :type key: int or str + """ + out = None + try: + devid = int(key) + except ValueError: + for a_dev in self.devices: + if a_dev.name == key: + out = a_dev + else: + if len(self.devices) > devid > 0: + out = self.devices[devid] + return out + + +def _measure_workgroup_size(device_or_context, fast=False): + """Mesure the maximal work group size of the given device + + :param device_or_context: instance of pyopencl.Device or pyopencl.Context + or 2-tuple (platformid,deviceid) + :param fast: ask the kernel the valid value, don't probe it + :return: maximum size for the workgroup + """ + if isinstance(device_or_context, pyopencl.Device): + ctx = pyopencl.Context(devices=[device_or_context]) + device = device_or_context + elif isinstance(device_or_context, pyopencl.Context): + ctx = device_or_context + device = device_or_context.devices[0] + elif isinstance(device_or_context, (tuple, list)) and len(device_or_context) == 2: + ctx = ocl.create_context(platformid=device_or_context[0], + deviceid=device_or_context[1]) + device = ctx.devices[0] + else: + raise RuntimeError("""given parameter device_or_context is not an + instanciation of a device or a context""") + shape = device.max_work_group_size + # get the context + + assert ctx is not None + queue = pyopencl.CommandQueue(ctx) + + max_valid_wg = 1 + data = numpy.random.random(shape).astype(numpy.float32) + d_data = pyopencl.array.to_device(queue, data) + d_data_1 = pyopencl.array.zeros_like(d_data) + 1 + + program = pyopencl.Program(ctx, get_opencl_code("addition")).build() + if fast: + max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device) + else: + maxi = int(round(numpy.log2(shape))) + for i in range(maxi + 1): + d_res = pyopencl.array.empty_like(d_data) + wg = 1 << i + try: + evt = program.addition( + queue, (shape,), (wg,), + d_data.data, d_data_1.data, d_res.data, numpy.int32(shape)) + evt.wait() + except Exception as error: + logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape) + program = queue = d_res = d_data_1 = d_data = None + break + else: + res = d_res.get() + good = numpy.allclose(res, data + 1) + if good: + if wg > max_valid_wg: + max_valid_wg = wg + else: + logger.warning("ArithmeticError on %s for WG=%s/%s", wg, device.name, shape) + + return max_valid_wg + + +def _is_nvidia_gpu(vendor, devtype): + return (vendor == "NVIDIA Corporation") and (devtype == "GPU") + + +class OpenCL(object): + """ + Simple class that wraps the structure ocl_tools_extended.h + + This is a static class. + ocl should be the only instance and shared among all python modules. + """ + + platforms = [] + nb_devices = 0 + context_cache = {} # key: 2-tuple of int, value: context + if pyopencl: + platform = device = pypl = devtype = extensions = pydev = None + for idx, platform in enumerate(pyopencl.get_platforms()): + pypl = Platform(platform.name, platform.vendor, platform.version, platform.extensions, idx) + for idd, device in enumerate(platform.get_devices()): + #################################################### + # Nvidia does not report int64 atomics (we are using) ... + # this is a hack around as any nvidia GPU with double-precision supports int64 atomics + #################################################### + extensions = device.extensions + if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions): + extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics' + try: + devtype = pyopencl.device_type.to_string(device.type).upper() + except ValueError: + # pocl does not describe itself as a CPU ! + devtype = "CPU" + if len(devtype) > 3: + devtype = devtype[:3] + if _is_nvidia_gpu(pypl.vendor, devtype) and "compute_capability_major_nv" in dir(device): + comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv + flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, min(NVIDIA_FLOP_PER_CORE.values())) + elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"): + flop_core = AMD_FLOP_PER_CORE + elif devtype == "CPU": + flop_core = FLOP_PER_CORE.get(devtype, 1) + else: + flop_core = 1 + workgroup = device.max_work_group_size + if (devtype == "CPU") and (pypl.vendor == "Apple"): + logger.warning("For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size.") + workgroup = _measure_workgroup_size(device, fast=True) + if (devtype == "GPU") and os.environ.get("GPU") == "False": + # Environment variable to disable GPU devices + continue + pydev = Device(device.name, devtype, device.version, device.driver_version, extensions, + device.global_mem_size, bool(device.available), device.max_compute_units, + device.max_clock_frequency, flop_core, idd, workgroup) + pypl.add_device(pydev) + nb_devices += 1 + platforms.append(pypl) + del platform, device, pypl, devtype, extensions, pydev + + def __repr__(self): + out = ["OpenCL devices:"] + for platformid, platform in enumerate(self.platforms): + deviceids = ["(%s,%s) %s" % (platformid, deviceid, dev.name) + for deviceid, dev in enumerate(platform.devices)] + out.append("[%s] %s: " % (platformid, platform.name) + ", ".join(deviceids)) + return os.linesep.join(out) + + def get_platform(self, key): + """ + Return a platform according + + :param key: identifier for a platform, either an Id (int) or it's name + :type key: int or str + """ + out = None + try: + platid = int(key) + except ValueError: + for a_plat in self.platforms: + if a_plat.name == key: + out = a_plat + else: + if len(self.platforms) > platid > 0: + out = self.platforms[platid] + return out + + def select_device(self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs): + """ + Select a device based on few parameters (at the end, keep the one with most memory) + + :param dtype: "gpu" or "cpu" or "all" .... + :param memory: minimum amount of memory (int) + :param extensions: list of extensions to be present + :param best: shall we look for the + """ + if extensions is None: + extensions = [] + if "type" in kwargs: + dtype = kwargs["type"].upper() + else: + dtype = dtype.upper() + if len(dtype) > 3: + dtype = dtype[:3] + best_found = None + for platformid, platform in enumerate(self.platforms): + for deviceid, device in enumerate(platform.devices): + if (dtype in ["ALL", "DEF"]) or (device.type == dtype): + if (memory is None) or (memory <= device.memory): + found = True + for ext in extensions: + if ext not in device.extensions: + found = False + if found: + if not best: + return platformid, deviceid + else: + if not best_found: + best_found = platformid, deviceid, device.flops + elif best_found[2] < device.flops: + best_found = platformid, deviceid, device.flops + if best_found: + return best_found[0], best_found[1] + + def create_context(self, devicetype="ALL", useFp64=False, platformid=None, + deviceid=None, cached=True): + """ + Choose a device and initiate a context. + + Devicetypes can be GPU,gpu,CPU,cpu,DEF,ACC,ALL. + Suggested are GPU,CPU. + For each setting to work there must be such an OpenCL device and properly installed. + E.g.: If Nvidia driver is installed, GPU will succeed but CPU will fail. + The AMD SDK kit is required for CPU via OpenCL. + :param devicetype: string in ["cpu","gpu", "all", "acc"] + :param useFp64: boolean specifying if double precision will be used + :param platformid: integer + :param deviceid: integer + :param cached: True if we want to cache the context + :return: OpenCL context on the selected device + """ + if (platformid is not None) and (deviceid is not None): + platformid = int(platformid) + deviceid = int(deviceid) + else: + if useFp64: + ids = ocl.select_device(type=devicetype, extensions=["cl_khr_int64_base_atomics"]) + else: + ids = ocl.select_device(dtype=devicetype) + if ids: + platformid = ids[0] + deviceid = ids[1] + if (platformid is not None) and (deviceid is not None): + if (platformid, deviceid) in self.context_cache: + ctx = self.context_cache[(platformid, deviceid)] + else: + ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]]) + if cached: + self.context_cache[(platformid, deviceid)] = ctx + else: + logger.warning("Last chance to get an OpenCL device ... probably not the one requested") + ctx = pyopencl.create_some_context(interactive=False) + return ctx + + def device_from_context(self, context): + """ + Retrieves the Device from the context + + :param context: OpenCL context + :return: instance of Device + """ + odevice = context.devices[0] + oplat = odevice.platform + device_id = oplat.get_devices().index(odevice) + platform_id = pyopencl.get_platforms().index(oplat) + return self.platforms[platform_id].devices[device_id] + +if pyopencl: + ocl = OpenCL() + if ocl.nb_devices == 0: + ocl = None +else: + ocl = None + + +def release_cl_buffers(cl_buffers): + """ + :param cl_buffers: the buffer you want to release + :type cl_buffers: dict(str, pyopencl.Buffer) + + This method release the memory of the buffers store in the dict + """ + for key, buffer_ in cl_buffers.items(): + if buffer_ is not None: + if isinstance(buffer_, pyopencl.array.Array): + try: + buffer_.data.release() + except pyopencl.LogicError: + logger.error("Error while freeing buffer %s", key) + else: + try: + buffer_.release() + except pyopencl.LogicError: + logger.error("Error while freeing buffer %s", key) + cl_buffers[key] = None + return cl_buffers + + +def allocate_cl_buffers(buffers, device=None, context=None): + """ + :param buffers: the buffers info use to create the pyopencl.Buffer + :type buffers: list(std, flag, numpy.dtype, int) + :param device: one of the context device + :param context: opencl contextdevice + :return: a dict containing the instanciated pyopencl.Buffer + :rtype: dict(str, pyopencl.Buffer) + + This method instanciate the pyopencl.Buffer from the buffers + description. + """ + mem = {} + if device is None: + device = ocl.device_from_context(context) + + # check if enough memory is available on the device + ualloc = 0 + for _, _, dtype, size in buffers: + ualloc += numpy.dtype(dtype).itemsize * size + memory = device.memory + logger.info("%.3fMB are needed on device which has %.3fMB", + ualloc / 1.0e6, memory / 1.0e6) + if ualloc >= memory: + memError = "Fatal error in allocate_buffers." + memError += "Not enough device memory for buffers" + memError += "(%lu requested, %lu available)" % (ualloc, memory) + raise MemoryError(memError) # noqa + + # do the allocation + try: + for name, flag, dtype, size in buffers: + mem[name] = pyopencl.Buffer(context, flag, + numpy.dtype(dtype).itemsize * size) + except pyopencl.MemoryError as error: + release_cl_buffers(mem) + raise MemoryError(error) + + return mem + + +def measure_workgroup_size(device): + """Measure the actual size of the workgroup + + :param device: device or context or 2-tuple with indexes + :return: the actual measured workgroup size + + if device is "all", returns a dict with all devices with their ids as keys. + """ + if (ocl is None) or (device is None): + return None + + if isinstance(device, tuple) and (len(device) == 2): + # this is probably a tuple (platformid, deviceid) + device = ocl.create_context(platformid=device[0], deviceid=device[1]) + + if device == "all": + res = {} + for pid, platform in enumerate(ocl.platforms): + for did, _devices in enumerate(platform.devices): + tup = (pid, did) + res[tup] = measure_workgroup_size(tup) + else: + res = _measure_workgroup_size(device) + return res + + +def kernel_workgroup_size(program, kernel): + """Extract the compile time maximum workgroup size + + :param program: OpenCL program + :param kernel: kernel or name of the kernel + :return: the maximum acceptable workgroup size for the given kernel + """ + assert isinstance(program, pyopencl.Program) + if not isinstance(kernel, pyopencl.Kernel): + kernel_name = kernel + assert kernel in (k.function_name for k in program.all_kernels()), "the kernel exists" + kernel = program.__getattr__(kernel_name) + + device = program.devices[0] + query_wg = pyopencl.kernel_work_group_info.WORK_GROUP_SIZE + return kernel.get_work_group_info(query_wg, device) + diff --git a/silx/opencl/medfilt.py b/silx/opencl/medfilt.py new file mode 100644 index 0000000..90cd49a --- /dev/null +++ b/silx/opencl/medfilt.py @@ -0,0 +1,269 @@ +# -*- coding: utf-8 -*- +# +# Project: Azimuthal integration +# https://github.com/silx-kit/pyFAI +# +# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# . +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# . +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +"""A module for performing the 1d, 2d and 3d median filter ... + +The target is to mimic the signature of scipy.signal.medfilt and scipy.medfilt2 + +The first implementation targets 2D implementation where this operation is costly (~10s/2kx2k image) +""" +from __future__ import absolute_import, print_function, with_statement, division + + +__author__ = "Jerome Kieffer" +__license__ = "MIT" +__date__ = "15/03/2017" +__copyright__ = "2012-2017, ESRF, Grenoble" +__contact__ = "jerome.kieffer@esrf.fr" + +import logging +import numpy +from collections import OrderedDict + +from .common import pyopencl, kernel_workgroup_size +from .processing import EventDescription, OpenclProcessing, BufferDescription + +if pyopencl: + mf = pyopencl.mem_flags +else: + raise ImportError("pyopencl is not installed") +logger = logging.getLogger("silx.opencl.medfilt") + + +class MedianFilter2D(OpenclProcessing): + """A class for doing median filtering using OpenCL""" + buffers = [ + BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY), + BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY), + BufferDescription("image", 1, numpy.float32, mf.READ_WRITE), + ] + kernel_files = ["preprocess.cl", "bitonic.cl", "medfilt.cl"] + mapping = {numpy.int8: "s8_to_float", + numpy.uint8: "u8_to_float", + numpy.int16: "s16_to_float", + numpy.uint16: "u16_to_float", + numpy.uint32: "u32_to_float", + numpy.int32: "s32_to_float"} + + def __init__(self, shape, kernel_size=(3, 3), + ctx=None, devicetype="all", platformid=None, deviceid=None, + block_size=None, profile=False + ): + """Constructor of the OpenCL 2D median filtering class + + :param shape: shape of the images to treat + :param kernel size: 2-tuple of odd values + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param block_size: preferred workgroup size, may vary depending on the outpcome of the compilation + :param profile: switch on profiling to be able to profile at the kernel level, + store profiling elements (makes code slightly slower) + """ + OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + block_size=block_size, profile=profile) + self.shape = shape + self.size = self.shape[0] * self.shape[1] + self.kernel_size = self.calc_kernel_size(kernel_size) + self.workgroup_size = (self.calc_wg(self.kernel_size), 1) # 3D kernel + self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags) + for i in self.__class__.buffers] + + self.allocate_buffers() + self.local_mem = self._get_local_mem(self.workgroup_size[0]) + OpenclProcessing.compile_kernels(self, self.kernel_files, "-D NIMAGE=%i" % self.size) + self.set_kernel_arguments() + + def set_kernel_arguments(self): + """Parametrize all kernel arguments + """ + for val in self.mapping.values(): + self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("image_raw", "image"))) + self.cl_kernel_args["medfilt2d"] = OrderedDict((("image", self.cl_mem["image"]), + ("result", self.cl_mem["result"]), + ("local", self.local_mem), + ("khs1", numpy.int32(self.kernel_size[0] // 2)), # Kernel half-size along dim1 (lines) + ("khs2", numpy.int32(self.kernel_size[1] // 2)), # Kernel half-size along dim2 (columns) + ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines) + ("width", numpy.int32(self.shape[1])))) +# ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns)) + + def _get_local_mem(self, wg): + return pyopencl.LocalMemory(wg * 32) # 4byte per float, 8 element per thread + + def send_buffer(self, data, dest): + """Send a numpy array to the device, including the cast on the device if possible + + :param data: numpy array with data + :param dest: name of the buffer as registered in the class + """ + + dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0]) + events = [] + if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize): + copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type)) + events.append(EventDescription("copy H->D %s" % dest, copy_image)) + else: + copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data)) + kernel = getattr(self.program, self.mapping[data.dtype.type]) + cast_to_float = kernel(self.queue, (self.size,), None, self.cl_mem["image_raw"], self.cl_mem[dest]) + events += [EventDescription("copy H->D %s" % dest, copy_image), EventDescription("cast to float", cast_to_float)] + if self.profile: + self.events += events + + def calc_wg(self, kernel_size): + """calculate and return the optimal workgroup size for the first dimension, taking into account + the 8-height band + + :param kernel_size: 2-tuple of int, shape of the median window + :return: optimal workgroup size + """ + needed_threads = ((kernel_size[0] + 7) // 8) * kernel_size[1] + if needed_threads < 8: + wg = 8 + elif needed_threads < 32: + wg = 32 + else: + wg = 1 << (int(needed_threads).bit_length()) + return wg + + def medfilt2d(self, image, kernel_size=None): + """Actually apply the median filtering on the image + + :param image: numpy array with the image + :param kernel_size: 2-tuple if + :return: median-filtered 2D image + + + Nota: for window size 1x1 -> 7x7 up to 49 / 64 elements in 8 threads, 8elt/th + 9x9 -> 15x15 up to 225 / 256 elements in 32 threads, 8elt/th + 17x17 -> 21x21 up to 441 / 512 elements in 64 threads, 8elt/th + + TODO: change window size on the fly, + + + """ + events = [] + if kernel_size is None: + kernel_size = self.kernel_size + else: + kernel_size = self.calc_kernel_size(kernel_size) + kernel_half_size = kernel_size // numpy.int32(2) + # this is the workgroup size + wg = self.calc_wg(kernel_size) + + # check for valid work group size: + amws = kernel_workgroup_size(self.program, "medfilt2d") + logger.warning("max actual workgroup size: %s, expected: %s", amws, wg) + if wg > amws: + raise RuntimeError("Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws)) + + localmem = self._get_local_mem(wg) + + assert image.ndim == 2, "Treat only 2D images" + assert image.shape[0] <= self.shape[0], "height is OK" + assert image.shape[1] <= self.shape[1], "width is OK" + + with self.sem: + self.send_buffer(image, "image") + + kwargs = self.cl_kernel_args["medfilt2d"] + kwargs["local"] = localmem + kwargs["khs1"] = kernel_half_size[0] + kwargs["khs2"] = kernel_half_size[1] + kwargs["height"] = numpy.int32(image.shape[0]) + kwargs["width"] = numpy.int32(image.shape[1]) +# for k, v in kwargs.items(): +# print("%s: %s (%s)" % (k, v, type(v))) + mf2d = self.program.medfilt2d(self.queue, + (wg, image.shape[1]), + (wg, 1), *list(kwargs.values())) + events.append(EventDescription("median filter 2d", mf2d)) + + result = numpy.empty(image.shape, numpy.float32) + ev = pyopencl.enqueue_copy(self.queue, result, self.cl_mem["result"]) + events.append(EventDescription("copy D->H result", ev)) + ev.wait() + if self.profile: + self.events += events + return result + __call__ = medfilt2d + + @staticmethod + def calc_kernel_size(kernel_size): + """format the kernel size to be a 2-length numpy array of int32 + """ + kernel_size = numpy.asarray(kernel_size, dtype=numpy.int32) + if kernel_size.shape == (): + kernel_size = numpy.repeat(kernel_size.item(), 2).astype(numpy.int32) + for size in kernel_size: + if (size % 2) != 1: + raise ValueError("Each element of kernel_size should be odd.") + return kernel_size + + +class _MedFilt2d(object): + median_filter = None + + @classmethod + def medfilt2d(cls, ary, kernel_size=3): + """Median filter a 2-dimensional array. + + Apply a median filter to the `input` array using a local window-size + given by `kernel_size` (must be odd). + + :param ary: A 2-dimensional input array. + :param kernel_size: A scalar or a list of length 2, giving the size of the + median filter window in each dimension. Elements of + `kernel_size` should be odd. If `kernel_size` is a scalar, + then this scalar is used as the size in each dimension. + Default is a kernel of size (3, 3). + :return: An array the same size as input containing the median filtered + result. always work on float32 values + + About the padding: + + * The filling mode in scipy.signal.medfilt2d is zero-padding + * This implementation is equivalent to: + scipy.ndimage.filters.median_filter(ary, kernel_size, mode="nearest") + + """ + image = numpy.atleast_2d(ary) + shape = numpy.array(image.shape) + if cls.median_filter is None: + cls.median_filter = MedianFilter2D(image.shape, kernel_size) + elif (numpy.array(cls.median_filter.shape) < shape).any(): + # enlarger the buffer size + new_shape = numpy.maximum(numpy.array(cls.median_filter.shape), shape) + ctx = cls.median_filter.ctx + cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx) + return cls.median_filter.medfilt2d(image) + +medfilt2d = _MedFilt2d.medfilt2d diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py new file mode 100644 index 0000000..ca46701 --- /dev/null +++ b/silx/opencl/processing.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Project: S I L X project +# https://github.com/silx-kit/silx +# +# Copyright (C) European Synchrotron Radiation Facility, Grenoble, France +# +# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# + +""" +Common OpenCL abstract base classes for different processing +""" + +from __future__ import absolute_import, print_function, division + + +__author__ = "Jerome Kieffer" +__contact__ = "Jerome.Kieffer@ESRF.eu" +__license__ = "MIT" +__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "03/02/2017" +__status__ = "stable" + + +import os +import logging +import gc +from collections import namedtuple +import numpy +import threading +from .common import ocl, pyopencl, release_cl_buffers +from .utils import concatenate_cl_kernel + + +BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"]) +EventDescription = namedtuple("EventDescription", ["name", "event"]) + +logger = logging.getLogger("pyFAI.opencl.processing") + + +class OpenclProcessing(object): + """Abstract class for different types of OpenCL processing. + + This class provides: + * Generation of the context, queues, profiling mode + * Additional function to allocate/free all buffers declared as static attributes of the class + * Functions to compile kernels, cache them and clean them + * helper functions to clone the object + """ + # Example of how to create an output buffer of 10 floats + buffers = [BufferDescription("output", 10, numpy.float32, None), + ] + # list of kernel source files to be concatenated before compilation of the program + kernel_files = [] + + def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None, + block_size=None, profile=False): + """Constructor of the abstract OpenCL processing class + + :param ctx: actual working context, left to None for automatic + initialization from device type or platformid/deviceid + :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" + :param platformid: integer with the platform_identifier, as given by clinfo + :param deviceid: Integer with the device identifier, as given by clinfo + :param block_size: preferred workgroup size, may vary depending on the outpcome of the compilation + :param profile: switch on profiling to be able to profile at the kernel level, + store profiling elements (makes code slightly slower) + """ + self.sem = threading.Semaphore() + self.profile = None + self.events = [] # List with of EventDescription, kept for profiling + self.cl_mem = {} # dict with all buffer allocated + self.cl_program = None # The actual OpenCL program + self.cl_kernel_args = {} # dict with all kernel arguments + if ctx: + self.ctx = ctx + else: + self.ctx = ocl.create_context(devicetype=devicetype, platformid=platformid, deviceid=deviceid) + device_name = self.ctx.devices[0].name.strip() + platform_name = self.ctx.devices[0].platform.name.strip() + platform = ocl.get_platform(platform_name) + self.device = platform.get_device(device_name) + self.cl_kernel_args = {} # dict with all kernel arguments + + self.set_profiling(profile) + self.block_size = block_size + self.program = None + + def __del__(self): + """Destructor: release all buffers and programs + """ + self.free_kernels() + self.free_buffers() + self.queue = None + self.ctx = None + gc.collect() + + def allocate_buffers(self, buffers=None): + """ + Allocate OpenCL buffers required for a specific configuration + + :param buffers: a list of BufferDescriptions, leave to None for + paramatrized buffers. + + Note that an OpenCL context also requires some memory, as well + as Event and other OpenCL functionalities which cannot and are + not taken into account here. The memory required by a context + varies depending on the device. Typical for GTX580 is 65Mb but + for a 9300m is ~15Mb In addition, a GPU will always have at + least 3-5Mb of memory in use. Unfortunately, OpenCL does NOT + have a built-in way to check the actual free memory on a + device, only the total memory. + """ + + if buffers is None: + buffers = self.buffers + + with self.sem: + mem = {} + + # check if enough memory is available on the device + ualloc = 0 + for buf in buffers: + ualloc += numpy.dtype(buf.dtype).itemsize * buf.size + logger.info("%.3fMB are needed on device which has %.3fMB", + ualloc / 1.0e6, self.device.memory / 1.0e6) + + if ualloc >= self.device.memory: + raise MemoryError("Fatal error in allocate_buffers. Not enough " + " device memory for buffers (%lu requested, %lu available)" + % (ualloc, self.device.memory)) + + # do the allocation + try: + for buf in buffers: + size = numpy.dtype(buf.dtype).itemsize * buf.size + mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, size) + except pyopencl.MemoryError as error: + release_cl_buffers(mem) + raise MemoryError(error) + + self.cl_mem.update(mem) + + def free_buffers(self): + """free all device.memory allocated on the device + """ + with self.sem: + for key, buf in list(self.cl_mem.items()): + if buf is not None: + if isinstance(buf, pyopencl.array.Array): + try: + buf.data.release() + except pyopencl.LogicError: + logger.error("Error while freeing buffer %s", key) + else: + try: + buf.release() + except pyopencl.LogicError: + logger.error("Error while freeing buffer %s", key) + self.cl_mem[key] = None + + def compile_kernels(self, kernel_files=None, compile_options=None): + """Call the OpenCL compiler + + :param kernel_files: list of path to the kernel + (by default use the one declared in the class) + :param compile_options: string of compile options + """ + # concatenate all needed source files into a single openCL module + kernel_files = kernel_files or self.kernel_files + kernel_src = concatenate_cl_kernel(kernel_files) + + compile_options = compile_options or "" + logger.info("Compiling file %s with options %s", kernel_files, compile_options) + try: + self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options) + except (pyopencl.MemoryError, pyopencl.LogicError) as error: + raise MemoryError(error) + + def free_kernels(self): + """Free all kernels + """ + for kernel in self.cl_kernel_args: + self.cl_kernel_args[kernel] = [] + self.program = None + + def set_profiling(self, value=True): + """Switch On/Off the profiling flag of the command queue to allow debugging + + :param value: set to True to enable profiling, or to False to disable it. + Without profiling, the processing is marginally faster + + Profiling information can then be retrieved with the 'log_profile' method + """ + if bool(value) != self.profile: + with self.sem: + self.profile = bool(value) + if self.profile: + self.queue = pyopencl.CommandQueue(self.ctx, + properties=pyopencl.command_queue_properties.PROFILING_ENABLE) + else: + self.queue = pyopencl.CommandQueue(self.ctx) + + def log_profile(self): + """If we are in profiling mode, prints out all timing for every single OpenCL call + """ + t = 0.0 + out = ["", "Profiling info for OpenCL %s" % self.__class__.__name__] + if self.profile: + for e in self.events: + if "__len__" in dir(e) and len(e) >= 2: + et = 1e-6 * (e[1].profile.end - e[1].profile.start) + out.append("%50s:\t%.3fms" % (e[0], et)) + t += et + + out.append("_" * 80) + out.append("%50s:\t%.3fms" % ("Total execution time", t)) + logger.info(os.linesep.join(out)) + return out + +# This should be implemented by concrete class +# def __copy__(self): +# """Shallow copy of the object +# +# :return: copy of the object +# """ +# return self.__class__((self._data, self._indices, self._indptr), +# self.size, block_size=self.BLOCK_SIZE, +# platformid=self.platform.id, +# deviceid=self.device.id, +# checksum=self.on_device.get("data"), +# profile=self.profile, empty=self.empty) +# +# def __deepcopy__(self, memo=None): +# """deep copy of the object +# +# :return: deepcopy of the object +# """ +# if memo is None: +# memo = {} +# new_csr = self._data.copy(), self._indices.copy(), self._indptr.copy() +# memo[id(self._data)] = new_csr[0] +# memo[id(self._indices)] = new_csr[1] +# memo[id(self._indptr)] = new_csr[2] +# new_obj = self.__class__(new_csr, self.size, +# block_size=self.BLOCK_SIZE, +# platformid=self.platform.id, +# deviceid=self.device.id, +# checksum=self.on_device.get("data"), +# profile=self.profile, empty=self.empty) +# memo[id(self)] = new_obj +# return new_obj diff --git a/silx/opencl/setup.py b/silx/opencl/setup.py new file mode 100644 index 0000000..d0181cd --- /dev/null +++ b/silx/opencl/setup.py @@ -0,0 +1,44 @@ +# coding: utf-8 +# +# Copyright (C) 2016 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# + +from __future__ import division + +__contact__ = "jerome.kieffer@esrf.eu" +__license__ = "MIT" +__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" +__authors__ = ["J. Kieffer"] +__date__ = "08/09/2016" + +from numpy.distutils.misc_util import Configuration + + +def configuration(parent_package='', top_path=None): + config = Configuration('opencl', parent_package, top_path) + config.add_subpackage('sift') + config.add_subpackage('test') + return config + + +if __name__ == "__main__": + from numpy.distutils.core import setup + setup(configuration=configuration) diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py new file mode 100644 index 0000000..24aa06e --- /dev/null +++ b/silx/opencl/test/__init__.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# +# Project: silx +# https://github.com/silx-kit/silx +# +# Copyright (C) 2012-2016 European Synchrotron Radiation Facility, Grenoble, France +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +__authors__ = ["J. Kieffer"] +__license__ = "MIT" +__date__ = "15/03/2017" + +import unittest +from . import test_addition +from . import test_medfilt +from ..sift import test as test_sift + + +def suite(): + test_suite = unittest.TestSuite() + test_suite.addTests(test_addition.suite()) + test_suite.addTests(test_medfilt.suite()) + test_suite.addTests(test_sift.suite()) + + return test_suite diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py new file mode 100644 index 0000000..89e49be --- /dev/null +++ b/silx/opencl/test/test_addition.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Project: Sift implementation in Python + OpenCL +# https://github.com/silx-kit/silx +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +""" +Simple test of an addition +""" + +from __future__ import division, print_function + +__authors__ = ["Henri Payno, Jérôme Kieffer"] +__contact__ = "jerome.kieffer@esrf.eu" +__license__ = "MIT" +__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "15/03/2017" + +import logging +import numpy + +import unittest +from ..common import ocl, _measure_workgroup_size +if ocl: + import pyopencl + import pyopencl.array +from ..utils import get_opencl_code +logger = logging.getLogger(__name__) + + +@unittest.skipUnless(ocl, "PyOpenCl is missing") +class TestAddition(unittest.TestCase): + + @classmethod + def setUpClass(cls): + super(TestAddition, cls).setUpClass() + if ocl: + cls.ctx = ocl.create_context() + if logger.getEffectiveLevel() <= logging.INFO: + cls.PROFILE = True + cls.queue = pyopencl.CommandQueue( + cls.ctx, + properties=pyopencl.command_queue_properties.PROFILING_ENABLE) + else: + cls.PROFILE = False + cls.queue = pyopencl.CommandQueue(cls.ctx) + cls.max_valid_wg = 0 + + @classmethod + def tearDownClass(cls): + super(TestAddition, cls).tearDownClass() + print("Maximum valid workgroup size %s on device %s" % (cls.max_valid_wg, cls.ctx.devices[0])) + cls.ctx = None + cls.queue = None + + def setUp(self): + if ocl is None: + return + self.shape = 4096 + self.data = numpy.random.random(self.shape).astype(numpy.float32) + self.d_array_img = pyopencl.array.to_device(self.queue, self.data) + self.d_array_5 = pyopencl.array.zeros_like(self.d_array_img) - 5 + self.program = pyopencl.Program(self.ctx, get_opencl_code("addition")).build() + + def tearDown(self): + self.img = self.data = None + self.d_array_img = self.d_array_5 = self.program = None + + @unittest.skipUnless(ocl, "pyopencl is missing") + def test_add(self): + """ + tests the addition kernel + """ + maxi = int(round(numpy.log2(self.shape))) + for i in range(maxi): + d_array_result = pyopencl.array.empty_like(self.d_array_img) + wg = 1 << i + try: + evt = self.program.addition(self.queue, (self.shape,), (wg,), + self.d_array_img.data, self.d_array_5.data, d_array_result.data, numpy.int32(self.shape)) + evt.wait() + except Exception as error: + max_valid_wg = self.program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0]) + msg = "Error %s on WG=%s: %s" % (error, wg, max_valid_wg) + self.assertLess(max_valid_wg, wg, msg) + break + else: + res = d_array_result.get() + good = numpy.allclose(res, self.data - 5) + if good and wg>self.max_valid_wg: + self.__class__.max_valid_wg = wg + self.assert_(good, "calculation is correct for WG=%s" % wg) + + @unittest.skipUnless(ocl, "pyopencl is missing") + def test_measurement(self): + """ + tests that all devices are working properly ... + """ + for platform in ocl.platforms: + for did, device in enumerate(platform.devices): + meas = _measure_workgroup_size((platform.id, device.id)) + self.assertEqual(meas, device.max_work_group_size, + "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size)) + + +def suite(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestAddition("test_add")) + testSuite.addTest(TestAddition("test_measurement")) + return testSuite + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") diff --git a/silx/opencl/test/test_medfilt.py b/silx/opencl/test/test_medfilt.py new file mode 100644 index 0000000..f4e4cc8 --- /dev/null +++ b/silx/opencl/test/test_medfilt.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Project: Median filter of images + OpenCL +# https://github.com/silx-kit/silx +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +""" +Simple test of the median filter +""" + +from __future__ import division, print_function + +__authors__ = ["Jérôme Kieffer"] +__contact__ = "jerome.kieffer@esrf.eu" +__license__ = "MIT" +__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "15/03/2017" + + +import sys +import time +import logging +import numpy +import unittest +from collections import namedtuple +try: + import mako +except ImportError: + mako = None +from ..common import ocl +if ocl: + import pyopencl + import pyopencl.array + from .. import medfilt + +logger = logging.getLogger(__name__) + +Result = namedtuple("Result", ["size", "error", "sp_time", "oc_time"]) + +try: + from scipy.misc import ascent +except: + def ascent(): + """Dummy image from random data""" + return numpy.random.random((512, 512)) +try: + from scipy.ndimage import filters + median_filter = filters.median_filter + HAS_SCIPY = True +except: + HAS_SCIPY = False + from silx.math import medfilt2d as median_filter + +@unittest.skipUnless(ocl and mako, "PyOpenCl is missing") +class TestMedianFilter(unittest.TestCase): + + def setUp(self): + if ocl is None: + return + self.data = ascent().astype(numpy.float32) + self.medianfilter = medfilt.MedianFilter2D(self.data.shape, devicetype="gpu") + + def tearDown(self): + self.data = None + self.medianfilter = None + + def measure(self, size): + "Common measurement of accuracy and timings" + t0 = time.time() + if HAS_SCIPY: + ref = median_filter(self.data, size, mode="nearest") + else: + ref = median_filter(self.data, size) + t1 = time.time() + try: + got = self.medianfilter.medfilt2d(self.data, size) + except RuntimeError as msg: + logger.error(msg) + return + t2 = time.time() + delta = abs(got - ref).max() + return Result(size, delta, t1 - t0, t2 - t1) + + @unittest.skipUnless(ocl and mako, "pyopencl is missing") + def test_medfilt(self): + """ + tests the median filter kernel + """ + r = self.measure(size=11) + if r is None: + logger.info("test_medfilt: size: %s: skipped") + else: + logger.info("test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r) + self.assert_(r.error == 0, 'Results are correct') + + def benchmark(self, limit=36): + "Run some benchmarking" + try: + import PyQt5 + from ...gui.matplotlib import pylab + from ...gui.utils import update_fig + except: + pylab = None + + def update_fig(*ag, **kwarg): + pass + + fig = pylab.figure() + fig.suptitle("Median filter of an image 512x512") + sp = fig.add_subplot(1, 1, 1) + sp.set_title(self.medianfilter.ctx.devices[0].name) + sp.set_xlabel("Window width & height") + sp.set_ylabel("Execution time (s)") + sp.set_xlim(2, limit + 1) + sp.set_ylim(0, 4) + data_size = [] + data_scipy = [] + data_opencl = [] + plot_sp = sp.plot(data_size, data_scipy, "-or", label="scipy")[0] + plot_opencl = sp.plot(data_size, data_opencl, "-ob", label="opencl")[0] + sp.legend(loc=2) + fig.show() + update_fig(fig) + for s in range(3, limit, 2): + r = self.measure(s) + print(r) + if r.error == 0: + data_size.append(s) + data_scipy.append(r.sp_time) + data_opencl.append(r.oc_time) + plot_sp.set_data(data_size, data_scipy) + plot_opencl.set_data(data_size, data_opencl) + update_fig(fig) + fig.show() + if sys.version_info[0] < 3: + raw_input() + else: + input() + + +def suite(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestMedianFilter("test_medfilt")) + return testSuite + + +def benchmark(): + testSuite = unittest.TestSuite() + testSuite.addTest(TestMedianFilter("benchmark")) + return testSuite + + +if __name__ == '__main__': + unittest.main(defaultTest="suite") diff --git a/silx/opencl/utils.py b/silx/opencl/utils.py new file mode 100644 index 0000000..58becb0 --- /dev/null +++ b/silx/opencl/utils.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Project: Sift implementation in Python + OpenCL +# https://github.com/silx-kit/silx +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + + +from __future__ import division + +__authors__ = ["Jérôme Kieffer", "Pierre Paleo"] +__contact__ = "jerome.kieffer@esrf.eu" +__license__ = "MIT" +__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" +__date__ = "15/03/2017" +__status__ = "Production" + +import os +import numpy +from ..resources import resource_filename +from math import log, ceil + + +def calc_size(shape, blocksize): + """ + Calculate the optimal size for a kernel according to the workgroup size + """ + if "__len__" in dir(blocksize): + return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize)) + else: + return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape) + + +def nextpower(n): + """Calculate the power of two + + :param n: an integer, for example 100 + :return: another integer, 100-> 128 + """ + return 1 << int(ceil(log(n, 2))) + + +def sizeof(shape, dtype="uint8"): + """ + Calculate the number of bytes needed to allocate for a given structure + + :param shape: size or tuple of sizes + :param dtype: data type + """ + itemsize = numpy.dtype(dtype).itemsize + cnt = 1 + if "__len__" in dir(shape): + for dim in shape: + cnt *= dim + else: + cnt = int(shape) + return cnt * itemsize + + +def get_cl_file(filename): + """get the full path of a openCL file + + :return: the full path of the openCL source file + """ + if not filename.endswith(".cl"): + filename += ".cl" + return resource_filename(os.path.join("opencl", filename)) + + +def read_cl_file(filename): + """ + :param filename: read an OpenCL file and apply a preprocessor + :return: preprocessed source code + """ + with open(get_cl_file(filename), "r") as f: + # Dummy preprocessor which removes the #include + lines = [i for i in f.readlines() if not i.startswith("#include ")] + return "".join(lines) + +get_opencl_code = read_cl_file + + +def concatenate_cl_kernel(filenames): + """Concatenates all the kernel from the list of files + + :param filenames: filenames containing the kernels + :type filenames: list of str which can be filename of kernel as a string. + :return: a string with all kernels concatenated + + this method concatenates all the kernel from the list + """ + return os.linesep.join(read_cl_file(fn) for fn in filenames) |