diff options
Diffstat (limited to 'silx/opencl/processing.py')
-rw-r--r-- | silx/opencl/processing.py | 59 |
1 files changed, 45 insertions, 14 deletions
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py index 1997a55..250582d 100644 --- a/silx/opencl/processing.py +++ b/silx/opencl/processing.py @@ -4,7 +4,7 @@ # Project: S I L X project # https://github.com/silx-kit/silx # -# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -31,7 +31,7 @@ # """ -Common OpenCL abstract base classes for different processing +Common OpenCL abstract base classe for different processing """ from __future__ import absolute_import, print_function, division @@ -41,7 +41,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "03/10/2017" +__date__ = "27/02/2018" __status__ = "stable" @@ -69,17 +69,29 @@ class KernelContainer(object): :param program: the OpenCL program as generated by PyOpenCL """ + self._program = program for kernel in program.all_kernels(): self.__setattr__(kernel.function_name, kernel) def get_kernels(self): "return the dictionary with all kernels" - return self.__dict__.copy() + return dict(item for item in self.__dict__.items() + if not item[0].startswith("_")) def get_kernel(self, name): "get a kernel from its name" + logger.debug("KernelContainer.get_kernel(%s)", name) return self.__dict__.get(name) + def max_workgroup_size(self, kernel_name): + "Retrieve the compile time max_workgroup_size for a given kernel" + if isinstance(kernel_name, pyopencl.Kernel): + kernel = kernel_name + else: + kernel = self.get_kernel(kernel_name) + + return kernel_workgroup_size(self._program, kernel) + class OpenclProcessing(object): """Abstract class for different types of OpenCL processing. @@ -97,7 +109,7 @@ class OpenclProcessing(object): kernel_files = [] def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None, - block_size=None, profile=False): + block_size=None, memory=None, profile=False): """Constructor of the abstract OpenCL processing class :param ctx: actual working context, left to None for automatic @@ -107,6 +119,7 @@ class OpenclProcessing(object): :param deviceid: Integer with the device identifier, as given by clinfo :param block_size: preferred workgroup size, may vary depending on the out come of the compilation + :param memory: minimum memory available on device :param profile: switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) """ @@ -116,10 +129,13 @@ class OpenclProcessing(object): self.cl_mem = {} # dict with all buffer allocated self.cl_program = None # The actual OpenCL program self.cl_kernel_args = {} # dict with all kernel arguments + self.queue = None if ctx: self.ctx = ctx else: - self.ctx = ocl.create_context(devicetype=devicetype, platformid=platformid, deviceid=deviceid) + self.ctx = ocl.create_context(devicetype=devicetype, + platformid=platformid, deviceid=deviceid, + memory=memory) device_name = self.ctx.devices[0].name.strip() platform_name = self.ctx.devices[0].platform.name.strip() platform = ocl.get_platform(platform_name) @@ -134,19 +150,23 @@ class OpenclProcessing(object): def __del__(self): """Destructor: release all buffers and programs """ + self.reset_log() self.free_kernels() self.free_buffers() self.queue = None + self.device = None self.ctx = None gc.collect() - def allocate_buffers(self, buffers=None): + def allocate_buffers(self, buffers=None, use_array=False): """ Allocate OpenCL buffers required for a specific configuration :param buffers: a list of BufferDescriptions, leave to None for paramatrized buffers. - + :param use_array: allocate memory as pyopencl.array.Array + instead of pyopencl.Buffer + Note that an OpenCL context also requires some memory, as well as Event and other OpenCL functionalities which cannot and are not taken into account here. The memory required by a context @@ -166,7 +186,7 @@ class OpenclProcessing(object): # check if enough memory is available on the device ualloc = 0 for buf in buffers: - ualloc += numpy.dtype(buf.dtype).itemsize * buf.size + ualloc += numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size) logger.info("%.3fMB are needed on device: %s, which has %.3fMB", ualloc / 1.0e6, self.device, self.device.memory / 1.0e6) @@ -177,9 +197,13 @@ class OpenclProcessing(object): # do the allocation try: - for buf in buffers: - size = numpy.dtype(buf.dtype).itemsize * buf.size - mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size)) + if use_array: + for buf in buffers: + mem[buf.name] = pyopencl.array.empty(self.queue, buf.size, buf.dtype) + else: + for buf in buffers: + size = numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size) + mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size)) except pyopencl.MemoryError as error: release_cl_buffers(mem) raise MemoryError(error) @@ -199,8 +223,8 @@ class OpenclProcessing(object): self.cl_mem.update(mem) def check_workgroup_size(self, kernel_name): - kernel = self.kernels.get_kernel(kernel_name) - self.compiletime_workgroup_size = kernel_workgroup_size(self.program, kernel) + "Calculate the maximum workgroup size from given kernel after compilation" + return self.kernels.max_workgroup_size(kernel_name) def free_buffers(self): """free all device.memory allocated on the device @@ -282,6 +306,13 @@ class OpenclProcessing(object): logger.info(os.linesep.join(out)) return out + def reset_log(self): + """ + Resets the profiling timers + """ + with self.sem: + self.events = [] + # This should be implemented by concrete class # def __copy__(self): # """Shallow copy of the object |