1 files changed, 45 insertions, 14 deletions
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index 1997a55..250582d 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -4,7 +4,7 @@
 #    Project: S I L X project
 #             https://github.com/silx-kit/silx
 #
-#    Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
+#    Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
 #
 #    Principal author:       Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
 #
@@ -31,7 +31,7 @@
 #
 
 """
-Common OpenCL abstract base classes for different processing
+Common OpenCL abstract base classes for different processing
 """
 
 from __future__ import absolute_import, print_function, division
@@ -41,7 +41,7 @@ __author__ = "Jerome Kieffer"
 __contact__ = "Jerome.Kieffer@ESRF.eu"
 __license__ = "MIT"
 __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "03/10/2017"
+__date__ = "27/02/2018"
 __status__ = "stable"
 
 
@@ -69,17 +69,29 @@ class KernelContainer(object):
 
         :param program: the OpenCL program as generated by PyOpenCL
         """
+        self._program = program
         for kernel in program.all_kernels():
             self.__setattr__(kernel.function_name, kernel)
 
     def get_kernels(self):
         "return the dictionary with all kernels"
-        return self.__dict__.copy()
+        return dict(item for item in self.__dict__.items()
+                    if not item[0].startswith("_"))
 
     def get_kernel(self, name):
         "get a kernel from its name"
+        logger.debug("KernelContainer.get_kernel(%s)", name)
         return self.__dict__.get(name)
 
+    def max_workgroup_size(self, kernel_name):
+        "Retrieve the compile time max_workgroup_size for a given kernel"
+        if isinstance(kernel_name, pyopencl.Kernel):
+            kernel = kernel_name
+        else:
+            kernel = self.get_kernel(kernel_name)
+
+        return kernel_workgroup_size(self._program, kernel)
+
 
 class OpenclProcessing(object):
     """Abstract class for different types of OpenCL processing.
@@ -97,7 +109,7 @@ class OpenclProcessing(object):
     kernel_files = []
 
     def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None,
-                 block_size=None, profile=False):
+                 block_size=None, memory=None, profile=False):
         """Constructor of the abstract OpenCL processing class
 
         :param ctx: actual working context, left to None for automatic
@@ -107,6 +119,7 @@ class OpenclProcessing(object):
         :param deviceid: Integer with the device identifier, as given by clinfo
         :param block_size: preferred workgroup size, may vary depending on the
                             out come of the compilation
+        :param memory: minimum memory available on device, used to create the context when ctx is None
         :param profile: switch on profiling to be able to profile at the kernel
                          level, store profiling elements (makes code slightly slower)
         """
@@ -116,10 +129,13 @@ class OpenclProcessing(object):
         self.cl_mem = {}  # dict with all buffer allocated
         self.cl_program = None  # The actual OpenCL program
         self.cl_kernel_args = {}  # dict with all kernel arguments
+        self.queue = None
         if ctx:
             self.ctx = ctx
         else:
-            self.ctx = ocl.create_context(devicetype=devicetype, platformid=platformid, deviceid=deviceid)
+            self.ctx = ocl.create_context(devicetype=devicetype,
+                                          platformid=platformid, deviceid=deviceid,
+                                          memory=memory)
         device_name = self.ctx.devices[0].name.strip()
         platform_name = self.ctx.devices[0].platform.name.strip()
         platform = ocl.get_platform(platform_name)
@@ -134,19 +150,23 @@ class OpenclProcessing(object):
     def __del__(self):
         """Destructor: release all buffers and programs
         """
+        self.reset_log()
         self.free_kernels()
         self.free_buffers()
         self.queue = None
+        self.device = None
         self.ctx = None
         gc.collect()
 
-    def allocate_buffers(self, buffers=None):
+    def allocate_buffers(self, buffers=None, use_array=False):
         """
         Allocate OpenCL buffers required for a specific configuration
 
         :param buffers: a list of BufferDescriptions, leave to None for
                         paramatrized buffers.
-
+        :param use_array: allocate memory as pyopencl.array.Array
+                            instead of pyopencl.Buffer
+        
         Note that an OpenCL context also requires some memory, as well
         as Event and other OpenCL functionalities which cannot and are
         not taken into account here.  The memory required by a context
@@ -166,7 +186,7 @@ class OpenclProcessing(object):
             # check if enough memory is available on the device
             ualloc = 0
             for buf in buffers:
-                ualloc += numpy.dtype(buf.dtype).itemsize * buf.size
+                ualloc += numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
             logger.info("%.3fMB are needed on device: %s,  which has %.3fMB",
                         ualloc / 1.0e6, self.device, self.device.memory / 1.0e6)
 
@@ -177,9 +197,13 @@ class OpenclProcessing(object):
 
             # do the allocation
             try:
-                for buf in buffers:
-                    size = numpy.dtype(buf.dtype).itemsize * buf.size
-                    mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size))
+                if use_array:
+                    for buf in buffers:
+                        mem[buf.name] = pyopencl.array.empty(self.queue, buf.size, buf.dtype)
+                else:
+                    for buf in buffers:
+                        size = numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
+                        mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size))
             except pyopencl.MemoryError as error:
                 release_cl_buffers(mem)
                 raise MemoryError(error)
@@ -199,8 +223,8 @@ class OpenclProcessing(object):
         self.cl_mem.update(mem)
 
     def check_workgroup_size(self, kernel_name):
-        kernel = self.kernels.get_kernel(kernel_name)
-        self.compiletime_workgroup_size = kernel_workgroup_size(self.program, kernel)
+        "Return the maximum workgroup size of the given kernel after compilation"
+        return self.kernels.max_workgroup_size(kernel_name)
 
     def free_buffers(self):
         """free all device.memory allocated on the device
@@ -282,6 +306,13 @@ class OpenclProcessing(object):
         logger.info(os.linesep.join(out))
         return out
 
+    def reset_log(self):
+        """
+        Resets the profiling timers by emptying the list of recorded events
+        """
+        with self.sem:
+            self.events = []
+
 # This should be implemented by concrete class
 #     def __copy__(self):
 #         """Shallow copy of the object