Diffstat (limited to 'silx/opencl/common.py')
-rw-r--r--   silx/opencl/common.py   90
1 file changed, 81 insertions(+), 9 deletions(-)
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 110d941..002c15d 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
 __contact__ = "Jerome.Kieffer@ESRF.eu"
 __license__ = "MIT"
 __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "28/11/2019"
+__date__ = "30/11/2020"
 __status__ = "stable"
 __all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
            "measure_workgroup_size", "kernel_workgroup_size"]
@@ -46,10 +46,8 @@ import numpy
 
 from .utils import get_opencl_code
 
-
 logger = logging.getLogger(__name__)
 
-
 if os.environ.get("SILX_OPENCL") in ["0", "False"]:
     logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
     pyopencl = None
@@ -70,13 +68,13 @@ else:
         mf = pyopencl.mem_flags
 
 if pyopencl is None:
+    # Define default mem flags
     class mf(object):
         WRITE_ONLY = 1
         READ_ONLY = 1
         READ_WRITE = 1
 
-
 FLOP_PER_CORE = {"GPU": 64,  # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
                  "CPU": 4,  # CPU, at least intel's have 4 operation per cycle
                  "ACC": 8}  # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
@@ -108,6 +106,7 @@ class Device(object):
     """
     Simple class that contains the structure of an OpenCL device
     """
+
     def __init__(self, name="None", dtype=None, version=None, driver_version=None,
                  extensions="", memory=None, available=None,
                  cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
@@ -174,6 +173,7 @@ class Platform(object):
     """
     Simple class that contains the structure of an OpenCL platform
     """
+
     def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
         """
         Class containing all descriptions of a platform and all devices description within that platform.
@@ -225,6 +225,8 @@ def _measure_workgroup_size(device_or_context, fast=False):
     """Mesure the maximal work group size of the given device
 
+    DEPRECATED since not perfectly correct !
+
     :param device_or_context: instance of pyopencl.Device or pyopencl.Context or 2-tuple (platformid,deviceid)
     :param fast: ask the kernel the valid value, don't probe it
@@ -318,7 +320,7 @@ class OpenCL(object):
                 ####################################################
                 extensions = device.extensions
                 if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
-                        extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
+                    extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
                 try:
                     devtype = pyopencl.device_type.to_string(device.type).upper()
                 except ValueError:
@@ -573,6 +575,53 @@ def allocate_cl_buffers(buffers, device=None, context=None):
     return mem
 
 
+def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
+    """
+    Allocate an OpenCL image ("texture").
+
+    :param ctx: OpenCL context
+    :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
+        do not support 1D images, so 1D images are handled as 2D with one row
+    :param support_1D: force the image to be 1D if the shape has only one dim
+    """
+    if len(shape) == 1 and not(support_1D):
+        shape = (1,) + shape
+    return pyopencl.Image(
+        ctx,
+        pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
+        pyopencl.ImageFormat(
+            pyopencl.channel_order.INTENSITY,
+            pyopencl.channel_type.FLOAT
+        ),
+        hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
+    )
+
+
+def check_textures_availability(ctx):
+    """
+    Check whether textures are supported on the current OpenCL context.
+
+    :param ctx: OpenCL context
+    """
+    try:
+        dummy_texture = allocate_texture(ctx, (16, 16))
+        # Need to further access some attributes (pocl)
+        dummy_height = dummy_texture.height
+        textures_available = True
+        del dummy_texture, dummy_height
+    except (pyopencl.RuntimeError, pyopencl.LogicError):
+        textures_available = False
+    # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
+    # There is no way to detect this until a kernel is compiled
+    try:
+        cc = ctx.devices[0].compute_capability_major_nv
+        textures_available &= (cc >= 3)
+    except (pyopencl.LogicError, AttributeError):  # probably not a Nvidia GPU
+        pass
+    #
+    return textures_available
+
+
 def measure_workgroup_size(device):
     """Measure the actual size of the workgroup
 
@@ -599,12 +648,25 @@ def measure_workgroup_size(device):
     return res
 
 
-def kernel_workgroup_size(program, kernel):
-    """Extract the compile time maximum workgroup size
+def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
+    """Extract the compile time information from a kernel
 
     :param program: OpenCL program
     :param kernel: kernel or name of the kernel
-    :return: the maximum acceptable workgroup size for the given kernel
+    :param what: what is the query about ?
+    :return: int or 3-int for the workgroup size.
+
+    Possible information available are:
+    * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
+    * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1!
+    * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel
+    * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
+    * 'PRIVATE_MEM_SIZE': Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel
+    * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
+
+    Further information on:
+    https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
+
     """
     assert isinstance(program, pyopencl.Program)
     if not isinstance(kernel, pyopencl.Kernel):
@@ -613,5 +675,15 @@ def kernel_workgroup_size(program, kernel):
         kernel_name = kernel
         assert kernel_name in program.kernel_names, "the kernel exists"
         kernel = program.__getattr__(kernel_name)
 
     device = program.devices[0]
-    query_wg = pyopencl.kernel_work_group_info.WORK_GROUP_SIZE
+    query_wg = getattr(pyopencl.kernel_work_group_info, what)
     return kernel.get_work_group_info(query_wg, device)
+
+
+def kernel_workgroup_size(program, kernel):
+    """Extract the compile time maximum workgroup size
+
+    :param program: OpenCL program
+    :param kernel: kernel or name of the kernel
+    :return: the maximum acceptable workgroup size for the given kernel
+    """
+    return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE")
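For orientation, here is a minimal usage sketch (not part of the patch) showing how the helpers touched by this diff fit together. It assumes pyopencl and a working OpenCL runtime are installed and that the silx singleton ocl is not None; the tiny fill kernel is only an illustrative stand-in, not something shipped with silx.

# Illustrative only: exercises the helpers added or renamed by this patch.
import pyopencl

from silx.opencl.common import (ocl, check_textures_availability,
                                query_kernel_info, kernel_workgroup_size)

# ocl is the OpenCL() singleton defined in common.py; it is None when no
# usable OpenCL runtime is found, so a real script should check for that.
ctx = ocl.create_context()

# New helper from this patch: probe whether images/textures are usable.
print("textures available:", check_textures_availability(ctx))

# Build a trivial kernel (a stand-in, not part of silx) to query it.
src = "__kernel void fill(__global float *out) { out[get_global_id(0)] = 1.0f; }"
program = pyopencl.Program(ctx, src).build()

# query_kernel_info() generalises the former kernel_workgroup_size() helper.
print(query_kernel_info(program, "fill", what="WORK_GROUP_SIZE"))
print(query_kernel_info(program, "fill", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE"))

# The historical entry point is kept as a thin wrapper, so existing callers
# of kernel_workgroup_size() need no change.
print(kernel_workgroup_size(program, "fill"))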