8 files changed, 211 insertions, 140 deletions
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py
index 5a4087b..65a9836 100644
--- a/silx/opencl/backprojection.py
+++ b/silx/opencl/backprojection.py
@@ -164,9 +164,7 @@ class Backprojection(OpenclProcessing):
     def _allocate_memory(self):
         # Host memory
         self.slice = np.zeros(self.dimrec_shape, dtype=np.float32)
-        self.is_cpu = False
-        if self.device.type == "CPU":
-            self.is_cpu = True
+        self._use_textures = self.check_textures_availability()
 
         # Device memory
         self.buffers = [
@@ -180,7 +178,7 @@ class Backprojection(OpenclProcessing):
         self.d_sino = self.cl_mem["d_sino"]  # shorthand
 
         # Texture memory (if relevant)
-        if not(self.is_cpu):
+        if self._use_textures:
             self._allocate_textures()
 
         # Local memory
@@ -199,7 +197,14 @@ class Backprojection(OpenclProcessing):
             self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos
 
     def _init_kernels(self):
-        OpenclProcessing.compile_kernels(self, self.kernel_files)
+        compile_options = None
+        if not(self._use_textures):
+            compile_options = "-DDONT_USE_TEXTURES"
+        OpenclProcessing.compile_kernels(
+            self,
+            self.kernel_files,
+            compile_options=compile_options
+        )
         # check that workgroup can actually be (16, 16)
         self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel")
         # Workgroup and ndrange sizes are always the same
@@ -209,7 +214,7 @@ class Backprojection(OpenclProcessing):
             _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1]
         )
         # Prepare arguments for the kernel call
-        if self.is_cpu:
+        if not(self._use_textures):
             d_sino_ref = self.d_sino.data
         else:
             d_sino_ref = self.d_sino_tex
@@ -242,15 +247,7 @@ class Backprojection(OpenclProcessing):
         """
         Allocate the texture for the sinogram.
         """
-        self.d_sino_tex = pyopencl.Image(
-            self.ctx,
-            mf.READ_ONLY | mf.USE_HOST_PTR,
-            pyopencl.ImageFormat(
-                pyopencl.channel_order.INTENSITY,
-                pyopencl.channel_type.FLOAT
-            ),
-            hostbuf=np.zeros(self.shape[::-1], dtype=np.float32)
-        )
+        self.d_sino_tex = self.allocate_texture(self.shape)
 
     def _init_filter(self, filter_name):
         """Filter initialization
@@ -289,7 +286,7 @@ class Backprojection(OpenclProcessing):
         sino2 = sino
         if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
             sino2 = np.ascontiguousarray(sino, dtype=np.float32)
-        if self.is_cpu:
+        if not(self._use_textures):
             ev = pyopencl.enqueue_copy(
                                         self.queue,
                                         self.d_sino.data,
@@ -309,7 +306,7 @@ class Backprojection(OpenclProcessing):
         return EventDescription(what, ev)
 
     def _transfer_device_to_texture(self, d_sino):
-        if self.is_cpu:
+        if not(self._use_textures):
             if id(self.d_sino) == id(d_sino):
                 return
             ev = pyopencl.enqueue_copy(
@@ -343,7 +340,7 @@ class Backprojection(OpenclProcessing):
         with self.sem:
             events.append(self._transfer_to_texture(sino))
             # Call the backprojection kernel
-            if self.is_cpu:
+            if not(self._use_textures):
                 kernel_to_call = self.kernels.backproj_cpu_kernel
             else:
                 kernel_to_call = self.kernels.backproj_kernel
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 110d941..002c15d 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
 __contact__ = "Jerome.Kieffer@ESRF.eu"
 __license__ = "MIT"
 __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "28/11/2019"
+__date__ = "30/11/2020"
 __status__ = "stable"
 __all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
            "measure_workgroup_size", "kernel_workgroup_size"]
@@ -46,10 +46,8 @@ import numpy
 
 from .utils import get_opencl_code
 
-
 logger = logging.getLogger(__name__)
 
-
 if os.environ.get("SILX_OPENCL") in ["0", "False"]:
     logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
     pyopencl = None
@@ -70,13 +68,13 @@ else:
             mf = pyopencl.mem_flags
 
 if pyopencl is None:
+
     # Define default mem flags
     class mf(object):
         WRITE_ONLY = 1
         READ_ONLY = 1
         READ_WRITE = 1
 
-
 FLOP_PER_CORE = {"GPU": 64,  # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
                  "CPU": 4,  # CPU, at least intel's have 4 operation per cycle
                  "ACC": 8}  # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
@@ -108,6 +106,7 @@ class Device(object):
     """
     Simple class that contains the structure of an OpenCL device
     """
+
     def __init__(self, name="None", dtype=None, version=None, driver_version=None,
                  extensions="", memory=None, available=None,
                  cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
@@ -174,6 +173,7 @@ class Platform(object):
     """
     Simple class that contains the structure of an OpenCL platform
     """
+
     def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
         """
         Class containing all descriptions of a platform and all devices description within that platform.
@@ -225,6 +225,8 @@ class Platform(object):
 def _measure_workgroup_size(device_or_context, fast=False):
     """Mesure the maximal work group size of the given device
 
+    DEPRECATED since not perfectly correct !
+
     :param device_or_context: instance of pyopencl.Device or pyopencl.Context
                     or 2-tuple (platformid,deviceid)
     :param fast: ask the kernel the valid value, don't probe it
@@ -318,7 +320,7 @@ class OpenCL(object):
                 ####################################################
                 extensions = device.extensions
                 if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
-                                extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
+                    extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
                 try:
                     devtype = pyopencl.device_type.to_string(device.type).upper()
                 except ValueError:
@@ -573,6 +575,53 @@ def allocate_cl_buffers(buffers, device=None, context=None):
     return mem
 
 
+def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
+    """
+    Allocate an OpenCL image ("texture").
+
+    :param ctx: OpenCL context
+    :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
+        do not support 1D images, so 1D images are handled as 2D with one row
+    :param support_1D: force the image to be 1D if the shape has only one dim
+    """
+    if len(shape) == 1 and not(support_1D):
+        shape = (1,) + shape
+    return pyopencl.Image(
+        ctx,
+        pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
+        pyopencl.ImageFormat(
+            pyopencl.channel_order.INTENSITY,
+            pyopencl.channel_type.FLOAT
+        ),
+        hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
+    )
+
+
+def check_textures_availability(ctx):
+    """
+    Check whether textures are supported on the current OpenCL context.
+
+    :param ctx: OpenCL context
+    """
+    try:
+        dummy_texture = allocate_texture(ctx, (16, 16))
+        # Need to further access some attributes (pocl)
+        dummy_height = dummy_texture.height
+        textures_available = True
+        del dummy_texture, dummy_height
+    except (pyopencl.RuntimeError, pyopencl.LogicError):
+        textures_available = False
+    # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
+    # There is no way to detect this until a kernel is compiled
+    try:
+        cc = ctx.devices[0].compute_capability_major_nv
+        textures_available &= (cc >= 3)
+    except (pyopencl.LogicError, AttributeError):  # probably not a Nvidia GPU
+        pass
+    #
+    return textures_available
+
+
 def measure_workgroup_size(device):
     """Measure the actual size of the workgroup
 
@@ -599,12 +648,25 @@ def measure_workgroup_size(device):
     return res
 
 
-def kernel_workgroup_size(program, kernel):
-    """Extract the compile time maximum workgroup size
+def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
+    """Extract the compile time information from a kernel
 
     :param program: OpenCL program
     :param kernel: kernel or name of the kernel
-    :return: the maximum acceptable workgroup size for the given kernel
+    :param what: what is the query about ?
+    :return: int, or 3 ints when querying 'COMPILE_WORK_GROUP_SIZE'.
+    
+    Possible information available are:
+    * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
+    * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel  #OCL2.1!
+    * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel
+    * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
+    * 'PRIVATE_MEM_SIZE': Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel
+    * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
+    
+    Further information on:
+    https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
+    
     """
     assert isinstance(program, pyopencl.Program)
     if not isinstance(kernel, pyopencl.Kernel):
@@ -613,5 +675,15 @@ def kernel_workgroup_size(program, kernel):
         kernel = program.__getattr__(kernel_name)
 
     device = program.devices[0]
-    query_wg = pyopencl.kernel_work_group_info.WORK_GROUP_SIZE
+    query_wg = getattr(pyopencl.kernel_work_group_info, what)
     return kernel.get_work_group_info(query_wg, device)
+
+
+def kernel_workgroup_size(program, kernel):
+    """Extract the compile time maximum workgroup size
+
+    :param program: OpenCL program
+    :param kernel: kernel or name of the kernel
+    :return: the maximum acceptable workgroup size for the given kernel
+    """
+    return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE")
diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py
index 138b985..15ef931 100644
--- a/silx/opencl/convolution.py
+++ b/silx/opencl/convolution.py
@@ -91,17 +91,8 @@ class Convolution(OpenclProcessing):
         }
         extra_opts = extra_options or {}
         self.extra_options.update(extra_opts)
-        self.is_cpu = (self.device.type == "CPU")
         self.use_textures = not(self.extra_options["dont_use_textures"])
-        self.use_textures *= not(self.is_cpu)
-        # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
-        try:
-            cc = self.ctx.devices[0].compute_capability_major_nv
-            self.use_textures *= (cc >= 3)
-        except cl.LogicError: # probably not a Nvidia GPU
-            pass
-        except AttributeError: # probably not a Nvidia GPU
-            pass
+        self.use_textures &= self.check_textures_availability()
 
     def _get_dimensions(self, shape, kernel):
         self.shape = shape
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index 6b475b9..470b141 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -36,26 +36,23 @@ Common OpenCL abstract base classe for different processing
 
 from __future__ import absolute_import, print_function, division
 
-
 __author__ = "Jerome Kieffer"
 __contact__ = "Jerome.Kieffer@ESRF.eu"
 __license__ = "MIT"
 __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "05/08/2019"
+__date__ = "04/12/2020"
 __status__ = "stable"
 
-
 import os
 import logging
 import gc
 from collections import namedtuple
 import numpy
 import threading
-from .common import ocl, pyopencl, release_cl_buffers, kernel_workgroup_size
+from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
 from .utils import concatenate_cl_kernel
 import platform
 
-
 BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
 EventDescription = namedtuple("EventDescription", ["name", "event"])
 
@@ -85,13 +82,22 @@ class KernelContainer(object):
         return self.__dict__.get(name)
 
     def max_workgroup_size(self, kernel_name):
-        "Retrieve the compile time max_workgroup_size for a given kernel"
+        "Retrieve the compile time WORK_GROUP_SIZE for a given kernel"
         if isinstance(kernel_name, pyopencl.Kernel):
             kernel = kernel_name
         else:
             kernel = self.get_kernel(kernel_name)
 
-        return kernel_workgroup_size(self._program, kernel)
+        return query_kernel_info(self._program, kernel, "WORK_GROUP_SIZE")
+
+    def min_workgroup_size(self, kernel_name):
+        "Retrieve the compile time PREFERRED_WORK_GROUP_SIZE_MULTIPLE for a given kernel"
+        if isinstance(kernel_name, pyopencl.Kernel):
+            kernel = kernel_name
+        else:
+            kernel = self.get_kernel(kernel_name)
+
+        return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
 
 
 class OpenclProcessing(object):
@@ -149,6 +155,9 @@ class OpenclProcessing(object):
         self.program = None
         self.kernels = None
 
+    def check_textures_availability(self):
+        return check_textures_availability(self.ctx)
+
     def __del__(self):
         """Destructor: release all buffers and programs
         """
@@ -156,8 +165,10 @@ class OpenclProcessing(object):
             self.reset_log()
             self.free_kernels()
             self.free_buffers()
-        except Exception:
-            pass
+            if self.queue is not None:
+                self.queue.finish()
+        except Exception as err:
+            logger.warning("%s: %s", type(err), err)
         self.queue = None
         self.device = None
         self.ctx = None
@@ -287,6 +298,8 @@ class OpenclProcessing(object):
         if bool(value) != self.profile:
             with self.sem:
                 self.profile = bool(value)
+                if self.queue is not None:
+                    self.queue.finish()
                 if self.profile:
                     self.queue = pyopencl.CommandQueue(self.ctx,
                         properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
@@ -304,24 +317,7 @@ class OpenclProcessing(object):
             self.events.append(EventDescription(desc, event))
 
     def allocate_texture(self, shape, hostbuf=None, support_1D=False):
-        """
-        Allocate an OpenCL image ("texture").
-
-        :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
-            do not support 1D images, so 1D images are handled as 2D with one row
-        :param support_1D: force the image to be 1D if the shape has only one dim
-        """
-        if len(shape) == 1 and not(support_1D):
-            shape = (1,) + shape
-        return pyopencl.Image(
-            self.ctx,
-            pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
-            pyopencl.ImageFormat(
-                pyopencl.channel_order.INTENSITY,
-                pyopencl.channel_type.FLOAT
-            ),
-            hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
-        )
+        return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
 
     def transfer_to_texture(self, arr, tex_ref):
         """
@@ -336,10 +332,10 @@ class OpenclProcessing(object):
         if ndim == 1:
             # pyopencl and OpenCL < 1.2 do not support image1d_t
             # force 2D with one row in this case
-            #~ ndim = 2
+            # ~ ndim = 2
             shp = (1,) + shp
         copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
-        if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
+        if not(isinstance(arr, numpy.ndarray)):  # assuming pyopencl.array.Array
             # D->D copy
             copy_args[2] = arr.data
             copy_kwargs["offset"] = 0
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
index da8752f..c02faf6 100644
--- a/silx/opencl/projection.py
+++ b/silx/opencl/projection.py
@@ -2,7 +2,7 @@
 # coding: utf-8
 # /*##########################################################################
 #
-# Copyright (c) 2016 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2020 European Synchrotron Radiation Facility
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -115,7 +115,7 @@ class Projection(OpenclProcessing):
         self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos)  # TODO: custom
         self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos)  # TODO: custom
         # Reset axis_pos once offset are computed
-        self.axis_pos0 = np.float((self.shape[1] - 1) / 2.)
+        self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.)
 
         # Workgroup, ndrange and shared size
         self.dimgrid_x = _idivup(self.dwidth, 16)
@@ -129,9 +129,7 @@ class Projection(OpenclProcessing):
             int(self.dimgrid_y) * self.wg[1]  # int(): pyopencl <= 2015.1
         )
 
-        self.is_cpu = False
-        if self.device.type == "CPU":
-            self.is_cpu = True
+        self._use_textures = self.check_textures_availability()
 
         # Allocate memory
         self.buffers = [
@@ -150,14 +148,14 @@ class Projection(OpenclProcessing):
         )
         self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2),
                                           dtype=np.float32)
-        if self.is_cpu:
+        if not(self._use_textures):
             self.allocate_slice()
         else:
             self.allocate_textures()
         self.allocate_buffers()
         self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
                                  dtype=np.float32)
-        if self.is_cpu:
+        if not(self._use_textures):
             self.cl_mem["d_slice"].fill(0.)
             # enqueue_fill_buffer has issues if opencl 1.2 is not present
             # ~ pyopencl.enqueue_fill_buffer(
@@ -182,7 +180,14 @@ class Projection(OpenclProcessing):
         # Shorthands
         self._d_sino = self.cl_mem["_d_sino"]
 
-        OpenclProcessing.compile_kernels(self, self.kernel_files)
+        compile_options = None
+        if not(self._use_textures):
+            compile_options = "-DDONT_USE_TEXTURES"
+        OpenclProcessing.compile_kernels(
+            self,
+            self.kernel_files,
+            compile_options=compile_options
+        )
         # check that workgroup can actually be (16, 16)
         self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu")
 
@@ -194,7 +199,7 @@ class Projection(OpenclProcessing):
         pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
 
     def allocate_slice(self):
-        ary = parray.zeros(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
+        ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
         ary.fill(0)
         self.add_to_cl_mem({"d_slice": ary})
 
@@ -212,7 +217,7 @@ class Projection(OpenclProcessing):
         image2 = image
         if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
             image2 = np.ascontiguousarray(image)
-        if self.is_cpu:
+        if not(self._use_textures):
             # TODO: create NoneEvent
             return self.transfer_to_slice(image2)
             # ~ return pyopencl.enqueue_copy(
@@ -232,7 +237,7 @@ class Projection(OpenclProcessing):
                    )
 
     def transfer_device_to_texture(self, d_image):
-        if self.is_cpu:
+        if not(self._use_textures):
             # TODO this copy should not be necessary
             return self.cpy2d_to_slice(d_image)
         else:
@@ -355,14 +360,14 @@ class Projection(OpenclProcessing):
                 assert image.ndim == 2, "Treat only 2D images"
                 assert image.shape[0] == self.shape[0], "image shape is OK"
                 assert image.shape[1] == self.shape[1], "image shape is OK"
-                if not(self.is_cpu):
+                if self._use_textures:
                     self.transfer_to_texture(image)
                     slice_ref = self.d_image_tex
                 else:
                     self.transfer_to_slice(image)
                     slice_ref = self.cl_mem["d_slice"].data
             else:
-                if self.is_cpu:
+                if not(self._use_textures):
                     slice_ref = self.cl_mem["d_slice"].data
                 else:
                     slice_ref = self.d_image_tex
@@ -388,7 +393,7 @@ class Projection(OpenclProcessing):
             )
 
             # Call the kernel
-            if self.is_cpu:
+            if not(self._use_textures):
                 event_pj = self.kernels.forward_kernel_cpu(
                     self.queue,
                     self.ndrange,
diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py
index 49cc0b4..19dfdf0 100644
--- a/silx/opencl/test/test_addition.py
+++ b/silx/opencl/test/test_addition.py
@@ -29,19 +29,17 @@
 Simple test of an addition
 """
 
-from __future__ import division, print_function
-
 __authors__ = ["Henri Payno, Jérôme Kieffer"]
 __contact__ = "jerome.kieffer@esrf.eu"
 __license__ = "MIT"
 __copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
+__date__ = "30/11/2020"
 
 import logging
 import numpy
 
 import unittest
-from ..common import ocl, _measure_workgroup_size
+from ..common import ocl, _measure_workgroup_size, query_kernel_info
 if ocl:
     import pyopencl
     import pyopencl.array
@@ -116,7 +114,7 @@ class TestAddition(unittest.TestCase):
     @unittest.skipUnless(ocl, "pyopencl is missing")
     def test_measurement(self):
         """
-        tests that all devices are working properly ...
+        tests that all devices are working properly ... lengthy and error prone
         """
         for platform in ocl.platforms:
             for did, device in enumerate(platform.devices):
@@ -124,11 +122,31 @@ class TestAddition(unittest.TestCase):
                 self.assertEqual(meas, device.max_work_group_size,
                                  "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size))
 
+    @unittest.skipUnless(ocl, "pyopencl is missing")
+    def test_query(self):
+        """
+        tests query_kernel_info on the "addition" kernel and that WORK_GROUP_SIZE is a multiple of the preferred multiple
+        """
+        for what in ("COMPILE_WORK_GROUP_SIZE",
+                     "LOCAL_MEM_SIZE",
+                     "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
+                     "PRIVATE_MEM_SIZE",
+                     "WORK_GROUP_SIZE"):
+            logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what))
+
+        # Not all ICDs work properly ...
+        #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
+
+        min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
+        max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE")
+        self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg")
+
 
 def suite():
     testSuite = unittest.TestSuite()
     testSuite.addTest(TestAddition("test_add"))
     # testSuite.addTest(TestAddition("test_measurement"))
+    testSuite.addTest(TestAddition("test_query"))
     return testSuite
 
 
diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py
index b2f2070..9dfdd3a 100644
--- a/silx/opencl/test/test_backprojection.py
+++ b/silx/opencl/test/test_backprojection.py
@@ -96,8 +96,9 @@ class TestFBP(unittest.TestCase):
         # Therefore, we cannot expect results to be the "same" (up to float32
         # numerical error)
         self.tol = 5e-2
-        if self.fbp.is_cpu:
+        if not(self.fbp._use_textures) or self.fbp.device.type == "CPU":
             # Precision is less when using CPU
+            # (either CPU textures or "manual" linear interpolation)
             self.tol *= 2
 
     def tearDown(self):
diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py
index 27cb8a9..7bceb0d 100644
--- a/silx/opencl/test/test_convolution.py
+++ b/silx/opencl/test/test_convolution.py
@@ -41,15 +41,18 @@ from itertools import product
 import numpy as np
 from silx.utils.testutils import parameterize
 from silx.image.utils import gaussian_kernel
+
 try:
     from scipy.ndimage import convolve, convolve1d
     from scipy.misc import ascent
+
     scipy_convolve = convolve
     scipy_convolve1d = convolve1d
 except ImportError:
     scipy_convolve = None
 import unittest
-from ..common import ocl
+from ..common import ocl, check_textures_availability
+
 if ocl:
     import pyopencl as cl
     import pyopencl.array as parray
@@ -59,7 +62,6 @@ logger = logging.getLogger(__name__)
 
 @unittest.skipUnless(ocl and scipy_convolve, "PyOpenCl/scipy is missing")
 class TestConvolution(unittest.TestCase):
-
     @classmethod
     def setUpClass(cls):
         super(TestConvolution, cls).setUpClass()
@@ -67,7 +69,7 @@ class TestConvolution(unittest.TestCase):
         cls.data1d = cls.image[0]
         cls.data2d = cls.image
         cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1))
-        cls.kernel1d = gaussian_kernel(1.)
+        cls.kernel1d = gaussian_kernel(1.0)
         cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d)
         cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d)
         cls.ctx = ocl.create_context()
@@ -97,7 +99,7 @@ class TestConvolution(unittest.TestCase):
         )
         return errmsg
 
-    def __init__(self, methodName='runTest', param=None):
+    def __init__(self, methodName="runTest", param=None):
         unittest.TestCase.__init__(self, methodName)
         self.param = param
         self.mode = param["boundary_handling"]
@@ -107,32 +109,27 @@ class TestConvolution(unittest.TestCase):
             use_textures=%s, input_device=%s, output_device=%s
             """
             % (
-                self.mode, param["use_textures"],
-                param["input_on_device"], param["output_on_device"]
+                self.mode,
+                param["use_textures"],
+                param["input_on_device"],
+                param["output_on_device"],
             )
         )
 
     def instantiate_convol(self, shape, kernel, axes=None):
-        def is_fermi_device(dev):
-            try:
-                res = (dev.compute_capability_major_nv < 3)
-            except cl.LogicError:
-                res = False
-            except AttributeError:
-                res = False
-            return res
-        if (self.mode == "constant") and (
-            not(self.param["use_textures"])
-                or (self.ctx.devices[0].type == cl._cl.device_type.CPU)
-                or (is_fermi_device(self.ctx.devices[0]))
+        if self.mode == "constant":
+            if not (self.param["use_textures"]) or (
+                self.param["use_textures"]
+                and not (check_textures_availability(self.ctx))
             ):
                 self.skipTest("mode=constant not implemented without textures")
         C = Convolution(
-            shape, kernel,
+            shape,
+            kernel,
             mode=self.mode,
             ctx=self.ctx,
             axes=axes,
-            extra_options={"dont_use_textures": not(self.param["use_textures"])}
+            extra_options={"dont_use_textures": not (self.param["use_textures"])},
         )
         return C
 
@@ -142,13 +139,9 @@ class TestConvolution(unittest.TestCase):
             "test_separable_2D": (2, 1),
             "test_separable_3D": (3, 1),
             "test_nonseparable_2D": (2, 2),
-            "test_nonseparable_3D":  (3, 3),
-        }
-        dim_data = {
-            1: self.data1d,
-            2: self.data2d,
-            3: self.data3d
+            "test_nonseparable_3D": (3, 3),
         }
+        dim_data = {1: self.data1d, 2: self.data2d, 3: self.data3d}
         dim_kernel = {
             1: self.kernel1d,
             2: self.kernel2d,
@@ -159,24 +152,26 @@ class TestConvolution(unittest.TestCase):
 
     def get_reference_function(self, test_name):
         ref_func = {
-            "test_1D":
-                lambda x, y : scipy_convolve1d(x, y, mode=self.mode),
-            "test_separable_2D":
-                lambda x, y : scipy_convolve1d(
-                    scipy_convolve1d(x, y, mode=self.mode, axis=1),
-                    y, mode=self.mode, axis=0
-                ),
-            "test_separable_3D":
-                lambda x, y: scipy_convolve1d(
-                    scipy_convolve1d(
-                        scipy_convolve1d(x, y, mode=self.mode, axis=2),
-                        y, mode=self.mode, axis=1),
-                    y, mode=self.mode, axis=0
+            "test_1D": lambda x, y: scipy_convolve1d(x, y, mode=self.mode),
+            "test_separable_2D": lambda x, y: scipy_convolve1d(
+                scipy_convolve1d(x, y, mode=self.mode, axis=1),
+                y,
+                mode=self.mode,
+                axis=0,
+            ),
+            "test_separable_3D": lambda x, y: scipy_convolve1d(
+                scipy_convolve1d(
+                    scipy_convolve1d(x, y, mode=self.mode, axis=2),
+                    y,
+                    mode=self.mode,
+                    axis=1,
                 ),
-            "test_nonseparable_2D":
-                lambda x, y: scipy_convolve(x, y, mode=self.mode),
-            "test_nonseparable_3D":
-                lambda x, y : scipy_convolve(x, y, mode=self.mode),
+                y,
+                mode=self.mode,
+                axis=0,
+            ),
+            "test_nonseparable_2D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
+            "test_nonseparable_3D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
         }
         return ref_func[test_name]
 
@@ -226,8 +221,8 @@ class TestConvolution(unittest.TestCase):
         data = self.data3d
         kernel = self.kernel2d
         conv = self.instantiate_convol(data.shape, kernel, axes=(0,))
-        res = conv(data) # 3D
-        ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D
+        res = conv(data)  # 3D
+        ref = scipy_convolve(data[0], kernel, mode=self.mode)  # 2D
 
         std = np.std(res, axis=0)
         std_max = np.max(np.abs(std))
@@ -244,12 +239,9 @@ def test_convolution():
     output_on_device_ = [True, False]
     testSuite = unittest.TestSuite()
 
-    param_vals = list(product(
-        boundary_handling_,
-        use_textures_,
-        input_on_device_,
-        output_on_device_
-    ))
+    param_vals = list(
+        product(boundary_handling_, use_textures_, input_on_device_, output_on_device_)
+    )
     for boundary_handling, use_textures, input_dev, output_dev in param_vals:
         testcase = parameterize(
             TestConvolution,
@@ -258,17 +250,16 @@ def test_convolution():
                 "input_on_device": input_dev,
                 "output_on_device": output_dev,
                 "use_textures": use_textures,
-            }
+            },
         )
         testSuite.addTest(testcase)
     return testSuite
 
 
-
 def suite():
     testSuite = test_convolution()
     return testSuite
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main(defaultTest="suite")