Diffstat (limited to 'silx/opencl')
-rw-r--r--  silx/opencl/__init__.py  52
-rw-r--r--  silx/opencl/backprojection.py  397
-rw-r--r--  silx/opencl/codec/__init__.py  0
-rw-r--r--  silx/opencl/codec/byte_offset.py  439
-rw-r--r--  silx/opencl/codec/setup.py  43
-rw-r--r--  silx/opencl/codec/test/__init__.py  37
-rw-r--r--  silx/opencl/codec/test/test_byte_offset.py  315
-rw-r--r--  silx/opencl/common.py  691
-rw-r--r--  silx/opencl/convolution.py  442
-rw-r--r--  silx/opencl/image.py  387
-rw-r--r--  silx/opencl/linalg.py  220
-rw-r--r--  silx/opencl/medfilt.py  269
-rw-r--r--  silx/opencl/processing.py  447
-rw-r--r--  silx/opencl/projection.py  428
-rw-r--r--  silx/opencl/reconstruction.py  388
-rw-r--r--  silx/opencl/setup.py  48
-rw-r--r--  silx/opencl/sinofilter.py  435
-rw-r--r--  silx/opencl/sparse.py  377
-rw-r--r--  silx/opencl/statistics.py  242
-rw-r--r--  silx/opencl/test/__init__.py  68
-rw-r--r--  silx/opencl/test/test_addition.py  154
-rw-r--r--  silx/opencl/test/test_array_utils.py  161
-rw-r--r--  silx/opencl/test/test_backprojection.py  231
-rw-r--r--  silx/opencl/test/test_convolution.py  265
-rw-r--r--  silx/opencl/test/test_doubleword.py  258
-rw-r--r--  silx/opencl/test/test_image.py  137
-rw-r--r--  silx/opencl/test/test_kahan.py  269
-rw-r--r--  silx/opencl/test/test_linalg.py  216
-rw-r--r--  silx/opencl/test/test_medfilt.py  175
-rw-r--r--  silx/opencl/test/test_projection.py  131
-rw-r--r--  silx/opencl/test/test_sparse.py  203
-rw-r--r--  silx/opencl/test/test_stats.py  116
-rw-r--r--  silx/opencl/utils.py  214
33 files changed, 0 insertions, 8255 deletions
diff --git a/silx/opencl/__init__.py b/silx/opencl/__init__.py
deleted file mode 100644
index fbd1f88..0000000
--- a/silx/opencl/__init__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: S I L X project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-"""This package provides OpenCl-based optimized processing functions.
-
-For more processing functions, see the silx.math and silx.image packages.
-
-See silx documentation: http://www.silx.org/doc/silx/latest/
-"""
-
-__author__ = "Jerome Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "15/03/2017"
-__status__ = "stable"
-
-import logging
-
-
-logger = logging.getLogger(__name__)
-
-
-from .common import *
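
For reference, the wildcard import removed here re-exported the OpenCL device inventory from silx.opencl.common (see the __all__ list in the common.py hunk further down). A minimal probe of that API, assuming the pre-removal layout in which ocl is None when pyopencl is missing or no usable platform is found:

    # Minimal sketch, assuming the pre-removal silx.opencl API.
    from silx.opencl import ocl, pyopencl

    if ocl is None or pyopencl is None:
        print("pyopencl missing or no usable OpenCL platform")
    else:
        print(ocl)                        # one line per platform with its devices
        print(ocl.select_device("GPU"))   # (platformid, deviceid) or None
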
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py
deleted file mode 100644
index 65a9836..0000000
--- a/silx/opencl/backprojection.py
+++ /dev/null
@@ -1,397 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for (filtered) backprojection on the GPU"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["A. Mirone, P. Paleo"]
-__license__ = "MIT"
-__date__ = "25/01/2019"
-
-import logging
-import numpy as np
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-from .sinofilter import SinoFilter
-from .sinofilter import fourier_filter as fourier_filter_
-from ..utils.deprecation import deprecated
-
-if pyopencl:
- mf = pyopencl.mem_flags
- import pyopencl.array as parray
-else:
- raise ImportError("Please install pyopencl in order to use opencl backprojection")
-logger = logging.getLogger(__name__)
-
-
-def _sizeof(Type):
- """
- return the size (in bytes) of a scalar type, like the C behavior
- """
- return np.dtype(Type).itemsize
-
-
-def _idivup(a, b):
- """
- return the integer division, plus one if `a` is not a multiple of `b`
- """
- return (a + (b - 1)) // b
-
-
-class Backprojection(OpenclProcessing):
- """A class for performing the backprojection using OpenCL"""
- kernel_files = ["backproj.cl", "array_utils.cl"]
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None,
- angles=None, filter_name=None, ctx=None, devicetype="all",
- platformid=None, deviceid=None, profile=False,
- extra_options=None):
- """Constructor of the OpenCL (filtered) backprojection
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins
- and n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice. By
- default, it is a square slice where the dimension
- is the "x dimension" of the sinogram (number of
- bins).
- :param axis_position: Optional, axis position. Default is
- `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param filter_name: Optional, name of the filter for FBP. Default is
- the Ram-Lak filter.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- :param extra_options: Advanced extra options in the form of a dict.
- Current options are: cutoff, use_numpy_fft
- """
- # OS X enforces a workgroup size of 1 when the kernel has
- # synchronization barriers if sys.platform.startswith('darwin'):
- # assuming no discrete GPU
- # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
-
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self._init_geometry(sino_shape, slice_shape, angles, axis_position,
- extra_options)
- self._allocate_memory()
- self._compute_angles()
- self._init_kernels()
- self._init_filter(filter_name)
-
- def _init_geometry(self, sino_shape, slice_shape, angles, axis_position,
- extra_options):
- """Geometry Initialization
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins
- and n_a is the number of angles.
- :param slice_shape: shape of the reconstructed slice. By
- default, it is a square slice where the dimension
- is the "x dimension" of the sinogram (number of
- bins).
- :param angles: list of projection angles in radian.
- :param axis_position: axis position
- :param dict extra_options: Advanced extra options
- """
- self.shape = sino_shape
- self.num_bins = np.int32(sino_shape[1])
- self.num_projs = np.int32(sino_shape[0])
- self.angles = angles
- if slice_shape is None:
- self.slice_shape = (self.num_bins, self.num_bins)
- else:
- self.slice_shape = slice_shape
- self.dimrec_shape = (
- _idivup(self.slice_shape[0], 32) * 32,
- _idivup(self.slice_shape[1], 32) * 32
- )
- if axis_position:
- self.axis_pos = np.float32(axis_position)
- else:
- self.axis_pos = np.float32((sino_shape[1] - 1.) / 2)
- self.axis_array = None # TODO: add axis correction front-end
- self._init_extra_options(extra_options)
-
- def _init_extra_options(self, extra_options):
- """Backprojection extra option initialization
-
- :param dict extra_options: Advanced extra options
- """
- self.extra_options = {
- "cutoff": 1.,
- "use_numpy_fft": False,
- # It is axis_pos - (num_bins-1)/2 in PyHST
- "gpu_offset_x": 0., #self.axis_pos - (self.num_bins - 1) / 2.,
- "gpu_offset_y": 0., #self.axis_pos - (self.num_bins - 1) / 2.
- }
- if extra_options is not None:
- self.extra_options.update(extra_options)
-
- def _allocate_memory(self):
- # Host memory
- self.slice = np.zeros(self.dimrec_shape, dtype=np.float32)
- self._use_textures = self.check_textures_availability()
-
- # Device memory
- self.buffers = [
- BufferDescription("_d_slice", self.dimrec_shape, np.float32, mf.READ_WRITE),
- BufferDescription("d_sino", self.shape, np.float32, mf.READ_WRITE), # before transferring to texture (if available)
- BufferDescription("d_cos", (self.num_projs,), np.float32, mf.READ_ONLY),
- BufferDescription("d_sin", (self.num_projs,), np.float32, mf.READ_ONLY),
- BufferDescription("d_axes", (self.num_projs,), np.float32, mf.READ_ONLY),
- ]
- self.allocate_buffers(use_array=True)
- self.d_sino = self.cl_mem["d_sino"] # shorthand
-
- # Texture memory (if relevant)
- if self._use_textures:
- self._allocate_textures()
-
- # Local memory
- self.local_mem = 256 * 3 * _sizeof(np.float32) # constant for all image sizes
-
- def _compute_angles(self):
- if self.angles is None:
- self.angles = np.linspace(0, np.pi, self.num_projs, False)
- h_cos = np.cos(self.angles).astype(np.float32)
- h_sin = np.sin(self.angles).astype(np.float32)
- self.cl_mem["d_cos"][:] = h_cos[:]
- self.cl_mem["d_sin"][:] = h_sin[:]
- if self.axis_array:
- self.cl_mem["d_axes"][:] = self.axis_array.astype(np.float32)[:]
- else:
- self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos
-
- def _init_kernels(self):
- compile_options = None
- if not(self._use_textures):
- compile_options = "-DDONT_USE_TEXTURES"
- OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=compile_options
- )
- # check that workgroup can actually be (16, 16)
- self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel")
- # Workgroup and ndrange sizes are always the same
- self.wg = (16, 16)
- self.ndrange = (
- _idivup(int(self.dimrec_shape[1]), 32) * self.wg[0],
- _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1]
- )
- # Prepare arguments for the kernel call
- if not(self._use_textures):
- d_sino_ref = self.d_sino.data
- else:
- d_sino_ref = self.d_sino_tex
- self._backproj_kernel_args = (
- # num of projections (int32)
- self.num_projs,
- # num of bins (int32)
- self.num_bins,
- # axis position (float32)
- self.axis_pos,
- # d_slice (__global float32*)
- self.cl_mem["_d_slice"].data,
- # d_sino (__read_only image2d_t or float*)
- d_sino_ref,
- # gpu_offset_x (float32) 
- np.float32(self.extra_options["gpu_offset_x"]),
- # gpu_offset_y (float32)
- np.float32(self.extra_options["gpu_offset_y"]),
- # d_cos (__global float32*)
- self.cl_mem["d_cos"].data,
- # d_sin (__global float32*)
- self.cl_mem["d_sin"].data,
- # d_axis (__global float32*)
- self.cl_mem["d_axes"].data,
- # shared mem (__local float32*)
- self._get_local_mem()
- )
-
- def _allocate_textures(self):
- """
- Allocate the texture for the sinogram.
- """
- self.d_sino_tex = self.allocate_texture(self.shape)
-
- def _init_filter(self, filter_name):
- """Filter initialization
-
- :param str filter_name: filter name
- """
- self.filter_name = filter_name or "ram-lak"
- self.sino_filter = SinoFilter(
- self.shape,
- ctx=self.ctx,
- filter_name=self.filter_name,
- extra_options=self.extra_options,
- )
-
- def _get_local_mem(self):
- return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
-
- def _cpy2d_to_slice(self, dst):
- ndrange = (int(self.slice_shape[1]), int(self.slice_shape[0]))
- slice_shape_ocl = np.int32(ndrange)
- wg = None
- kernel_args = (
- dst.data,
- self.cl_mem["_d_slice"].data,
- np.int32(self.slice_shape[1]),
- np.int32(self.dimrec_shape[1]),
- np.int32((0, 0)),
- np.int32((0, 0)),
- slice_shape_ocl
- )
- return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
-
- def _transfer_to_texture(self, sino):
- if isinstance(sino, parray.Array):
- return self._transfer_device_to_texture(sino)
- sino2 = sino
- if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
- sino2 = np.ascontiguousarray(sino, dtype=np.float32)
- if not(self._use_textures):
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino.data,
- sino2
- )
- what = "transfer filtered sino H->D buffer"
- ev.wait()
- else:
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino_tex,
- sino2,
- origin=(0, 0),
- region=self.shape[::-1]
- )
- what = "transfer filtered sino H->D texture"
- return EventDescription(what, ev)
-
- def _transfer_device_to_texture(self, d_sino):
- if not(self._use_textures):
- if id(self.d_sino) == id(d_sino):
- return
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino.data,
- d_sino
- )
- what = "transfer filtered sino D->D buffer"
- ev.wait()
- else:
- ev = pyopencl.enqueue_copy(
- self.queue,
- self.d_sino_tex,
- d_sino.data,
- offset=0,
- origin=(0, 0),
- region=self.shape[::-1]
- )
- what = "transfer filtered sino D->D texture"
- return EventDescription(what, ev)
-
- def backprojection(self, sino, output=None):
- """Perform the backprojection on an input sinogram
-
- :param sino: sinogram.
- :param output: optional, output slice.
- If provided, the result will be written in this array.
- :return: backprojection of sinogram
- """
- events = []
- with self.sem:
- events.append(self._transfer_to_texture(sino))
- # Call the backprojection kernel
- if not(self._use_textures):
- kernel_to_call = self.kernels.backproj_cpu_kernel
- else:
- kernel_to_call = self.kernels.backproj_kernel
- kernel_to_call(
- self.queue,
- self.ndrange,
- self.wg,
- *self._backproj_kernel_args
- )
- # Return
- if output is None:
- res = self.cl_mem["_d_slice"].get()
- res = res[:self.slice_shape[0], :self.slice_shape[1]]
- else:
- res = output
- self._cpy2d_to_slice(output)
-
- # /with self.sem
- if self.profile:
- self.events += events
-
- return res
-
- def filtered_backprojection(self, sino, output=None):
- """
- Compute the filtered backprojection (FBP) on a sinogram.
-
- :param sino: sinogram (`np.ndarray` or `pyopencl.array.Array`)
- with the shape (n_projections, n_bins)
- :param output: output (`np.ndarray` or `pyopencl.array.Array`).
- If nothing is provided, a new numpy array is returned.
- """
- # Filter
- self.sino_filter(sino, output=self.d_sino)
- # Backproject
- res = self.backprojection(self.d_sino, output=output)
- return res
-
- __call__ = filtered_backprojection
-
-
- # -------------------
- # - Compatibility -
- # -------------------
-
- @deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
- def filter_projections(self, sino, rescale=True):
- self.sino_filter(sino, output=self.d_sino)
-
-
-
-def fourier_filter(sino, filter_=None, fft_size=None):
- return fourier_filter_(sino, filter_=filter_, fft_size=fft_size)
-
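
For reference, a minimal usage sketch of the Backprojection class removed above (not part of the diff; it assumes the pre-removal silx.opencl API documented in the docstrings, a sinogram of shape (n_projections, n_bins), and a working pyopencl installation):

    # Minimal sketch, assuming the pre-removal silx.opencl API and pyopencl.
    import numpy as np
    from silx.opencl.backprojection import Backprojection

    n_projections, n_bins = 500, 512
    sino = np.random.rand(n_projections, n_bins).astype(np.float32)

    fbp = Backprojection(sino.shape, filter_name="ram-lak")
    rec = fbp.filtered_backprojection(sino)   # equivalently: fbp(sino)
    # rec is a numpy array of shape (n_bins, n_bins) unless slice_shape was given
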
diff --git a/silx/opencl/codec/__init__.py b/silx/opencl/codec/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/silx/opencl/codec/__init__.py
+++ /dev/null
diff --git a/silx/opencl/codec/byte_offset.py b/silx/opencl/codec/byte_offset.py
deleted file mode 100644
index 9a52427..0000000
--- a/silx/opencl/codec/byte_offset.py
+++ /dev/null
@@ -1,439 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Sift implementation in Python + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2013-2020 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-This module provides a class for CBF byte offset compression/decompression.
-"""
-
-from __future__ import division, print_function, with_statement
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "11/10/2018"
-__status__ = "production"
-
-
-import functools
-import os
-import numpy
-from ..common import ocl, pyopencl
-from ..processing import BufferDescription, EventDescription, OpenclProcessing
-
-import logging
-logger = logging.getLogger(__name__)
-
-if pyopencl:
- import pyopencl.version
- if pyopencl.version.VERSION < (2016, 0):
- from pyopencl.scan import GenericScanKernel, GenericDebugScanKernel
- else:
- from pyopencl.algorithm import GenericScanKernel
- from pyopencl.scan import GenericDebugScanKernel
-else:
- logger.warning("No PyOpenCL, no byte-offset, please see fabio")
-
-
-class ByteOffset(OpenclProcessing):
- """Perform the byte offset compression/decompression on the GPU
-
- See :class:`OpenclProcessing` for optional arguments description.
-
- :param int raw_size:
- Size of the raw stream for decompression.
- It can be (slightly) larger than the array.
- :param int dec_size:
- Size of the decompression output array
- (mandatory for decompression)
- """
-
- def __init__(self, raw_size=None, dec_size=None,
- ctx=None, devicetype="all",
- platformid=None, deviceid=None,
- block_size=None, profile=False):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
- if self.block_size is None:
- self.block_size = self.device.max_work_group_size
- wg = self.block_size
-
- buffers = [BufferDescription("counter", 1, numpy.int32, None)]
-
- if raw_size is None:
- self.raw_size = -1
- self.padded_raw_size = -1
- else:
- self.raw_size = int(raw_size)
- self.padded_raw_size = int((self.raw_size + wg - 1) & ~(wg - 1))
- buffers += [
- BufferDescription("raw", self.padded_raw_size, numpy.int8, None),
- BufferDescription("mask", self.padded_raw_size, numpy.int32, None),
- BufferDescription("values", self.padded_raw_size, numpy.int32, None),
- BufferDescription("exceptions", self.padded_raw_size, numpy.int32, None)
- ]
-
- if dec_size is None:
- self.dec_size = None
- else:
- self.dec_size = numpy.int32(dec_size)
- buffers += [
- BufferDescription("data_float", self.dec_size, numpy.float32, None),
- BufferDescription("data_int", self.dec_size, numpy.int32, None)
- ]
-
- self.allocate_buffers(buffers, use_array=True)
-
- self.compile_kernels([os.path.join("codec", "byte_offset")])
- self.kernels.__setattr__("scan", self._init_double_scan())
- self.kernels.__setattr__("compression_scan",
- self._init_compression_scan())
-
- def _init_double_scan(self):
- """"generates a double scan on indexes and values in one operation"""
- arguments = "__global int *value", "__global int *index"
- int2 = pyopencl.tools.get_or_register_dtype("int2")
- input_expr = "index[i]>0 ? (int2)(0, 0) : (int2)(value[i], 1)"
- scan_expr = "a+b"
- neutral = "(int2)(0,0)"
- output_statement = "value[i] = item.s0; index[i+1] = item.s1;"
-
- if self.block_size > 256:
- knl = GenericScanKernel(self.ctx,
- dtype=int2,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- else: # MacOS on CPU
- knl = GenericDebugScanKernel(self.ctx,
- dtype=int2,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- return knl
-
- def decode(self, raw, as_float=False, out=None):
- """This function actually performs the decompression by calling the kernels
-
- :param numpy.ndarray raw: The compressed data as a 1D numpy array of char.
- :param bool as_float: True to decompress as float32,
- False (default) to decompress as int32
- :param pyopencl.array out: pyopencl array in which to place the result.
- :return: The decompressed image as an pyopencl array.
- :rtype: pyopencl.array
- """
- assert self.dec_size is not None, \
- "dec_size is a mandatory ByteOffset init argument for decompression"
-
- events = []
- with self.sem:
- len_raw = numpy.int32(len(raw))
- if len_raw > self.padded_raw_size:
- wg = self.block_size
- self.raw_size = int(len(raw))
- self.padded_raw_size = (self.raw_size + wg - 1) & ~(wg - 1)
- logger.info("increase raw buffer size to %s", self.padded_raw_size)
- buffers = {
- "raw": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int8),
- "mask": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- "exceptions": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- "values": pyopencl.array.empty(self.queue, self.padded_raw_size, dtype=numpy.int32),
- }
- self.cl_mem.update(buffers)
- else:
- wg = self.block_size
-
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["raw"].data,
- raw,
- is_blocking=False)
- events.append(EventDescription("copy raw H -> D", evt))
- evt = self.kernels.fill_int_mem(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["mask"].data,
- numpy.int32(self.padded_raw_size),
- numpy.int32(0),
- numpy.int32(0))
- events.append(EventDescription("memset mask", evt))
- evt = self.kernels.fill_int_mem(self.queue, (1,), (1,),
- self.cl_mem["counter"].data,
- numpy.int32(1),
- numpy.int32(0),
- numpy.int32(0))
- events.append(EventDescription("memset counter", evt))
- evt = self.kernels.mark_exceptions(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["raw"].data,
- len_raw,
- numpy.int32(self.raw_size),
- self.cl_mem["mask"].data,
- self.cl_mem["values"].data,
- self.cl_mem["counter"].data,
- self.cl_mem["exceptions"].data)
- events.append(EventDescription("mark exceptions", evt))
- nb_exceptions = numpy.empty(1, dtype=numpy.int32)
- evt = pyopencl.enqueue_copy(self.queue, nb_exceptions, self.cl_mem["counter"].data,
- is_blocking=False)
- events.append(EventDescription("copy counter D -> H", evt))
- evt.wait()
- nbexc = int(nb_exceptions[0])
- if nbexc == 0:
- logger.info("nbexc %i", nbexc)
- else:
- evt = self.kernels.treat_exceptions(self.queue, (nbexc,), (1,),
- self.cl_mem["raw"].data,
- len_raw,
- self.cl_mem["mask"].data,
- self.cl_mem["exceptions"].data,
- self.cl_mem["values"].data
- )
- events.append(EventDescription("treat_exceptions", evt))
-
- #self.cl_mem["copy_values"] = self.cl_mem["values"].copy()
- #self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy()
- evt = self.kernels.scan(self.cl_mem["values"],
- self.cl_mem["mask"],
- queue=self.queue,
- size=int(len_raw),
- wait_for=(evt,))
- events.append(EventDescription("double scan", evt))
- #evt.wait()
- if out is not None:
- if out.dtype == numpy.float32:
- copy_results = self.kernels.copy_result_float
- else:
- copy_results = self.kernels.copy_result_int
- else:
- if as_float:
- out = self.cl_mem["data_float"]
- copy_results = self.kernels.copy_result_float
- else:
- out = self.cl_mem["data_int"]
- copy_results = self.kernels.copy_result_int
- evt = copy_results(self.queue, (self.padded_raw_size,), (wg,),
- self.cl_mem["values"].data,
- self.cl_mem["mask"].data,
- len_raw,
- self.dec_size,
- out.data
- )
- events.append(EventDescription("copy_results", evt))
- #evt.wait()
- if self.profile:
- self.events += events
- return out
-
- __call__ = decode
-
- def _init_compression_scan(self):
- """Initialize CBF compression scan kernels"""
- preamble = """
- int compressed_size(int diff) {
- int abs_diff = abs(diff);
-
- if (abs_diff < 128) {
- return 1;
- }
- else if (abs_diff < 32768) {
- return 3;
- }
- else {
- return 7;
- }
- }
-
- void write(const int index,
- const int diff,
- global char *output) {
- int abs_diff = abs(diff);
-
- if (abs_diff < 128) {
- output[index] = (char) diff;
- }
- else if (abs_diff < 32768) {
- output[index] = -128;
- output[index + 1] = (char) (diff >> 0);
- output[index + 2] = (char) (diff >> 8);
- }
- else {
- output[index] = -128;
- output[index + 1] = 0;
- output[index + 2] = -128;
- output[index + 3] = (char) (diff >> 0);
- output[index + 4] = (char) (diff >> 8);
- output[index + 5] = (char) (diff >> 16);
- output[index + 6] = (char) (diff >> 24);
- }
- }
- """
- arguments = "__global const int *data, __global char *compressed, __global int *size"
- input_expr = "compressed_size((i == 0) ? data[0] : (data[i] - data[i - 1]))"
- scan_expr = "a+b"
- neutral = "0"
- output_statement = """
- if (prev_item == 0) { // 1st thread store compressed data size
- size[0] = last_item;
- }
- write(prev_item, (i == 0) ? data[0] : (data[i] - data[i - 1]), compressed);
- """
-
- if self.block_size >= 64:
- knl = GenericScanKernel(self.ctx,
- dtype=numpy.int32,
- preamble=preamble,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- else: # MacOS on CPU
- knl = GenericDebugScanKernel(self.ctx,
- dtype=numpy.int32,
- preamble=preamble,
- arguments=arguments,
- input_expr=input_expr,
- scan_expr=scan_expr,
- neutral=neutral,
- output_statement=output_statement)
- return knl
-
- def encode(self, data, out=None):
- """Compress data to CBF.
-
- :param data: The data to compress as a numpy array
- (or a pyopencl Array) of int32.
- :type data: Union[numpy.ndarray, pyopencl.array.Array]
- :param pyopencl.array out:
- pyopencl array of int8 in which to store the result.
- The array should be large enough to store the compressed data.
- :return: The compressed data as a pyopencl array.
- If out is provided, this array shares the backing buffer,
- but has the exact size of the compressed data and the queue
- of the ByteOffset instance.
- :rtype: pyopencl.array
- :raises ValueError: if out array is not large enough
- """
-
- events = []
- with self.sem:
- if isinstance(data, pyopencl.array.Array):
- d_data = data # Uses provided array
-
- else: # Copy data to device
- data = numpy.ascontiguousarray(data, dtype=numpy.int32).ravel()
-
- # Make sure data array exists and is large enough
- if ("data_input" not in self.cl_mem or
- self.cl_mem["data_input"].size < data.size):
- logger.info("increase data input buffer size to %s", data.size)
- self.cl_mem.update({
- "data_input": pyopencl.array.empty(self.queue,
- data.size,
- dtype=numpy.int32)})
- d_data = self.cl_mem["data_input"]
-
- evt = pyopencl.enqueue_copy(
- self.queue, d_data.data, data, is_blocking=False)
- events.append(EventDescription("copy data H -> D", evt))
-
- # Make sure compressed array exists and is large enough
- compressed_size = d_data.size * 7
- if ("compressed" not in self.cl_mem or
- self.cl_mem["compressed"].size < compressed_size):
- logger.info("increase compressed buffer size to %s", compressed_size)
- self.cl_mem.update({
- "compressed": pyopencl.array.empty(self.queue,
- compressed_size,
- dtype=numpy.int8)})
- d_compressed = self.cl_mem["compressed"]
- d_size = self.cl_mem["counter"] # Shared with decompression
-
- evt = self.kernels.compression_scan(d_data, d_compressed, d_size)
- events.append(EventDescription("compression scan", evt))
- byte_count = int(d_size.get()[0])
-
- if out is None:
- # Create out array from a sub-region of the compressed buffer
- out = pyopencl.array.Array(
- self.queue,
- shape=(byte_count,),
- dtype=numpy.int8,
- allocator=functools.partial(
- d_compressed.base_data.get_sub_region,
- d_compressed.offset))
-
- elif out.size < byte_count:
- raise ValueError(
- "Provided output buffer is not large enough: "
- "requires %d bytes, got %d" % (byte_count, out.size))
-
- else: # out.size >= byte_count
- # Create an array with a sub-region of out and this class queue
- out = pyopencl.array.Array(
- self.queue,
- shape=(byte_count,),
- dtype=numpy.int8,
- allocator=functools.partial(out.base_data.get_sub_region,
- out.offset))
-
- evt = pyopencl.enqueue_copy(self.queue, out.data, d_compressed.data,
- byte_count=byte_count)
- events.append(
- EventDescription("copy D -> D: internal -> out", evt))
-
- if self.profile:
- self.events += events
-
- return out
-
- def encode_to_bytes(self, data):
- """Compresses data to CBF and returns compressed data as bytes.
-
- Usage:
-
- Provided an image (`image`) stored as a numpy array of int32,
- first, create a byte offset compression/decompression object:
-
- >>> from silx.opencl.codec.byte_offset import ByteOffset
- >>> byte_offset_codec = ByteOffset()
-
- Then, compress an image into bytes:
-
- >>> compressed = byte_offset_codec.encode_to_bytes(image)
-
- :param data: The data to compress as a numpy array
- (or a pyopencl Array) of int32.
- :type data: Union[numpy.ndarray, pyopencl.array.Array]
- :return: The compressed data as bytes.
- :rtype: bytes
- """
- compressed_array = self.encode(data)
- return compressed_array.get().tobytes()
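
For reference, a round-trip sketch of the ByteOffset codec removed above (not part of the diff; it assumes the pre-removal silx.opencl.codec API documented in the docstrings, with dec_size mandatory for decoding and device buffers grown on demand for encoding):

    # Minimal sketch, assuming the pre-removal silx.opencl.codec API and pyopencl.
    import numpy
    from silx.opencl.codec.byte_offset import ByteOffset

    image = numpy.random.poisson(200, size=(2048, 2048)).astype(numpy.int32)

    codec = ByteOffset(dec_size=image.size)     # dec_size is required by decode()
    stream = codec.encode_to_bytes(image)       # CBF byte-offset stream as bytes
    restored = codec.decode(stream)             # flat pyopencl array of int32
    assert numpy.array_equal(restored.get().reshape(image.shape), image)
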
diff --git a/silx/opencl/codec/setup.py b/silx/opencl/codec/setup.py
deleted file mode 100644
index 4a5c1e5..0000000
--- a/silx/opencl/codec/setup.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# coding: utf-8
-#
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-
-from __future__ import division
-
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__authors__ = ["J. Kieffer"]
-__date__ = "13/10/2017"
-
-from numpy.distutils.misc_util import Configuration
-
-
-def configuration(parent_package='', top_path=None):
- config = Configuration('codec', parent_package, top_path)
- config.add_subpackage('test')
- return config
-
-
-if __name__ == "__main__":
- from numpy.distutils.core import setup
- setup(configuration=configuration)
diff --git a/silx/opencl/codec/test/__init__.py b/silx/opencl/codec/test/__init__.py
deleted file mode 100644
index ec76dd3..0000000
--- a/silx/opencl/codec/test/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: silx
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2013-2017 European Synchrotron Radiation Facility, Grenoble, France
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-__authors__ = ["J. Kieffer"]
-__license__ = "MIT"
-__date__ = "13/10/2017"
-
-import unittest
-from . import test_byte_offset
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(test_byte_offset.suite())
-
- return testSuite
diff --git a/silx/opencl/codec/test/test_byte_offset.py b/silx/opencl/codec/test/test_byte_offset.py
deleted file mode 100644
index d1482ce..0000000
--- a/silx/opencl/codec/test/test_byte_offset.py
+++ /dev/null
@@ -1,315 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Byte-offset decompression in OpenCL
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2013-2020 European Synchrotron Radiation Facility,
-# Grenoble, France
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Test suite for byte-offset decompression
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "02/03/2021"
-
-import sys
-import time
-import logging
-import numpy
-from silx.opencl.common import ocl, pyopencl
-from silx.opencl.codec import byte_offset
-import fabio
-import unittest
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl and pyopencl,
- "PyOpenCl is missing")
-class TestByteOffset(unittest.TestCase):
-
- @staticmethod
- def _create_test_data(shape, nexcept, lam=200):
- """Create test (image, compressed stream) pair.
-
- :param shape: Shape of test image
- :param int nexcept: Number of exceptions in the image
- :param lam: Expectation of interval argument for numpy.random.poisson
- :return: (reference image array, compressed stream)
- """
- size = numpy.prod(shape)
- ref = numpy.random.poisson(lam, numpy.prod(shape))
- exception_loc = numpy.random.randint(0, size, size=nexcept)
- exception_value = numpy.random.randint(0, 1000000, size=nexcept)
- ref[exception_loc] = exception_value
- ref.shape = shape
-
- raw = fabio.compression.compByteOffset(ref)
- return ref, raw
-
- def test_decompress(self):
- """
- tests the byte offset decompression on GPU
- """
- ref, raw = self._create_test_data(shape=(91, 97), nexcept=229)
- # ref, raw = self._create_test_data(shape=(7, 9), nexcept=0)
-
- size = numpy.prod(ref.shape)
-
- try:
- bo = byte_offset.ByteOffset(raw_size=len(raw), dec_size=size, profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- if sys.platform == "darwin":
- raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
- else:
- raise err
- print(bo.block_size)
-
- t0 = time.time()
- res_cy = fabio.compression.decByteOffset(raw)
- t1 = time.time()
- res_cl = bo.decode(raw)
- t2 = time.time()
- delta_cy = abs(ref.ravel() - res_cy).max()
- delta_cl = abs(ref.ravel() - res_cl.get()).max()
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile()
- # print(ref)
- # print(res_cl.get())
- self.assertEqual(delta_cy, 0, "Checks fabio works")
- self.assertEqual(delta_cl, 0, "Checks opencl works")
-
- def test_many_decompress(self, ntest=10):
- """
- tests the byte offset decompression on GPU, many images to ensure there
- is not leaking in memory
- """
- shape = (991, 997)
- size = numpy.prod(shape)
- ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
-
- try:
- bo = byte_offset.ByteOffset(len(raw), size, profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- if sys.platform == "darwin":
- raise unittest.SkipTest("Byte-offset decompression is known to be buggy on MacOS-CPU")
- else:
- raise err
- t0 = time.time()
- res_cy = fabio.compression.decByteOffset(raw)
- t1 = time.time()
- res_cl = bo(raw)
- t2 = time.time()
- delta_cy = abs(ref.ravel() - res_cy).max()
- delta_cl = abs(ref.ravel() - res_cl.get()).max()
- self.assertEqual(delta_cy, 0, "Checks fabio works")
- self.assertEqual(delta_cl, 0, "Checks opencl works")
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
-
- for i in range(ntest):
- ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
-
- t0 = time.time()
- res_cy = fabio.compression.decByteOffset(raw)
- t1 = time.time()
- res_cl = bo(raw)
- t2 = time.time()
- delta_cy = abs(ref.ravel() - res_cy).max()
- delta_cl = abs(ref.ravel() - res_cl.get()).max()
- self.assertEqual(delta_cy, 0, "Checks fabio works #%i" % i)
- self.assertEqual(delta_cl, 0, "Checks opencl works #%i" % i)
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile(stats=True)
-
- def test_encode(self):
- """Test byte offset compression"""
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(len(raw), ref.size, profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- t0 = time.time()
- compressed_array = bo.encode(ref)
- t1 = time.time()
-
- compressed_stream = compressed_array.get().tobytes()
- self.assertEqual(raw, compressed_stream)
-
- logger.debug("Global execution time: OpenCL: %.3fms.",
- 1000.0 * (t1 - t0))
- bo.log_profile()
-
- def test_encode_to_array(self):
- """Test byte offset compression while providing an out array"""
-
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
- # Test with out buffer too small
- out = pyopencl.array.empty(bo.queue, (10,), numpy.int8)
- with self.assertRaises(ValueError):
- bo.encode(ref, out)
-
- # Test with out buffer too big
- out = pyopencl.array.empty(bo.queue, (len(raw) + 10,), numpy.int8)
-
- compressed_array = bo.encode(ref, out)
-
- # Get size from returned array
- compressed_size = compressed_array.size
- self.assertEqual(compressed_size, len(raw))
-
- # Get data from out array, read it from bo object queue
- out_bo_queue = out.with_queue(bo.queue)
- compressed_stream = out_bo_queue.get().tobytes()[:compressed_size]
- self.assertEqual(raw, compressed_stream)
-
- def test_encode_to_bytes(self):
- """Test byte offset compression to bytes"""
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(ref)
- t2 = time.time()
-
- self.assertEqual(raw, compressed_stream)
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile()
-
- def test_encode_to_bytes_from_array(self):
- """Test byte offset compression to bytes from a pyopencl array.
- """
- ref, raw = self._create_test_data(shape=(2713, 2719), nexcept=2729)
-
- try:
- bo = byte_offset.ByteOffset(profile=True)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- d_ref = pyopencl.array.to_device(
- bo.queue, ref.astype(numpy.int32).ravel())
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(d_ref)
- t2 = time.time()
-
- self.assertEqual(raw, compressed_stream)
-
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
- bo.log_profile()
-
- def test_many_encode(self, ntest=10):
- """Test byte offset compression with many image"""
- shape = (991, 997)
- ref, raw = self._create_test_data(shape=shape, nexcept=0, lam=100)
-
- try:
- bo = byte_offset.ByteOffset(profile=False)
- except (RuntimeError, pyopencl.RuntimeError) as err:
- logger.warning(err)
- raise err
-
- bo_durations = []
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(ref)
- t2 = time.time()
- bo_durations.append(1000.0 * (t2 - t1))
-
- self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
-
- for i in range(ntest):
- ref, raw = self._create_test_data(shape=shape, nexcept=2729, lam=200)
-
- t0 = time.time()
- res_fabio = fabio.compression.compByteOffset(ref)
- t1 = time.time()
- compressed_stream = bo.encode_to_bytes(ref)
- t2 = time.time()
- bo_durations.append(1000.0 * (t2 - t1))
-
- self.assertEqual(raw, compressed_stream)
- logger.debug("Global execution time: fabio %.3fms, OpenCL: %.3fms.",
- 1000.0 * (t1 - t0),
- 1000.0 * (t2 - t1))
-
- logger.debug("OpenCL execution time: Mean: %fms, Min: %fms, Max: %fms",
- numpy.mean(bo_durations),
- numpy.min(bo_durations),
- numpy.max(bo_durations))
-
-
-def suite():
- test_suite = unittest.TestSuite()
- test_suite.addTest(TestByteOffset("test_decompress"))
- test_suite.addTest(TestByteOffset("test_many_decompress"))
- test_suite.addTest(TestByteOffset("test_encode"))
- test_suite.addTest(TestByteOffset("test_encode_to_array"))
- test_suite.addTest(TestByteOffset("test_encode_to_bytes"))
- test_suite.addTest(TestByteOffset("test_encode_to_bytes_from_array"))
- test_suite.addTest(TestByteOffset("test_many_encode"))
- return test_suite
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
deleted file mode 100644
index da966f6..0000000
--- a/silx/opencl/common.py
+++ /dev/null
@@ -1,691 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: S I L X project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-
-__author__ = "Jerome Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "30/11/2020"
-__status__ = "stable"
-__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
- "measure_workgroup_size", "kernel_workgroup_size"]
-
-import os
-import logging
-
-import numpy
-
-from .utils import get_opencl_code
-
-logger = logging.getLogger(__name__)
-
-if os.environ.get("SILX_OPENCL") in ["0", "False"]:
- logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
- pyopencl = None
-else:
- try:
- import pyopencl
- except ImportError:
- logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl")
- pyopencl = None
- else:
- try:
- pyopencl.get_platforms()
- except pyopencl.LogicError:
- logger.warning("The module pyOpenCL has been imported but can't be used here")
- pyopencl = None
- else:
- import pyopencl.array as array
- mf = pyopencl.mem_flags
-
-if pyopencl is None:
-
- # Define default mem flags
- class mf(object):
- WRITE_ONLY = 1
- READ_ONLY = 1
- READ_WRITE = 1
-else:
- mf = pyopencl.mem_flags
-
-FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
- "CPU": 4, # CPU, at least intel's have 4 operation per cycle
- "ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
-
-# Sources : https://en.wikipedia.org/wiki/CUDA
-NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
- (1, 1): 24, # Measured on G98 [Quadro NVS 295]
- (1, 2): 24, # Guessed !
- (1, 3): 24, # measured on a GT285 (GT200)
- (2, 0): 64, # Measured on a 580 (GF110)
- (2, 1): 96, # Measured on Quadro2000 GF106GL
- (3, 0): 384, # Guessed!
- (3, 5): 384, # Measured on K20
- (3, 7): 384, # K80: Guessed!
- (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU
- (5, 2): 256, # Titan-X
- (5, 3): 256, # TX1
- (6, 0): 128, # GP100
- (6, 1): 128, # GP104
- (6, 2): 128, # ?
- (7, 0): 128, # Volta # measured on Telsa V100
- (7, 1): 128, # Volta ?
- }
-
-AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
-
-
-class Device(object):
- """
- Simple class that contains the structure of an OpenCL device
- """
-
- def __init__(self, name="None", dtype=None, version=None, driver_version=None,
- extensions="", memory=None, available=None,
- cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
- """
- Simple container with some important data for the OpenCL device description.
-
- :param name: name of the device
- :param dtype: device type: CPU/GPU/ACC...
- :param version: driver version
- :param driver_version:
- :param extensions: List of opencl extensions
- :param memory: maximum memory available on the device
- :param available: is the device deactivated or not
- :param cores: number of SM/cores
- :param frequency: frequency of the device
- :param flop_core: Flopating Point operation per core per cycle
- :param idx: index of the device within the platform
- :param workgroup: max workgroup size
- """
- self.name = name.strip()
- self.type = dtype
- self.version = version
- self.driver_version = driver_version
- self.extensions = extensions.split()
- self.memory = memory
- self.available = available
- self.cores = cores
- self.frequency = frequency
- self.id = idx
- self.max_work_group_size = workgroup
- if not flop_core:
- flop_core = FLOP_PER_CORE.get(dtype, 1)
- if cores and frequency:
- self.flops = cores * frequency * flop_core
- else:
- self.flops = flop_core
-
- def __repr__(self):
- return "%s" % self.name
-
- def pretty_print(self):
- """
- Complete device description
-
- :return: string
- """
- lst = ["Name\t\t:\t%s" % self.name,
- "Type\t\t:\t%s" % self.type,
- "Memory\t\t:\t%.3f MB" % (self.memory / 2.0 ** 20),
- "Cores\t\t:\t%s CU" % self.cores,
- "Frequency\t:\t%s MHz" % self.frequency,
- "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.),
- "Version\t\t:\t%s" % self.version,
- "Available\t:\t%s" % self.available]
- return os.linesep.join(lst)
-
- def set_unavailable(self):
- """Use this method to flag a faulty device
- """
- self.available = False
-
-
-class Platform(object):
- """
- Simple class that contains the structure of an OpenCL platform
- """
-
- def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
- """
- Class containing all descriptions of a platform and all devices description within that platform.
-
- :param name: platform name
- :param vendor: name of the brand/vendor
- :param version:
- :param extensions: list of the extension provided by the platform to all of its devices
- :param idx: index of the platform
- """
- self.name = name.strip()
- self.vendor = vendor.strip()
- self.version = version
- self.extensions = extensions.split()
- self.devices = []
- self.id = idx
-
- def __repr__(self):
- return "%s" % self.name
-
- def add_device(self, device):
- """
- Add new device to the platform
-
- :param device: Device instance
- """
- self.devices.append(device)
-
- def get_device(self, key):
- """
- Return a device according to key
-
- :param key: identifier for a device, either it's id (int) or it's name
- :type key: int or str
- """
- out = None
- try:
- devid = int(key)
- except ValueError:
- for a_dev in self.devices:
- if a_dev.name == key:
- out = a_dev
- else:
- if len(self.devices) > devid > 0:
- out = self.devices[devid]
- return out
-
-
-def _measure_workgroup_size(device_or_context, fast=False):
- """Mesure the maximal work group size of the given device
-
- DEPRECATED since not perfectly correct !
-
- :param device_or_context: instance of pyopencl.Device or pyopencl.Context
- or 2-tuple (platformid,deviceid)
- :param fast: ask the kernel the valid value, don't probe it
- :return: maximum size for the workgroup
- """
- if isinstance(device_or_context, pyopencl.Device):
- try:
- ctx = pyopencl.Context(devices=[device_or_context])
- except pyopencl._cl.LogicError as error:
- platform = device_or_context.platform
- platformid = pyopencl.get_platforms().index(platform)
- deviceid = platform.get_devices().index(device_or_context)
- ocl.platforms[platformid].devices[deviceid].set_unavailable()
- raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error))
- else:
- device = device_or_context
- elif isinstance(device_or_context, pyopencl.Context):
- ctx = device_or_context
- device = device_or_context.devices[0]
- elif isinstance(device_or_context, (tuple, list)) and len(device_or_context) == 2:
- ctx = ocl.create_context(platformid=device_or_context[0],
- deviceid=device_or_context[1])
- device = ctx.devices[0]
- else:
- raise RuntimeError("""given parameter device_or_context is not an
- instanciation of a device or a context""")
- shape = device.max_work_group_size
- # get the context
-
- assert ctx is not None
- queue = pyopencl.CommandQueue(ctx)
-
- max_valid_wg = 1
- data = numpy.random.random(shape).astype(numpy.float32)
- d_data = pyopencl.array.to_device(queue, data)
- d_data_1 = pyopencl.array.empty_like(d_data)
- d_data_1.fill(numpy.float32(1.0))
-
- program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
- if fast:
- max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device)
- else:
- maxi = int(round(numpy.log2(shape)))
- for i in range(maxi + 1):
- d_res = pyopencl.array.empty_like(d_data)
- wg = 1 << i
- try:
- evt = program.addition(
- queue, (shape,), (wg,),
- d_data.data, d_data_1.data, d_res.data, numpy.int32(shape))
- evt.wait()
- except Exception as error:
- logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape)
- program = queue = d_res = d_data_1 = d_data = None
- break
- else:
- res = d_res.get()
- good = numpy.allclose(res, data + 1)
- if good:
- if wg > max_valid_wg:
- max_valid_wg = wg
- else:
- logger.warning("ArithmeticError on %s for WG=%s/%s", wg, device.name, shape)
-
- return max_valid_wg
-
-
-def _is_nvidia_gpu(vendor, devtype):
- return (vendor == "NVIDIA Corporation") and (devtype == "GPU")
-
-
-class OpenCL(object):
- """
- Simple class that wraps the structure ocl_tools_extended.h
-
- This is a static class.
- ocl should be the only instance and shared among all python modules.
- """
-
- platforms = []
- nb_devices = 0
- context_cache = {} # key: 2-tuple of int, value: context
- if pyopencl:
- platform = device = pypl = devtype = extensions = pydev = None
- for idx, platform in enumerate(pyopencl.get_platforms()):
- pypl = Platform(platform.name, platform.vendor, platform.version, platform.extensions, idx)
- for idd, device in enumerate(platform.get_devices()):
- ####################################################
- # Nvidia does not report int64 atomics (we are using) ...
- # this is a hack around as any nvidia GPU with double-precision supports int64 atomics
- ####################################################
- extensions = device.extensions
- if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
- extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
- try:
- devtype = pyopencl.device_type.to_string(device.type).upper()
- except ValueError:
- # pocl does not describe itself as a CPU !
- devtype = "CPU"
- if len(devtype) > 3:
- if "GPU" in devtype:
- devtype = "GPU"
- elif "ACC" in devtype:
- devtype = "ACC"
- elif "CPU" in devtype:
- devtype = "CPU"
- else:
- devtype = devtype[:3]
- if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
- try:
- comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
- except pyopencl.LogicError:
- flop_core = FLOP_PER_CORE["GPU"]
- else:
- flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
- elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
- flop_core = AMD_FLOP_PER_CORE
- elif devtype == "CPU":
- flop_core = FLOP_PER_CORE.get(devtype, 1)
- else:
- flop_core = 1
- workgroup = device.max_work_group_size
- if (devtype == "CPU") and (pypl.vendor == "Apple"):
- logger.info("For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size.")
- workgroup = _measure_workgroup_size(device, fast=True)
- if (devtype == "GPU") and os.environ.get("GPU") == "False":
- # Environment variable to disable GPU devices
- continue
- pydev = Device(device.name, devtype, device.version, device.driver_version, extensions,
- device.global_mem_size, bool(device.available), device.max_compute_units,
- device.max_clock_frequency, flop_core, idd, workgroup)
- pypl.add_device(pydev)
- nb_devices += 1
- platforms.append(pypl)
- del platform, device, pypl, devtype, extensions, pydev
-
- def __repr__(self):
- out = ["OpenCL devices:"]
- for platformid, platform in enumerate(self.platforms):
- deviceids = ["(%s,%s) %s" % (platformid, deviceid, dev.name)
- for deviceid, dev in enumerate(platform.devices)]
- out.append("[%s] %s: " % (platformid, platform.name) + ", ".join(deviceids))
- return os.linesep.join(out)
-
- def get_platform(self, key):
- """
- Return the platform matching the given identifier.
-
- :param key: identifier for a platform, either an id (int) or its name
- :type key: int or str
- """
- out = None
- try:
- platid = int(key)
- except ValueError:
- for a_plat in self.platforms:
- if a_plat.name == key:
- out = a_plat
- else:
- if len(self.platforms) > platid >= 0:
- out = self.platforms[platid]
- return out
-
- def select_device(self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs):
- """
- Select a device based on a few parameters (by default, keep the one with the most flops)
-
- :param dtype: device type: "gpu", "cpu", "acc" or "all"
- :param memory: minimum amount of memory (int)
- :param extensions: list of extensions to be present
- :param best: shall we keep the device with the most flops?
- :returns: a tuple (platform ID, device ID), or None if nothing was
- found
- """
- if extensions is None:
- extensions = []
- if "type" in kwargs:
- dtype = kwargs["type"].upper()
- else:
- dtype = dtype.upper()
- if len(dtype) > 3:
- dtype = dtype[:3]
- best_found = None
- for platformid, platform in enumerate(self.platforms):
- for deviceid, device in enumerate(platform.devices):
- if not device.available:
- continue
- if (dtype in ["ALL", "DEF"]) or (device.type == dtype):
- if (memory is None) or (memory <= device.memory):
- found = True
- for ext in extensions:
- if ext not in device.extensions:
- found = False
- if found:
- if not best:
- return platformid, deviceid
- else:
- if not best_found:
- best_found = platformid, deviceid, device.flops
- elif best_found[2] < device.flops:
- best_found = platformid, deviceid, device.flops
- if best_found:
- return best_found[0], best_found[1]
-
- # Nothing found
- return None
-
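A minimal usage sketch for select_device, assuming silx and pyopencl are installed and at least one OpenCL device is visible (the module-level singleton ocl, defined at the end of this file, is None otherwise):

from silx.opencl.common import ocl

ids = ocl.select_device(dtype="GPU", extensions=["cl_khr_fp64"])
if ids is not None:
    platformid, deviceid = ids
    device = ocl.platforms[platformid].devices[deviceid]
    print(device.name, device.memory, device.flops)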
- def create_context(self, devicetype="ALL", useFp64=False, platformid=None,
- deviceid=None, cached=True, memory=None, extensions=None):
- """
- Choose a device and initiate a context.
-
- Device types can be GPU, gpu, CPU, cpu, DEF, ACC or ALL; GPU and CPU are the most common.
- For each setting to work, a matching OpenCL device and driver must be properly installed.
- E.g. if the Nvidia driver is installed, GPU will succeed but CPU will fail.
- The AMD SDK is required for CPU support via OpenCL.
-
- :param devicetype: string in ["cpu", "gpu", "all", "acc"]
- :param useFp64: boolean specifying if double precision will be used; deprecated, use extensions=["cl_khr_fp64"] instead
- :param platformid: integer
- :param deviceid: integer
- :param cached: True if we want to cache the context
- :param memory: minimum amount of memory of the device
- :param extensions: list of extensions to be present
- :return: OpenCL context on the selected device
- """
- if extensions is None:
- extensions = []
- if useFp64:
- logger.warning("Deprecation: please select your device using the extension name!, i.e. extensions=['cl_khr_fp64']")
- extensions.append('cl_khr_fp64')
-
- if (platformid is not None) and (deviceid is not None):
- platformid = int(platformid)
- deviceid = int(deviceid)
- elif "PYOPENCL_CTX" in os.environ:
- pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")]
- pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0
- platformid, deviceid = pyopencl_ctx
- else:
- ids = ocl.select_device(type=devicetype, extensions=extensions)
- if ids:
- platformid, deviceid = ids
- ctx = None
- if (platformid is not None) and (deviceid is not None):
- if (platformid, deviceid) in self.context_cache:
- ctx = self.context_cache[(platformid, deviceid)]
- else:
- try:
- ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
- except pyopencl._cl.LogicError as error:
- self.platforms[platformid].devices[deviceid].set_unavailable()
- logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error)
- ctx = None
- else:
- if cached:
- self.context_cache[(platformid, deviceid)] = ctx
- if ctx is None:
- logger.warning("Last chance to get an OpenCL device ... probably not the one requested")
- ctx = pyopencl.create_some_context(interactive=False)
- return ctx
-
- def device_from_context(self, context):
- """
- Retrieves the Device from the context
-
- :param context: OpenCL context
- :return: instance of Device
- """
- odevice = context.devices[0]
- oplat = odevice.platform
- device_id = oplat.get_devices().index(odevice)
- platform_id = pyopencl.get_platforms().index(oplat)
- return self.platforms[platform_id].devices[device_id]
-
-
-if pyopencl:
- ocl = OpenCL()
- if ocl.nb_devices == 0:
- ocl = None
-else:
- ocl = None
-
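A minimal sketch of context creation with the singleton defined just above, assuming a working OpenCL runtime; the resulting pyopencl context can be mapped back to silx's lightweight Device description:

import pyopencl
from silx.opencl.common import ocl

ctx = ocl.create_context(devicetype="GPU", extensions=["cl_khr_fp64"])
queue = pyopencl.CommandQueue(ctx)
device = ocl.device_from_context(ctx)  # silx Device matching the context
print(device.name, device.type)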
-
-def release_cl_buffers(cl_buffers):
- """
- :param cl_buffers: the buffers you want to release
- :type cl_buffers: dict(str, pyopencl.Buffer)
-
- This method releases the memory of the buffers stored in the dict
- """
- for key, buffer_ in cl_buffers.items():
- if buffer_ is not None:
- if isinstance(buffer_, pyopencl.array.Array):
- try:
- buffer_.data.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- else:
- try:
- buffer_.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- cl_buffers[key] = None
- return cl_buffers
-
-
-def allocate_cl_buffers(buffers, device=None, context=None):
- """
- :param buffers: the buffer descriptions used to create the pyopencl.Buffer
- :type buffers: list(str, flag, numpy.dtype, int)
- :param device: one of the context devices
- :param context: OpenCL context
- :return: a dict containing the instantiated pyopencl.Buffer
- :rtype: dict(str, pyopencl.Buffer)
-
- This method instantiates the pyopencl.Buffer from the buffer
- descriptions.
- """
- mem = {}
- if device is None:
- device = ocl.device_from_context(context)
-
- # check if enough memory is available on the device
- ualloc = 0
- for _, _, dtype, size in buffers:
- ualloc += numpy.dtype(dtype).itemsize * size
- memory = device.memory
- logger.info("%.3fMB are needed on device which has %.3fMB",
- ualloc / 1.0e6, memory / 1.0e6)
- if ualloc >= memory:
- memError = "Fatal error in allocate_buffers."
- memError += "Not enough device memory for buffers"
- memError += "(%lu requested, %lu available)" % (ualloc, memory)
- raise MemoryError(memError) # noqa
-
- # do the allocation
- try:
- for name, flag, dtype, size in buffers:
- mem[name] = pyopencl.Buffer(context, flag,
- numpy.dtype(dtype).itemsize * size)
- except pyopencl.MemoryError as error:
- release_cl_buffers(mem)
- raise MemoryError(error)
-
- return mem
-
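A minimal sketch of the two buffer helpers above, assuming a working OpenCL context; the buffer names and sizes are arbitrary examples:

import numpy
import pyopencl
from silx.opencl.common import ocl, allocate_cl_buffers, release_cl_buffers

ctx = ocl.create_context(devicetype="ALL")
mf = pyopencl.mem_flags
descriptions = [("input", mf.READ_ONLY, numpy.float32, 1024),
                ("output", mf.WRITE_ONLY, numpy.float32, 1024)]
cl_mem = allocate_cl_buffers(descriptions, context=ctx)
# ... enqueue kernels reading cl_mem["input"] and writing cl_mem["output"] ...
release_cl_buffers(cl_mem)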
-
-def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
- """
- Allocate an OpenCL image ("texture").
-
- :param ctx: OpenCL context
- :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
- do not support 1D images, so 1D images are handled as 2D with one row
- :param support_1D: force the image to be 1D if the shape has only one dim
- """
- if len(shape) == 1 and not(support_1D):
- shape = (1,) + shape
- return pyopencl.Image(
- ctx,
- pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ),
- hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
- )
-
-
-def check_textures_availability(ctx):
- """
- Check whether textures are supported on the current OpenCL context.
-
- :param ctx: OpenCL context
- """
- try:
- dummy_texture = allocate_texture(ctx, (16, 16))
- # Need to further access some attributes (pocl)
- dummy_height = dummy_texture.height
- textures_available = True
- del dummy_texture, dummy_height
- except (pyopencl.RuntimeError, pyopencl.LogicError):
- textures_available = False
- # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
- # There is no way to detect this until a kernel is compiled
- try:
- cc = ctx.devices[0].compute_capability_major_nv
- textures_available &= (cc >= 3)
- except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU
- pass
- #
- return textures_available
-
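A minimal sketch combining the two helpers above, assuming a device that supports images; the 512x512 shape is an arbitrary example:

from silx.opencl.common import ocl, allocate_texture, check_textures_availability

ctx = ocl.create_context()
if check_textures_availability(ctx):
    texture = allocate_texture(ctx, (512, 512))  # 512x512 float32 image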
-
-def measure_workgroup_size(device):
- """Measure the actual size of the workgroup
-
- :param device: device or context or 2-tuple with indexes
- :return: the actual measured workgroup size
-
- if device is "all", returns a dict with all devices with their ids as keys.
- """
- if (ocl is None) or (device is None):
- return None
-
- if isinstance(device, tuple) and (len(device) == 2):
- # this is probably a tuple (platformid, deviceid)
- device = ocl.create_context(platformid=device[0], deviceid=device[1])
-
- if device == "all":
- res = {}
- for pid, platform in enumerate(ocl.platforms):
- for did, _devices in enumerate(platform.devices):
- tup = (pid, did)
- res[tup] = measure_workgroup_size(tup)
- else:
- res = _measure_workgroup_size(device)
- return res
-
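A minimal sketch, assuming at least one OpenCL device is detected; the (0, 0) indexes designate the first device of the first platform:

from silx.opencl.common import measure_workgroup_size

print(measure_workgroup_size((0, 0)))   # single device, by (platformid, deviceid)
print(measure_workgroup_size("all"))    # dict {(platformid, deviceid): measured workgroup size}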
-
-def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
- """Extract the compile time information from a kernel
-
- :param program: OpenCL program
- :param kernel: kernel or name of the kernel
- :param what: what is the query about ?
- :return: int or 3-int for the workgroup size.
-
- Possible information available are:
- * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
- * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1!
- * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel
- * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
- * 'PRIVATE_MEM_SIZE' Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel
- * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
-
- Further information on:
- https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
-
- """
- assert isinstance(program, pyopencl.Program)
- if not isinstance(kernel, pyopencl.Kernel):
- kernel_name = kernel
- assert kernel in (k.function_name for k in program.all_kernels()), "the kernel must exist in the program"
- kernel = getattr(program, kernel_name)
-
- device = program.devices[0]
- query_wg = getattr(pyopencl.kernel_work_group_info, what)
- return kernel.get_work_group_info(query_wg, device)
-
-
-def kernel_workgroup_size(program, kernel):
- """Extract the compile time maximum workgroup size
-
- :param program: OpenCL program
- :param kernel: kernel or name of the kernel
- :return: the maximum acceptable workgroup size for the given kernel
- """
- return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE")
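A minimal sketch of querying compile-time kernel information; the trivial kernel source below is only an illustration:

import pyopencl
from silx.opencl.common import ocl, kernel_workgroup_size, query_kernel_info

src = "kernel void add_one(global float *data) { data[get_global_id(0)] += 1.0f; }"
ctx = ocl.create_context()
program = pyopencl.Program(ctx, src).build()
print(kernel_workgroup_size(program, "add_one"))
print(query_kernel_info(program, "add_one", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE"))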
diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py
deleted file mode 100644
index 15ef931..0000000
--- a/silx/opencl/convolution.py
+++ /dev/null
@@ -1,442 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for convolution on CPU/GPU."""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import numpy as np
-from copy import copy # python2
-from .common import pyopencl as cl
-import pyopencl.array as parray
-from .processing import OpenclProcessing, EventDescription
-from .utils import ConvolutionInfos
-
-class Convolution(OpenclProcessing):
- """
- A class for performing convolution on CPU/GPU with OpenCL.
- """
-
- def __init__(self, shape, kernel, axes=None, mode=None, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False, extra_options=None):
- """Constructor of OpenCL Convolution.
-
- :param shape: shape of the array.
- :param kernel: convolution kernel (1D, 2D or 3D).
- :param axes: axes along which the convolution is performed,
- for batched convolutions.
- :param mode: Boundary handling mode. Available modes are:
- "reflect": cba|abcd|dcb
- "nearest": aaa|abcd|ddd
- "wrap": bcd|abcd|abc
- "constant": 000|abcd|000
- Default is "reflect".
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- :param extra_options: Advanced options (dict). Current options are:
- "allocate_input_array": True,
- "allocate_output_array": True,
- "allocate_tmp_array": True,
- "dont_use_textures": False,
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self._configure_extra_options(extra_options)
- self._determine_use_case(shape, kernel, axes)
- self._allocate_memory(mode)
- self._init_kernels()
-
- def _configure_extra_options(self, extra_options):
- self.extra_options = {
- "allocate_input_array": True,
- "allocate_output_array": True,
- "allocate_tmp_array": True,
- "dont_use_textures": False,
- }
- extra_opts = extra_options or {}
- self.extra_options.update(extra_opts)
- self.use_textures = not(self.extra_options["dont_use_textures"])
- self.use_textures &= self.check_textures_availability()
-
- def _get_dimensions(self, shape, kernel):
- self.shape = shape
- self.data_ndim = self._check_dimensions(shape=shape, name="Data")
- self.kernel_ndim = self._check_dimensions(arr=kernel, name="Kernel")
- Nx = shape[-1]
- if self.data_ndim >= 2:
- Ny = shape[-2]
- else:
- Ny = 1
- if self.data_ndim >= 3:
- Nz = shape[-3]
- else:
- Nz = 1
- self.Nx = np.int32(Nx)
- self.Ny = np.int32(Ny)
- self.Nz = np.int32(Nz)
-
- def _determine_use_case(self, shape, kernel, axes):
- """
- Determine the convolution use case from the input/kernel shape, and axes.
- """
- self._get_dimensions(shape, kernel)
- if self.kernel_ndim > self.data_ndim:
- raise ValueError("Kernel dimensions cannot exceed data dimensions")
- data_ndim = self.data_ndim
- kernel_ndim = self.kernel_ndim
- self.kernel = kernel.astype("f")
-
- convol_infos = ConvolutionInfos()
- k = (data_ndim, kernel_ndim)
- if k not in convol_infos.use_cases:
- raise ValueError(
- "Cannot find a use case for data ndim = %d and kernel ndim = %d"
- % (data_ndim, kernel_ndim)
- )
- possible_use_cases = convol_infos.use_cases[k]
-
- self.use_case_name = None
- for uc_name, uc_params in possible_use_cases.items():
- if axes in convol_infos.allowed_axes[uc_name]:
- self.use_case_name = uc_name
- self.use_case_desc = uc_params["name"]
- #~ self.use_case_kernels = uc_params["kernels"].copy()
- self.use_case_kernels = copy(uc_params["kernels"]) # TODO use the above line once we get rid of python2
- if self.use_case_name is None:
- raise ValueError(
- "Cannot find a use case for data ndim = %d, kernel ndim = %d and axes=%s"
- % (data_ndim, kernel_ndim, str(axes))
- )
- # TODO implement this use case
- if self.use_case_name == "batched_separable_2D_1D_3D":
- raise NotImplementedError(
- "The use case %s is not implemented"
- % self.use_case_name
- )
- #
- self.axes = axes
- # Replace "axes=None" with an actual value (except for ND-ND)
- allowed_axes = convol_infos.allowed_axes[self.use_case_name]
- if len(allowed_axes) > 1:
- # The default choice might impact perfs
- self.axes = allowed_axes[0] or allowed_axes[1]
- self.separable = self.use_case_name.startswith("separable")
- self.batched = self.use_case_name.startswith("batched")
- # Update kernel names when using textures
- if self.use_textures:
- for i, kern_name in enumerate(self.use_case_kernels):
- self.use_case_kernels[i] = kern_name + "_tex"
-
- def _allocate_memory(self, mode):
- self.mode = mode or "reflect"
- option_array_names = {
- "allocate_input_array": "data_in",
- "allocate_output_array": "data_out",
- "allocate_tmp_array": "data_tmp",
- }
- # Nonseparable transforms do not need tmp array
- if not(self.separable):
- self.extra_options["allocate_tmp_array"] = False
- # Allocate arrays
- for option_name, array_name in option_array_names.items():
- if self.extra_options[option_name]:
- value = parray.empty(self.queue, self.shape, np.float32)
- value.fill(np.float32(0.0))
- else:
- value = None
- setattr(self, array_name, value)
-
- if isinstance(self.kernel, np.ndarray):
- self.d_kernel = parray.to_device(self.queue, self.kernel)
- else:
- if not(isinstance(self.kernel, parray.Array)):
- raise ValueError("kernel must be either numpy array or pyopencl array")
- self.d_kernel = self.kernel
- self._old_input_ref = None
- self._old_output_ref = None
- if self.use_textures:
- self._allocate_textures()
- self._c_modes_mapping = {
- "periodic": 2,
- "wrap": 2,
- "nearest": 1,
- "replicate": 1,
- "reflect": 0,
- "constant": 3,
- }
- mp = self._c_modes_mapping
- if self.mode.lower() not in mp:
- raise ValueError(
- """
- Mode %s is not available for textures. Available modes are:
- %s
- """
- % (self.mode, str(mp.keys()))
- )
- # TODO
- if not(self.use_textures) and self.mode.lower() == "constant":
- raise NotImplementedError(
- "mode='constant' is not implemented without textures yet"
- )
- #
- self._c_conv_mode = mp[self.mode]
-
- def _allocate_textures(self):
- self.data_in_tex = self.allocate_texture(self.shape)
- self.d_kernel_tex = self.allocate_texture(self.kernel.shape)
- self.transfer_to_texture(self.d_kernel, self.d_kernel_tex)
-
- def _init_kernels(self):
- if self.kernel_ndim > 1:
- if np.abs(np.diff(self.kernel.shape)).max() > 0:
- raise NotImplementedError(
- "Non-separable convolution with non-square kernels is not implemented yet"
- )
- compile_options = [str("-DUSED_CONV_MODE=%d" % self._c_conv_mode)]
- if self.use_textures:
- kernel_files = ["convolution_textures.cl"]
- compile_options.extend([
- str("-DIMAGE_DIMS=%d" % self.data_ndim),
- str("-DFILTER_DIMS=%d" % self.kernel_ndim),
- ])
- d_kernel_ref = self.d_kernel_tex
- else:
- kernel_files = ["convolution.cl"]
- d_kernel_ref = self.d_kernel.data
- self.compile_kernels(
- kernel_files=kernel_files,
- compile_options=compile_options
- )
- self.ndrange = self.shape[::-1]
- self.wg = None
- kernel_args = [
- self.queue,
- self.ndrange, self.wg,
- None,
- None,
- d_kernel_ref,
- np.int32(self.kernel.shape[0]),
- self.Nx, self.Ny, self.Nz
- ]
- if self.kernel_ndim == 2:
- kernel_args.insert(6, np.int32(self.kernel.shape[1]))
- if self.kernel_ndim == 3:
- kernel_args.insert(6, np.int32(self.kernel.shape[2]))
- kernel_args.insert(7, np.int32(self.kernel.shape[1]))
- self.kernel_args = tuple(kernel_args)
- # If self.data_tmp is allocated, separable transforms can be performed
- # by a series of batched transforms, without any copy, by swapping refs.
- self.swap_pattern = None
- if self.separable:
- if self.data_tmp is not None:
- self.swap_pattern = {
- 2: [
- ("data_in", "data_tmp"),
- ("data_tmp", "data_out")
- ],
- 3: [
- ("data_in", "data_out"),
- ("data_out", "data_tmp"),
- ("data_tmp", "data_out"),
- ],
- }
- else:
- # TODO
- raise NotImplementedError("For now, data_tmp has to be allocated")
-
- def _get_swapped_arrays(self, i):
- """
- Get the input and output arrays to use when using a "swap pattern".
- Swapping refs enables to avoid copies between temp. array and output.
- For example, a separable 2D->1D convolution on 2D data reads:
- data_tmp = convol(data_input, kernel, axis=1) # step i=0
- data_out = convol(data_tmp, kernel, axis=0) # step i=1
-
- :param i: current step number of the separable convolution
- """
- if self.use_textures:
- # copy is needed when using texture, as data_out is a Buffer
- if i > 0:
- self.transfer_to_texture(self.data_out, self.data_in_tex)
- return self.data_in_tex, self.data_out
- n_batchs = len(self.axes)
- in_ref, out_ref = self.swap_pattern[n_batchs][i]
- d_in = getattr(self, in_ref)
- d_out = getattr(self, out_ref)
- return d_in, d_out
-
- def _configure_kernel_args(self, opencl_kernel_args, input_ref, output_ref):
- # TODO more elegant
- if isinstance(input_ref, parray.Array):
- input_ref = input_ref.data
- if isinstance(output_ref, parray.Array):
- output_ref = output_ref.data
- if input_ref is not None or output_ref is not None:
- opencl_kernel_args = list(opencl_kernel_args)
- if input_ref is not None:
- opencl_kernel_args[3] = input_ref
- if output_ref is not None:
- opencl_kernel_args[4] = output_ref
- opencl_kernel_args = tuple(opencl_kernel_args)
- return opencl_kernel_args
-
- @staticmethod
- def _check_dimensions(arr=None, shape=None, name="", dim_min=1, dim_max=3):
- if shape is not None:
- ndim = len(shape)
- elif arr is not None:
- ndim = arr.ndim
- else:
- raise ValueError("Please provide either arr= or shape=")
- if ndim < dim_min or ndim > dim_max:
- raise ValueError("%s dimensions should be between %d and %d"
- % (name, dim_min, dim_max)
- )
- return ndim
-
- def _check_array(self, arr):
- # TODO allow cl.Buffer
- if not(isinstance(arr, parray.Array) or isinstance(arr, np.ndarray)):
- raise TypeError("Expected either pyopencl.array.Array or numpy.ndarray")
- # TODO composition with ImageProcessing/cast
- if arr.dtype != np.float32:
- raise TypeError("Data must be float32")
- if arr.shape != self.shape:
- raise ValueError("Expected data shape = %s" % str(self.shape))
-
- def _set_arrays(self, array, output=None):
- # When using textures: copy
- if self.use_textures:
- self.transfer_to_texture(array, self.data_in_tex)
- data_in_ref = self.data_in_tex
- else:
- # Otherwise: copy H->D or update references.
- if isinstance(array, np.ndarray):
- self.data_in[:] = array[:]
- else:
- self._old_input_ref = self.data_in
- self.data_in = array
- data_in_ref = self.data_in
- if output is not None:
- if not(isinstance(output, np.ndarray)):
- self._old_output_ref = self.data_out
- self.data_out = output
- # Update OpenCL kernel arguments with new array references
- self.kernel_args = self._configure_kernel_args(
- self.kernel_args,
- data_in_ref,
- self.data_out
- )
-
- def _separable_convolution(self):
- assert len(self.axes) == len(self.use_case_kernels)
- # Separable: one kernel call per data dimension
- for i, axis in enumerate(self.axes):
- in_ref, out_ref = self._get_swapped_arrays(i)
- self._batched_convolution(axis, input_ref=in_ref, output_ref=out_ref)
-
- def _batched_convolution(self, axis, input_ref=None, output_ref=None):
- # Batched: one kernel call in total
- opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[axis])
- opencl_kernel_args = self._configure_kernel_args(
- self.kernel_args,
- input_ref,
- output_ref
- )
- ev = opencl_kernel(*opencl_kernel_args)
- if self.profile:
- self.events.append(EventDescription("batched convolution", ev))
-
- def _nd_convolution(self):
- assert len(self.use_case_kernels) == 1
- opencl_kernel = self.kernels.get_kernel(self.use_case_kernels[0])
- ev = opencl_kernel(*self.kernel_args)
- if self.profile:
- self.events.append(EventDescription("ND convolution", ev))
-
- def _recover_arrays_references(self):
- if self._old_input_ref is not None:
- self.data_in = self._old_input_ref
- self._old_input_ref = None
- if self._old_output_ref is not None:
- self.data_out = self._old_output_ref
- self._old_output_ref = None
- self.kernel_args = self._configure_kernel_args(
- self.kernel_args,
- self.data_in,
- self.data_out
- )
-
- def _get_output(self, output):
- if output is None:
- res = self.data_out.get()
- else:
- res = output
- if isinstance(output, np.ndarray):
- output[:] = self.data_out[:]
- self._recover_arrays_references()
- return res
-
- def convolve(self, array, output=None):
- """
- Convolve an array with the class kernel.
-
- :param array: Input array. Can be numpy.ndarray or pyopencl.array.Array.
- :param output: Output array. Can be numpy.ndarray or pyopencl.array.Array.
- """
- self._check_array(array)
- self._set_arrays(array, output=output)
- if self.axes is not None:
- if self.separable:
- self._separable_convolution()
- elif self.batched:
- assert len(self.axes) == 1
- self._batched_convolution(self.axes[0])
- # else: ND-ND convol
- else:
- # ND-ND convol
- self._nd_convolution()
-
- res = self._get_output(output)
- return res
-
-
- __call__ = convolve
-
-
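A minimal usage sketch for the Convolution class, assuming a working OpenCL device; the image size and Gaussian kernel are arbitrary examples:

import numpy as np
from silx.opencl.convolution import Convolution

image = np.random.rand(512, 512).astype(np.float32)
sigma = 1.5
x = np.arange(-4, 5)
gaussian = np.exp(-x ** 2 / (2 * sigma ** 2)).astype(np.float32)
gaussian /= gaussian.sum()

# 1D kernel on 2D data: separable convolution applied along both axes
conv = Convolution(image.shape, gaussian, mode="reflect")
result = conv(image)  # equivalent to conv.convolve(image)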
diff --git a/silx/opencl/image.py b/silx/opencl/image.py
deleted file mode 100644
index 65e2d5e..0000000
--- a/silx/opencl/image.py
+++ /dev/null
@@ -1,387 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: silx
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-# .
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-# .
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""A general purpose library for manipulating 2D images in 1 or 3 colors
-
-"""
-from __future__ import absolute_import, print_function, with_statement, division
-
-
-__author__ = "Jerome Kieffer"
-__license__ = "MIT"
-__date__ = "12/02/2018"
-__copyright__ = "2012-2017, ESRF, Grenoble"
-__contact__ = "jerome.kieffer@esrf.fr"
-
-import os
-import logging
-import numpy
-from collections import OrderedDict
-from math import floor, ceil, sqrt, log
-
-from .common import pyopencl, kernel_workgroup_size
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-
-if pyopencl:
- mf = pyopencl.mem_flags
-logger = logging.getLogger(__name__)
-
-
-class ImageProcessing(OpenclProcessing):
-
- kernel_files = ["cast", "map", "max_min", "histogram"]
-
- converter = {numpy.dtype(numpy.uint8): "u8_to_float",
- numpy.dtype(numpy.int8): "s8_to_float",
- numpy.dtype(numpy.uint16): "u16_to_float",
- numpy.dtype(numpy.int16): "s16_to_float",
- numpy.dtype(numpy.uint32): "u32_to_float",
- numpy.dtype(numpy.int32): "s32_to_float",
- }
-
- def __init__(self, shape=None, ncolors=1, template=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
- """Constructor of the ImageProcessing class
-
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param block_size: preferred workgroup size, may vary depending on the
- outcome of the compilation
- :param memory: minimum memory available on device
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly slower)
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, memory=memory, profile=profile)
- if template is not None:
- shape = template.shape
- if len(shape) > 2:
- self.ncolors = shape[2]
- self.shape = shape[:2]
- else:
- self.ncolors = 1
- self.shape = shape
- else:
- self.ncolors = ncolors
- self.shape = shape
- assert shape is not None
- self.buffer_shape = self.shape if self.ncolors == 1 else self.shape + (self.ncolors,)
- kernel_files = [os.path.join("image", i) for i in self.kernel_files]
- self.compile_kernels(kernel_files,
- compile_options="-DNB_COLOR=%i" % self.ncolors)
- if self.ncolors == 1:
- img_shape = self.shape
- else:
- img_shape = self.shape + (self.ncolors,)
-
- buffers = [BufferDescription("image0_d", img_shape, numpy.float32, None),
- BufferDescription("image1_d", img_shape, numpy.float32, None),
- BufferDescription("image2_d", img_shape, numpy.float32, None),
- BufferDescription("max_min_d", 2, numpy.float32, None),
- BufferDescription("cnt_d", 1, numpy.int32, None), ]
- # Temporary buffer for max-min reduction
- self.wg_red = kernel_workgroup_size(self.program, self.kernels.max_min_reduction_stage1)
- if self.wg_red > 1:
- self.wg_red = min(self.wg_red,
- numpy.int32(1 << int(floor(log(sqrt(numpy.prod(self.shape)), 2)))))
- tmp = BufferDescription("tmp_max_min_d", 2 * self.wg_red, numpy.float32, None)
- buffers.append(tmp)
- self.allocate_buffers(buffers, use_array=True)
- self.cl_mem["cnt_d"].fill(0)
-
- def __repr__(self):
- return "ImageProcessing for shape=%s, %i colors initalized on %s" % \
- (self.shape, self.ncolors, self.ctx.devices[0].name)
-
- def _get_in_out_buffers(self, img=None, copy=True, out=None,
- out_dtype=None, out_size=None):
- """Internal method used to select the proper buffers before processing.
-
- :param img: expects a numpy array or a pyopencl.array of dim 2 or 3
- :param copy: set to False to directly re-use a pyopencl array
- :param out: provide an output buffer to store the result
- :param out_dtype: enforce the type of the output buffer (optional)
- :param out_size: enforce the size of the output buffer (optional)
- :return: input_buffer, output_buffer
-
- Nota: this is not locked.
- """
- events = []
- if out is not None and isinstance(out, pyopencl.array.Array):
- if (out_size or out_dtype) is not None:
- if out_size is not None:
- assert out.size > out_size
- if out_dtype is not None:
- assert out_dtype == out.dtype
- else: # assume it is the same size and type as the working buffer
- assert out.shape == self.buffer_shape
- assert out.dtype == numpy.float32
- out.finish()
- output_array = out
- else:
- if out_dtype != numpy.float32 and out_size:
- name = "%s_%s_d" % (numpy.dtype(out_dtype), out_size)
- if name not in self.cl_mem:
- output_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (out_size,), out_dtype)
- else:
- output_array = self.cl_mem[name]
- else:
- output_array = self.cl_mem["image2_d"]
-
- if img is None:
- input_array = self.cl_mem["image1_d"]
- elif isinstance(img, pyopencl.array.Array):
- if copy:
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, img.data)
- input_array = self.cl_mem["image1_d"]
- events.append(EventDescription("copy D->D", evt))
- else:
- img.finish()
- input_array = img
- evt = None
- else:
- # assume this is numpy
- if img.dtype.itemsize > 4:
- logger.warning("Casting to float32 on CPU")
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img, numpy.float32))
- input_array = self.cl_mem["image1_d"]
- events.append(EventDescription("cast+copy H->D", evt))
- else:
- evt = pyopencl.enqueue_copy(self.queue, self.cl_mem["image1_d"].data, numpy.ascontiguousarray(img))
- input_array = self.cl_mem["image1_d"]
- events.append(EventDescription("copy H->D", evt))
- if self.profile:
- self.events += events
- return input_array, output_array
-
- def to_float(self, img, copy=True, out=None):
- """ Takes any array and convert it to a float array for ease of processing.
-
- :param img: expects a numpy array or a pyopencl.array of dim 2 or 3
- :param copy: set to False to directly re-use a pyopencl array
- :param out: provide an output buffer to store the result
- """
- assert img.shape == self.buffer_shape
-
- events = []
- with self.sem:
- input_array, output_array = self._get_in_out_buffers(img, copy, out)
- if (img.dtype.itemsize > 4) or (img.dtype == numpy.float32):
- # copy device -> device, already there as float32
- ev = pyopencl.enqueue_copy(self.queue, output_array.data, input_array.data)
- events.append(EventDescription("copy D->D", ev))
- else:
- # Cast to float:
- name = self.converter[img.dtype]
- kernel = self.kernels.get_kernel(name)
- ev = kernel(self.queue, (self.shape[1], self.shape[0]), None,
- input_array.data, output_array.data,
- numpy.int32(self.shape[1]), numpy.int32(self.shape[0])
- )
- events.append(EventDescription("cast %s" % name, ev))
-
- if self.profile:
- self.events += events
- if out is None:
- res = output_array.get()
- return res
- else:
- output_array.finish()
- return output_array
-
- def normalize(self, img, mini=0.0, maxi=1.0, copy=True, out=None):
- """Scale the intensity of the image so that the minimum is 0 and the
- maximum is 1.0 (or any other value provided).
-
- :param img: numpy array or pyopencl array of dim 2 or 3 and of type float
- :param mini: expected minimum value
- :param maxi: expected maximum value
- :param copy: set to False to use directly the input buffer
- :param out: provides an output buffer. prevents a copy D->H
-
- This uses a min/max reduction in two stages plus a map operation
- """
- assert img.shape == self.buffer_shape
- events = []
- with self.sem:
- input_array, output_array = self._get_in_out_buffers(img, copy, out)
- size = numpy.int32(numpy.prod(self.shape))
- if self.wg_red == 1:
- # Probably on MacOS CPU WG==1 --> serial code.
- kernel = self.kernels.get_kernel("max_min_serial")
- evt = kernel(self.queue, (1,), (1,),
- input_array.data,
- size,
- self.cl_mem["max_min_d"].data)
- ed = EventDescription("max_min_serial", evt)
- events.append(ed)
- else:
- stage1 = self.kernels.max_min_reduction_stage1
- stage2 = self.kernels.max_min_reduction_stage2
- local_mem = pyopencl.LocalMemory(int(self.wg_red * 8))
- k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
- input_array.data,
- self.cl_mem["tmp_max_min_d"].data,
- size,
- local_mem)
- k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
- self.cl_mem["tmp_max_min_d"].data,
- self.cl_mem["max_min_d"].data,
- local_mem)
-
- events += [EventDescription("max_min_stage1", k1),
- EventDescription("max_min_stage2", k2)]
-
- evt = self.kernels.normalize_image(self.queue, (self.shape[1], self.shape[0]), None,
- input_array.data, output_array.data,
- numpy.int32(self.shape[1]), numpy.int32(self.shape[0]),
- self.cl_mem["max_min_d"].data,
- numpy.float32(mini), numpy.float32(maxi))
- events.append(EventDescription("normalize", evt))
- if self.profile:
- self.events += events
-
- if out is None:
- res = output_array.get()
- return res
- else:
- output_array.finish()
- return output_array
-
- def histogram(self, img=None, nbins=255, range=None,
- log_scale=False, copy=True, out=None):
- """Compute the histogram of a set of data.
-
- :param img: input image. If None, use the one already on the device
- :param nbins: number of bins
- :param range: the lower and upper range of the bins. If not provided,
- range is simply ``(a.min(), a.max())``. Values outside the
- range are ignored. The first element of the range must be
- less than or equal to the second.
- :param log_scale: perform the binning in logarithmic scale.
- Open to extension
- :param copy: unset to directly use the input buffer without copy
- :param out: use a provided array to store the result
- :return: histogram (size=nbins), edges (size=nbins+1)
- API similar to numpy
- """
- assert img.shape == self.buffer_shape
-
- input_array = self.to_float(img, copy=copy, out=self.cl_mem["image0_d"])
- events = []
- with self.sem:
- input_array, output_array = self._get_in_out_buffers(input_array, copy=False,
- out=out,
- out_dtype=numpy.int32,
- out_size=nbins)
-
- if range is None:
- # measure actually the bounds
- size = numpy.int32(numpy.prod(self.shape))
- if self.wg_red == 1:
- # Probably on MacOS CPU WG==1 --> serial code.
- kernel = self.kernels.get_kernel("max_min_serial")
-
- evt = kernel(self.queue, (1,), (1,),
- input_array.data,
- size,
- self.cl_mem["max_min_d"].data)
- events.append(EventDescription("max_min_serial", evt))
- else:
- stage1 = self.kernels.max_min_reduction_stage1
- stage2 = self.kernels.max_min_reduction_stage2
- local_mem = pyopencl.LocalMemory(int(self.wg_red * 2 * numpy.dtype("float32").itemsize))
- k1 = stage1(self.queue, (int(self.wg_red ** 2),), (int(self.wg_red),),
- input_array.data,
- self.cl_mem["tmp_max_min_d"].data,
- size,
- local_mem)
- k2 = stage2(self.queue, (int(self.wg_red),), (int(self.wg_red),),
- self.cl_mem["tmp_max_min_d"].data,
- self.cl_mem["max_min_d"].data,
- local_mem)
-
- events += [EventDescription("max_min_stage1", k1),
- EventDescription("max_min_stage2", k2)]
- maxi, mini = self.cl_mem["max_min_d"].get()
- else:
- mini = numpy.float32(min(range))
- maxi = numpy.float32(max(range))
- device = self.ctx.devices[0]
- nb_engines = device.max_compute_units
- tmp_size = nb_engines * nbins
- name = "tmp_int32_%s_d" % (tmp_size)
- if name not in self.cl_mem:
- tmp_array = self.cl_mem[name] = pyopencl.array.empty(self.queue, (tmp_size,), numpy.int32)
- else:
- tmp_array = self.cl_mem[name]
-
- edge_name = "tmp_float32_%s_d" % (nbins + 1)
- if edge_name not in self.cl_mem:
- edges_array = self.cl_mem[edge_name] = pyopencl.array.empty(self.queue, (nbins + 1,), numpy.float32)
- else:
- edges_array = self.cl_mem[edge_name]
-
- shared = pyopencl.LocalMemory(numpy.dtype(numpy.int32).itemsize * nbins)
-
- # Handle log-scale
- if log_scale:
- map_operation = numpy.int32(1)
- else:
- map_operation = numpy.int32(0)
- kernel = self.kernels.get_kernel("histogram")
- wg = min(device.max_work_group_size,
- 1 << (int(ceil(log(nbins, 2)))),
- self.kernels.max_workgroup_size(kernel))
- evt = kernel(self.queue, (wg * nb_engines,), (wg,),
- input_array.data,
- numpy.int32(input_array.size),
- mini,
- maxi,
- map_operation,
- output_array.data,
- edges_array.data,
- numpy.int32(nbins),
- tmp_array.data,
- self.cl_mem["cnt_d"].data,
- shared)
- events.append(EventDescription("histogram", evt))
-
- if self.profile:
- self.events += events
-
- if out is None:
- res = output_array.get()
- return res, edges_array.get()
- else:
- output_array.finish()
- return output_array, edges_array
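A minimal usage sketch for ImageProcessing, assuming a working OpenCL device; the uint16 test image is arbitrary:

import numpy
from silx.opencl.image import ImageProcessing

img = (numpy.random.rand(256, 256) * 65535).astype(numpy.uint16)
ip = ImageProcessing(template=img)
as_float = ip.to_float(img)                       # cast performed on the device
normalized = ip.normalize(as_float, 0.0, 1.0)     # two-stage min/max reduction + map
histo, edges = ip.histogram(as_float, nbins=128)  # histogram and bin edges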
diff --git a/silx/opencl/linalg.py b/silx/opencl/linalg.py
deleted file mode 100644
index a64122a..0000000
--- a/silx/opencl/linalg.py
+++ /dev/null
@@ -1,220 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for basic linear algebra in OpenCL"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import numpy as np
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing
-
-import pyopencl.array as parray
-cl = pyopencl
-
-
-class LinAlg(OpenclProcessing):
-
- kernel_files = ["linalg.cl"]
-
- def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False):
- """
- Create a "Linear Algebra" plan for a given image shape.
-
- :param shape: shape of the image (num_rows, num_columns)
- :param do_checks: (optional) if True, memory and data type checks are performed when possible.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
-
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self.d_gradient = parray.empty(self.queue, shape, np.complex64)
- self.d_gradient.fill(np.complex64(0.0))
- self.d_image = parray.empty(self.queue, shape, np.float32)
- self.d_image.fill(np.float32(0.0))
- self.add_to_cl_mem({
- "d_gradient": self.d_gradient,
- "d_image": self.d_image
- })
-
- self.wg2D = None
- self.shape = shape
- self.ndrange2D = (
- int(self.shape[1]),
- int(self.shape[0])
- )
- self.do_checks = bool(do_checks)
- OpenclProcessing.compile_kernels(self, self.kernel_files)
-
- @staticmethod
- def check_array(array, dtype, shape, arg_name):
- if array.shape != shape or array.dtype != dtype:
- raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype)))
-
- def get_data_references(self, src, dst, default_src_ref, default_dst_ref):
- """
- From various types of src and dst arrays,
- returns the references to the underlying data (Buffer) that will be used by the OpenCL kernels.
- # TODO documentation
-
- This function will make a copy host->device if the input is on host (eg. numpy array)
- """
- if dst is not None:
- if isinstance(dst, cl.array.Array):
- dst_ref = dst.data
- elif isinstance(dst, cl.Buffer):
- dst_ref = dst
- else:
- raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer")
- else:
- dst_ref = default_dst_ref
-
- if isinstance(src, cl.array.Array):
- src_ref = src.data
- elif isinstance(src, cl.Buffer):
- src_ref = src
- else: # assuming numpy.ndarray
- evt = cl.enqueue_copy(self.queue, default_src_ref, src)
- self.events.append(EventDescription("copy H->D", evt))
- src_ref = default_src_ref
- return src_ref, dst_ref
-
- def gradient(self, image, dst=None, return_to_host=False):
- """
- Compute the spatial gradient of an image.
- The gradient is computed with first-order difference (not central difference).
-
- :param image: image to compute the gradient from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
- :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type.
- :param return_to_host: optional, set to True if you want the result to be transferred back to host.
-
- if dst is provided, it should be of type numpy.complex64 !
- """
- n_y, n_x = np.int32(self.shape)
- if self.do_checks:
- self.check_array(image, np.float32, self.shape, "image")
- if dst is not None:
- self.check_array(dst, np.complex64, self.shape, "dst")
- img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data)
-
- # Prepare the kernel call
- kernel_args = [
- img_ref,
- grad_ref,
- n_x,
- n_y
- ]
- # Call the gradient kernel
- evt = self.kernels.kern_gradient2D(
- self.queue,
- self.ndrange2D,
- self.wg2D,
- *kernel_args
- )
- self.events.append(EventDescription("gradient2D", evt))
- # TODO: should the wait be done in any case ?
- # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
- if dst is None:
- evt.wait()
-
- if return_to_host:
- if dst is not None:
- res_tmp = self.d_gradient.get()
- else:
- res_tmp = np.zeros(self.shape, dtype=np.complex64)
- cl.enqueue_copy(self.queue, res_tmp, grad_ref)
- res = np.zeros((2,) + self.shape, dtype=np.float32)
- res[0] = np.copy(res_tmp.real)
- res[1] = np.copy(res_tmp.imag)
- return res
- else:
- return dst
-
- def divergence(self, gradient, dst=None, return_to_host=False):
- """
- Compute the spatial divergence of a gradient-like array.
- The divergence is designed to be the (negative) adjoint of the gradient.
-
- :param gradient: gradient-like array to compute the divergence from. It can be either a numpy.ndarray, a pyopencl Array or Buffer.
- :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type.
- :param return_to_host: optional, set to True if you want the result to be transferred back to host.
-
- if dst is provided, it should be of type numpy.float32!
- """
- n_y, n_x = np.int32(self.shape)
- # numpy.ndarray gradients are expected to be (2, n_y, n_x)
- if isinstance(gradient, np.ndarray):
- gradient2 = np.zeros(self.shape, dtype=np.complex64)
- gradient2.real = np.copy(gradient[0])
- gradient2.imag = np.copy(gradient[1])
- gradient = gradient2
- elif self.do_checks:
- self.check_array(gradient, np.complex64, self.shape, "gradient")
- if dst is not None:
- self.check_array(dst, np.float32, self.shape, "dst")
- grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data)
-
- # Prepare the kernel call
- kernel_args = [
- grad_ref,
- img_ref,
- n_x,
- n_y
- ]
- # Call the gradient kernel
- evt = self.kernels.kern_divergence2D(
- self.queue,
- self.ndrange2D,
- self.wg2D,
- *kernel_args
- )
- self.events.append(EventDescription("divergence2D", evt))
- # TODO: should the wait be done in any case ?
- # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards
- if dst is None:
- evt.wait()
-
- if return_to_host:
- if dst is not None:
- res = self.d_image.get()
- else:
- res = np.zeros(self.shape, dtype=np.float32)
- cl.enqueue_copy(self.queue, res, img_ref)
- return res
- else:
- return dst
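A minimal usage sketch of the gradient/divergence pair defined above, assuming a working OpenCL device:

import numpy as np
from silx.opencl.linalg import LinAlg

image = np.random.rand(256, 256).astype(np.float32)
la = LinAlg(image.shape)
grad = la.gradient(image, return_to_host=True)    # shape (2, 256, 256), float32
div = la.divergence(grad, return_to_host=True)    # shape (256, 256), float32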
diff --git a/silx/opencl/medfilt.py b/silx/opencl/medfilt.py
deleted file mode 100644
index d4e425b..0000000
--- a/silx/opencl/medfilt.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: Azimuthal integration
-# https://github.com/silx-kit/pyFAI
-#
-# Copyright (C) 2012-2017 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-# .
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-# .
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""A module for performing the 1d, 2d and 3d median filter ...
-
- The target is to mimic the signature of scipy.signal.medfilt and scipy.signal.medfilt2d
-
-The first implementation targets 2D implementation where this operation is costly (~10s/2kx2k image)
-"""
-from __future__ import absolute_import, print_function, with_statement, division
-
-
-__author__ = "Jerome Kieffer"
-__license__ = "MIT"
-__date__ = "12/09/2017"
-__copyright__ = "2012-2017, ESRF, Grenoble"
-__contact__ = "jerome.kieffer@esrf.fr"
-
-import logging
-import numpy
-from collections import OrderedDict
-
-from .common import pyopencl, kernel_workgroup_size
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-
-if pyopencl:
- mf = pyopencl.mem_flags
-else:
- raise ImportError("pyopencl is not installed")
-logger = logging.getLogger(__name__)
-
-
-class MedianFilter2D(OpenclProcessing):
- """A class for doing median filtering using OpenCL"""
- buffers = [
- BufferDescription("result", 1, numpy.float32, mf.WRITE_ONLY),
- BufferDescription("image_raw", 1, numpy.float32, mf.READ_ONLY),
- BufferDescription("image", 1, numpy.float32, mf.READ_WRITE),
- ]
- kernel_files = ["preprocess.cl", "bitonic.cl", "medfilt.cl"]
- mapping = {numpy.int8: "s8_to_float",
- numpy.uint8: "u8_to_float",
- numpy.int16: "s16_to_float",
- numpy.uint16: "u16_to_float",
- numpy.uint32: "u32_to_float",
- numpy.int32: "s32_to_float"}
-
- def __init__(self, shape, kernel_size=(3, 3),
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, profile=False
- ):
- """Constructor of the OpenCL 2D median filtering class
-
- :param shape: shape of the images to treat
- :param kernel_size: 2-tuple of odd values
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param block_size: preferred workgroup size, may vary depending on the outcome of the compilation
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
- self.shape = shape
- self.size = self.shape[0] * self.shape[1]
- self.kernel_size = self.calc_kernel_size(kernel_size)
- self.workgroup_size = (self.calc_wg(self.kernel_size), 1) # 3D kernel
- self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
- for i in self.__class__.buffers]
-
- self.allocate_buffers()
- self.local_mem = self._get_local_mem(self.workgroup_size[0])
- OpenclProcessing.compile_kernels(self, self.kernel_files, "-D NIMAGE=%i" % self.size)
- self.set_kernel_arguments()
-
- def set_kernel_arguments(self):
- """Parametrize all kernel arguments
- """
- for val in self.mapping.values():
- self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("image_raw", "image")))
- self.cl_kernel_args["medfilt2d"] = OrderedDict((("image", self.cl_mem["image"]),
- ("result", self.cl_mem["result"]),
- ("local", self.local_mem),
- ("khs1", numpy.int32(self.kernel_size[0] // 2)), # Kernel half-size along dim1 (lines)
- ("khs2", numpy.int32(self.kernel_size[1] // 2)), # Kernel half-size along dim2 (columns)
- ("height", numpy.int32(self.shape[0])), # Image size along dim1 (lines)
- ("width", numpy.int32(self.shape[1]))))
-# ('debug', self.cl_mem["debug"]))) # Image size along dim2 (columns))
-
- def _get_local_mem(self, wg):
- return pyopencl.LocalMemory(wg * 32) # 4 bytes per float, 8 elements per thread
-
- def send_buffer(self, data, dest):
- """Send a numpy array to the device, including the cast on the device if possible
-
- :param data: numpy array with data
- :param dest: name of the buffer as registered in the class
- """
-
- dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
- events = []
- if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
- copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem[dest], numpy.ascontiguousarray(data, dest_type))
- events.append(EventDescription("copy H->D %s" % dest, copy_image))
- else:
- copy_image = pyopencl.enqueue_copy(self.queue, self.cl_mem["image_raw"], numpy.ascontiguousarray(data))
- kernel = getattr(self.program, self.mapping[data.dtype.type])
- cast_to_float = kernel(self.queue, (self.size,), None, self.cl_mem["image_raw"], self.cl_mem[dest])
- events += [EventDescription("copy H->D %s" % dest, copy_image), EventDescription("cast to float", cast_to_float)]
- if self.profile:
- self.events += events
-
- def calc_wg(self, kernel_size):
- """calculate and return the optimal workgroup size for the first dimension, taking into account
- the 8-height band
-
- :param kernel_size: 2-tuple of int, shape of the median window
- :return: optimal workgroup size
- """
- needed_threads = ((kernel_size[0] + 7) // 8) * kernel_size[1]
- if needed_threads < 8:
- wg = 8
- elif needed_threads < 32:
- wg = 32
- else:
- wg = 1 << (int(needed_threads).bit_length())
- return wg
-
- def medfilt2d(self, image, kernel_size=None):
- """Actually apply the median filtering on the image
-
- :param image: numpy array with the image
- :param kernel_size: 2-tuple of odd values; if None, use the kernel size set at construction
- :return: median-filtered 2D image
-
-
- Nota: for window size 1x1 -> 7x7 up to 49 / 64 elements in 8 threads, 8elt/th
- 9x9 -> 15x15 up to 225 / 256 elements in 32 threads, 8elt/th
- 17x17 -> 21x21 up to 441 / 512 elements in 64 threads, 8elt/th
-
- TODO: change window size on the fly,
-
-
- """
- events = []
- if kernel_size is None:
- kernel_size = self.kernel_size
- else:
- kernel_size = self.calc_kernel_size(kernel_size)
- kernel_half_size = kernel_size // numpy.int32(2)
- # this is the workgroup size
- wg = self.calc_wg(kernel_size)
-
- # check for valid work group size:
- amws = kernel_workgroup_size(self.program, "medfilt2d")
- logger.warning("max actual workgroup size: %s, expected: %s", amws, wg)
- if wg > amws:
- raise RuntimeError("Workgroup size is too big for medfilt2d: %s>%s" % (wg, amws))
-
- localmem = self._get_local_mem(wg)
-
- assert image.ndim == 2, "Treat only 2D images"
- assert image.shape[0] <= self.shape[0], "height is OK"
- assert image.shape[1] <= self.shape[1], "width is OK"
-
- with self.sem:
- self.send_buffer(image, "image")
-
- kwargs = self.cl_kernel_args["medfilt2d"]
- kwargs["local"] = localmem
- kwargs["khs1"] = kernel_half_size[0]
- kwargs["khs2"] = kernel_half_size[1]
- kwargs["height"] = numpy.int32(image.shape[0])
- kwargs["width"] = numpy.int32(image.shape[1])
-# for k, v in kwargs.items():
-# print("%s: %s (%s)" % (k, v, type(v)))
- mf2d = self.kernels.medfilt2d(self.queue,
- (wg, image.shape[1]),
- (wg, 1), *list(kwargs.values()))
- events.append(EventDescription("median filter 2d", mf2d))
-
- result = numpy.empty(image.shape, numpy.float32)
- ev = pyopencl.enqueue_copy(self.queue, result, self.cl_mem["result"])
- events.append(EventDescription("copy D->H result", ev))
- ev.wait()
- if self.profile:
- self.events += events
- return result
- __call__ = medfilt2d
-
- @staticmethod
- def calc_kernel_size(kernel_size):
- """format the kernel size to be a 2-length numpy array of int32
- """
- kernel_size = numpy.asarray(kernel_size, dtype=numpy.int32)
- if kernel_size.shape == ():
- kernel_size = numpy.repeat(kernel_size.item(), 2).astype(numpy.int32)
- for size in kernel_size:
- if (size % 2) != 1:
- raise ValueError("Each element of kernel_size should be odd.")
- return kernel_size
-
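-    # Sketch (illustrative, not from the original file): calc_kernel_size
-    # normalizes scalars to 2-element arrays and rejects even sizes, e.g.
-    #
-    #     MedianFilter2D.calc_kernel_size(5)       # -> array([5, 5], dtype=int32)
-    #     MedianFilter2D.calc_kernel_size((3, 5))  # -> array([3, 5], dtype=int32)
-    #     MedianFilter2D.calc_kernel_size(4)       # raises ValueError (even size)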
-
-class _MedFilt2d(object):
- median_filter = None
-
- @classmethod
- def medfilt2d(cls, ary, kernel_size=3):
- """Median filter a 2-dimensional array.
-
- Apply a median filter to the `input` array using a local window-size
- given by `kernel_size` (must be odd).
-
- :param ary: A 2-dimensional input array.
- :param kernel_size: A scalar or a list of length 2, giving the size of the
- median filter window in each dimension. Elements of
- `kernel_size` should be odd. If `kernel_size` is a scalar,
- then this scalar is used as the size in each dimension.
- Default is a kernel of size (3, 3).
- :return: An array the same size as input containing the median filtered
- result. The computation is always performed on float32 values.
-
- About the padding:
-
- * The filling mode in scipy.signal.medfilt2d is zero-padding
- * This implementation is equivalent to:
- scipy.ndimage.filters.median_filter(ary, kernel_size, mode="nearest")
-
- """
- image = numpy.atleast_2d(ary)
- shape = numpy.array(image.shape)
- if cls.median_filter is None:
- cls.median_filter = MedianFilter2D(image.shape, kernel_size)
- elif (numpy.array(cls.median_filter.shape) < shape).any():
- # enlarge the buffer size
- new_shape = numpy.maximum(numpy.array(cls.median_filter.shape), shape)
- ctx = cls.median_filter.ctx
- cls.median_filter = MedianFilter2D(new_shape, kernel_size, ctx=ctx)
- return cls.median_filter.medfilt2d(image, kernel_size=kernel_size)
-
-medfilt2d = _MedFilt2d.medfilt2d
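-
-# Minimal usage sketch (illustrative; assumes a working pyopencl installation
-# with at least one OpenCL device, and that this module is importable as
-# silx.opencl.medfilt):
-#
-#     import numpy
-#     from silx.opencl.medfilt import medfilt2d
-#     img = numpy.random.random((256, 256)).astype(numpy.float32)
-#     filtered = medfilt2d(img, kernel_size=5)  # float32 result, same shape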
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
deleted file mode 100644
index 8b81f7f..0000000
--- a/silx/opencl/processing.py
+++ /dev/null
@@ -1,447 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: S I L X project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2018 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-
-"""
-Common OpenCL abstract base class for different processing tasks
-"""
-
-__author__ = "Jerome Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "02/03/2021"
-__status__ = "stable"
-
-import sys
-import os
-import logging
-import gc
-from collections import namedtuple, OrderedDict
-import numpy
-import threading
-from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
-from .utils import concatenate_cl_kernel
-import platform
-
-BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
-EventDescription = namedtuple("EventDescription", ["name", "event"])
-
-logger = logging.getLogger(__name__)
-
-
-class KernelContainer(object):
- """Those object holds a copy of all kernels accessible as attributes"""
-
- def __init__(self, program):
- """Constructor of the class
-
- :param program: the OpenCL program as generated by PyOpenCL
- """
- self._program = program
- for kernel in program.all_kernels():
- self.__setattr__(kernel.function_name, kernel)
-
- def get_kernels(self):
- "return the dictionary with all kernels"
- return dict(item for item in self.__dict__.items()
- if not item[0].startswith("_"))
-
- def get_kernel(self, name):
- "get a kernel from its name"
- logger.debug("KernelContainer.get_kernel(%s)", name)
- return self.__dict__.get(name)
-
- def max_workgroup_size(self, kernel_name):
- "Retrieve the compile time WORK_GROUP_SIZE for a given kernel"
- if isinstance(kernel_name, pyopencl.Kernel):
- kernel = kernel_name
- else:
- kernel = self.get_kernel(kernel_name)
-
- return query_kernel_info(self._program, kernel, "WORK_GROUP_SIZE")
-
- def min_workgroup_size(self, kernel_name):
- "Retrieve the compile time PREFERRED_WORK_GROUP_SIZE_MULTIPLE for a given kernel"
- if isinstance(kernel_name, pyopencl.Kernel):
- kernel = kernel_name
- else:
- kernel = self.get_kernel(kernel_name)
-
- return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
-
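-    # Usage sketch (illustrative; `program` stands for any built
-    # pyopencl.Program and the kernel name is hypothetical):
-    #
-    #     kc = KernelContainer(program)
-    #     kc.get_kernels()                    # {kernel_name: pyopencl.Kernel}
-    #     kc.max_workgroup_size("medfilt2d")  # compile-time WORK_GROUP_SIZE
-    #     kc.min_workgroup_size("medfilt2d")  # preferred workgroup size multiple
-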
-
-class OpenclProcessing(object):
- """Abstract class for different types of OpenCL processing.
-
- This class provides:
- * Generation of the context, queues and profiling mode
- * Additional functions to allocate/free all buffers declared as static attributes of the class
- * Functions to compile kernels, cache them and clean them
- * Helper functions to clone the object
- """
- # Example of how to create an output buffer of 10 floats
- buffers = [BufferDescription("output", 10, numpy.float32, None),
- ]
- # list of kernel source files to be concatenated before compilation of the program
- kernel_files = []
-
- def __init__(self, ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
- """Constructor of the abstract OpenCL processing class
-
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param block_size: preferred workgroup size, may vary depending on the
- outcome of the compilation
- :param memory: minimum memory available on device
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly slower)
- """
- self.sem = threading.Semaphore()
- self._X87_VOLATILE = None
- self.profile = None
- self.events = [] # List of EventDescription, kept for profiling
- self.cl_mem = {} # dict with all buffers allocated
- self.cl_program = None # The actual OpenCL program
- self.cl_kernel_args = {} # dict with all kernel arguments
- self.queue = None
- if ctx:
- self.ctx = ctx
- else:
- self.ctx = ocl.create_context(devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- memory=memory)
- device_name = self.ctx.devices[0].name.strip()
- platform_name = self.ctx.devices[0].platform.name.strip()
- platform = ocl.get_platform(platform_name)
- self.device = platform.get_device(device_name)
- self.cl_kernel_args = {} # dict with all kernel arguments
-
- self.set_profiling(profile)
- self.block_size = block_size
- self.program = None
- self.kernels = None
-
- def check_textures_availability(self):
- return check_textures_availability(self.ctx)
-
- def __del__(self):
- """Destructor: release all buffers and programs
- """
- try:
- self.reset_log()
- self.free_kernels()
- self.free_buffers()
- if self.queue is not None:
- self.queue.finish()
- except Exception as err:
- logger.warning("%s: %s", type(err), err)
- self.queue = None
- self.device = None
- self.ctx = None
- gc.collect()
-
- def allocate_buffers(self, buffers=None, use_array=False):
- """
- Allocate OpenCL buffers required for a specific configuration
-
- :param buffers: a list of BufferDescriptions, leave to None for
- parametrized buffers.
- :param use_array: allocate memory as pyopencl.array.Array
- instead of pyopencl.Buffer
-
- Note that an OpenCL context also requires some memory, as well
- as Event and other OpenCL functionalities which cannot and are
- not taken into account here. The memory required by a context
- varies depending on the device. Typical for a GTX580 is 65MB but
- for a 9300m it is ~15MB. In addition, a GPU will always have at
- least 3-5MB of memory in use. Unfortunately, OpenCL does NOT
- have a built-in way to check the actual free memory on a
- device, only the total memory.
- """
- if buffers is None:
- buffers = self.buffers
-
- with self.sem:
- mem = {}
-
- # check if enough memory is available on the device
- ualloc = 0
- for buf in buffers:
- ualloc += numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
- logger.info("%.3fMB are needed on device: %s, which has %.3fMB",
- ualloc / 1.0e6, self.device, self.device.memory / 1.0e6)
-
- if ualloc >= self.device.memory:
- raise MemoryError("Fatal error in allocate_buffers. Not enough "
- " device memory for buffers (%lu requested, %lu available)"
- % (ualloc, self.device.memory))
-
- # do the allocation
- try:
- if use_array:
- for buf in buffers:
- mem[buf.name] = pyopencl.array.empty(self.queue, buf.size, buf.dtype)
- else:
- for buf in buffers:
- size = numpy.dtype(buf.dtype).itemsize * numpy.prod(buf.size)
- mem[buf.name] = pyopencl.Buffer(self.ctx, buf.flags, int(size))
- except pyopencl.MemoryError as error:
- release_cl_buffers(mem)
- raise MemoryError(error)
-
- self.cl_mem.update(mem)
-
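-    # Sketch of how a concrete subclass typically declares and allocates its
-    # buffers (class and buffer names below are illustrative only):
-    #
-    #     class MyProcessing(OpenclProcessing):
-    #         buffers = [BufferDescription("input", 1024, numpy.float32,
-    #                                      pyopencl.mem_flags.READ_ONLY),
-    #                    BufferDescription("output", 1024, numpy.float32,
-    #                                      pyopencl.mem_flags.WRITE_ONLY)]
-    #
-    #     proc = MyProcessing(devicetype="GPU")
-    #     proc.allocate_buffers()  # fills proc.cl_mem["input"] / ["output"]
-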
- def add_to_cl_mem(self, parrays):
- """
- Add pyopencl arrays, which were allocated by pyopencl, to self.cl_mem.
- This should be used before calling allocate_buffers().
-
- :param parrays: a dictionary of `pyopencl.array.Array` or `pyopencl.Buffer`
- """
- mem = self.cl_mem
- for name, parr in parrays.items():
- mem[name] = parr
- self.cl_mem.update(mem)
-
- def check_workgroup_size(self, kernel_name):
- "Calculate the maximum workgroup size from given kernel after compilation"
- return self.kernels.max_workgroup_size(kernel_name)
-
- def free_buffers(self):
- """free all device.memory allocated on the device
- """
- with self.sem:
- for key, buf in list(self.cl_mem.items()):
- if buf is not None:
- if isinstance(buf, pyopencl.array.Array):
- try:
- buf.data.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- else:
- try:
- buf.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- self.cl_mem[key] = None
-
- def compile_kernels(self, kernel_files=None, compile_options=None):
- """Call the OpenCL compiler
-
- :param kernel_files: list of path to the kernel
- (by default use the one declared in the class)
- :param compile_options: string of compile options
- """
- # concatenate all needed source files into a single openCL module
- kernel_files = kernel_files or self.kernel_files
- kernel_src = concatenate_cl_kernel(kernel_files)
-
- compile_options = compile_options or self.get_compiler_options()
- logger.info("Compiling file %s with options %s", kernel_files, compile_options)
- try:
- self.program = pyopencl.Program(self.ctx, kernel_src).build(options=compile_options)
- except (pyopencl.MemoryError, pyopencl.LogicError) as error:
- raise MemoryError(error)
- else:
- self.kernels = KernelContainer(self.program)
-
- def free_kernels(self):
- """Free all kernels
- """
- for kernel in self.cl_kernel_args:
- self.cl_kernel_args[kernel] = []
- self.kernels = None
- self.program = None
-
- def set_profiling(self, value=True):
- """Switch On/Off the profiling flag of the command queue to allow debugging
-
- :param value: set to True to enable profiling, or to False to disable it.
- Without profiling, the processing is marginally faster
-
- Profiling information can then be retrieved with the 'log_profile' method
- """
- if bool(value) != self.profile:
- with self.sem:
- self.profile = bool(value)
- if self.queue is not None:
- self.queue.finish()
- if self.profile:
- self.queue = pyopencl.CommandQueue(self.ctx,
- properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
- else:
- self.queue = pyopencl.CommandQueue(self.ctx)
-
- def profile_add(self, event, desc):
- """
- Add an OpenCL event to the events lists, if profiling is enabled.
-
- :param event: pyopencl event, as returned by an enqueue call
- :param desc: event description (text shown in the profiling log)
- """
- if self.profile:
- self.events.append(EventDescription(desc, event))
-
- def allocate_texture(self, shape, hostbuf=None, support_1D=False):
- return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
-
- def transfer_to_texture(self, arr, tex_ref):
- """
- Transfer an array to a texture.
-
- :param arr: Input array. Can be a numpy array or a pyopencl array.
- :param tex_ref: texture reference (pyopencl._cl.Image).
- """
- copy_args = [self.queue, tex_ref, arr]
- shp = arr.shape
- ndim = arr.ndim
- if ndim == 1:
- # pyopencl and OpenCL < 1.2 do not support image1d_t
- # force 2D with one row in this case
- # ~ ndim = 2
- shp = (1,) + shp
- copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
- if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
- # D->D copy
- copy_args[2] = arr.data
- copy_kwargs["offset"] = 0
- ev = pyopencl.enqueue_copy(*copy_args, **copy_kwargs)
- self.profile_add(ev, "Transfer to texture")
-
- def log_profile(self, stats=False):
- """If we are in profiling mode, prints out all timing for every single OpenCL call
-
- :param stats: if True, prints the statistics on each kernel instead of all execution timings
- :return: list of lines to print
- """
- total_time = 0.0
- out = [""]
- if stats:
- stats = OrderedDict()
- out.append(f"OpenCL kernel profiling statistics in milliseconds for: {self.__class__.__name__}")
- out.append(f"{'Kernel name':>50} (count): min median max mean std")
- else:
- stats = None
- out.append(f"Profiling info for OpenCL: {self.__class__.__name__}")
-
- if self.profile:
- for e in self.events:
- if "__len__" in dir(e) and len(e) >= 2:
- name = e[0]
- pr = e[1].profile
- t0 = pr.start
- t1 = pr.end
- et = 1e-6 * (t1 - t0)
- total_time += et
- if stats is None:
- out.append(f"{name:>50} : {et:.3f}ms")
- else:
- if name in stats:
- stats[name].append(et)
- else:
- stats[name] = [et]
- if stats is not None:
- for k, v in stats.items():
- n = numpy.array(v)
- out.append(f"{k:>50} ({len(v):5}): {n.min():8.3f} {numpy.median(n):8.3f} {n.max():8.3f} {n.mean():8.3f} {n.std():8.3f}")
- out.append("_" * 80)
- out.append(f"{'Total OpenCL execution time':>50} : {total_time:.3f}ms")
-
- logger.info(os.linesep.join(out))
- return out
-
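-    # Profiling workflow sketch (illustrative; `MyProcessing` and `process`
-    # are hypothetical names standing for any concrete subclass and method):
-    #
-    #     proc = MyProcessing(devicetype="GPU", profile=True)
-    #     proc.process(data)            # enqueues kernels, records events
-    #     proc.log_profile(stats=True)  # per-kernel min/median/max/mean/std
-    #     proc.reset_log()              # clear timers before the next run
-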
- def reset_log(self):
- """
- Resets the profiling timers
- """
- with self.sem:
- self.events = []
-
- @property
- def x87_volatile_option(self):
- # this is for 32-bit OpenCL running with PoCL
- if self._X87_VOLATILE is None:
- if (platform.machine() in ("i386", "i686", "x86_64", "AMD64") and
- (tuple.__itemsize__ == 4) and
- self.ctx.devices[0].platform.name == 'Portable Computing Language'):
- self._X87_VOLATILE = "-DX87_VOLATILE=volatile"
- else:
- self._X87_VOLATILE = ""
- return self._X87_VOLATILE
-
- def get_compiler_options(self, x87_volatile=False):
- """Provide the default OpenCL compiler options
-
- :param x87_volatile: needed for Kahan summation
- :return: string with compiler option
- """
- option_list = []
- if x87_volatile:
- option_list.append(self.x87_volatile_option)
- return " ".join(i for i in option_list if i)
-
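-    # Sketch (illustrative): on most 64-bit platforms the x87 workaround
-    # resolves to an empty string, so the options are usually empty:
-    #
-    #     proc.get_compiler_options(x87_volatile=True)
-    #     # -> "-DX87_VOLATILE=volatile" on 32-bit PoCL/x86, "" otherwise
-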
-# This should be implemented by concrete class
-# def __copy__(self):
-# """Shallow copy of the object
-#
-# :return: copy of the object
-# """
-# return self.__class__((self._data, self._indices, self._indptr),
-# self.size, block_size=self.BLOCK_SIZE,
-# platformid=self.platform.id,
-# deviceid=self.device.id,
-# checksum=self.on_device.get("data"),
-# profile=self.profile, empty=self.empty)
-#
-# def __deepcopy__(self, memo=None):
-# """deep copy of the object
-#
-# :return: deepcopy of the object
-# """
-# if memo is None:
-# memo = {}
-# new_csr = self._data.copy(), self._indices.copy(), self._indptr.copy()
-# memo[id(self._data)] = new_csr[0]
-# memo[id(self._indices)] = new_csr[1]
-# memo[id(self._indptr)] = new_csr[2]
-# new_obj = self.__class__(new_csr, self.size,
-# block_size=self.BLOCK_SIZE,
-# platformid=self.platform.id,
-# deviceid=self.device.id,
-# checksum=self.on_device.get("data"),
-# profile=self.profile, empty=self.empty)
-# memo[id(self)] = new_obj
-# return new_obj
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
deleted file mode 100644
index c02faf6..0000000
--- a/silx/opencl/projection.py
+++ /dev/null
@@ -1,428 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016-2020 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for tomographic projector on the GPU"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["A. Mirone, P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import logging
-import numpy as np
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-from .backprojection import _sizeof, _idivup
-
-if pyopencl:
- mf = pyopencl.mem_flags
- import pyopencl.array as parray
-else:
- raise ImportError("pyopencl is not installed")
-logger = logging.getLogger(__name__)
-
-
-class Projection(OpenclProcessing):
- """
- A class for performing a tomographic projection (Radon Transform) using
- OpenCL
- """
- kernel_files = ["proj.cl", "array_utils.cl"]
- logger.warning("Forward Projecter is untested and unsuported for now")
-
- def __init__(self, slice_shape, angles, axis_position=None,
- detector_width=None, normalize=False, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- """Constructor of the OpenCL projector.
-
- :param slice_shape: shape of the slice: (num_rows, num_columns).
- :param angles: Either an integer number of angles, or a list of custom
- angles values in radian.
- :param axis_position: Optional, axis position. Default is
- `(shape[1]-1)/2.0`.
- :param detector_width: Optional, detector width in pixels.
- If detector_width > slice_shape[1], the
- projection data will be surrounded with zeros.
- Using detector_width < slice_shape[1] might
- result in a local tomography setup.
- :param normalize: Optional, normalization. If set, the sinograms are
- multiplied by the factor pi/(2*nprojs).
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- """
- # OS X enforces a workgroup size of 1 when the kernel has synchronization barriers
- # if sys.platform.startswith('darwin'): # assuming no discrete GPU
- # raise NotImplementedError("Backprojection is not implemented on CPU for OS X yet")
-
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
- self.shape = slice_shape
- self.axis_pos = axis_position
- self.angles = angles
- self.dwidth = detector_width
- self.normalize = normalize
-
- # Default values
- if self.axis_pos is None:
- self.axis_pos = (self.shape[1] - 1) / 2.
- if self.dwidth is None:
- self.dwidth = self.shape[1]
- if not(np.iterable(self.angles)):
- if self.angles is None:
- self.nprojs = self.shape[0]
- else:
- self.nprojs = self.angles
- self.angles = np.linspace(start=0,
- stop=np.pi,
- num=self.nprojs,
- endpoint=False).astype(dtype=np.float32)
- else:
- self.nprojs = len(self.angles)
- self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos) # TODO: custom
- self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos) # TODO: custom
- # Reset axis_pos once offset are computed
- self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.)
-
- # Workgroup, ndrange and shared size
- self.dimgrid_x = _idivup(self.dwidth, 16)
- self.dimgrid_y = _idivup(self.nprojs, 16)
- self._dimrecx = np.int32(self.dimgrid_x * 16)
- self._dimrecy = np.int32(self.dimgrid_y * 16)
- self.local_mem = 16 * 7 * _sizeof(np.float32)
- self.wg = (16, 16)
- self.ndrange = (
- int(self.dimgrid_x) * self.wg[0], # int(): pyopencl <= 2015.1
- int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1
- )
-
- self._use_textures = self.check_textures_availability()
-
- # Allocate memory
- self.buffers = [
- BufferDescription("_d_sino", self._dimrecx * self._dimrecy, np.float32, mf.READ_WRITE),
- BufferDescription("d_angles", self._dimrecy, np.float32, mf.READ_ONLY),
- BufferDescription("d_beginPos", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- BufferDescription("d_strideJoseph", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- BufferDescription("d_strideLine", self._dimrecy * 2, np.int32, mf.READ_ONLY),
- ]
- d_axis_corrections = parray.empty(self.queue, self.nprojs, np.float32)
- d_axis_corrections.fill(np.float32(0.0))
- self.add_to_cl_mem(
- {
- "d_axis_corrections": d_axis_corrections
- }
- )
- self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2),
- dtype=np.float32)
- if not(self._use_textures):
- self.allocate_slice()
- else:
- self.allocate_textures()
- self.allocate_buffers()
- self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
- dtype=np.float32)
- if not(self._use_textures):
- self.cl_mem["d_slice"].fill(0.)
- # enqueue_fill_buffer has issues if opencl 1.2 is not present
- # ~ pyopencl.enqueue_fill_buffer(
- # ~ self.queue,
- # ~ self.cl_mem["d_slice"],
- # ~ np.float32(0),
- # ~ 0,
- # ~ self._tmp_extended_img.size * _sizeof(np.float32)
- # ~ )
- # Precomputations
- self.compute_angles()
- self.proj_precomputations()
- self.cl_mem["d_axis_corrections"].fill(0.)
- # enqueue_fill_buffer has issues if opencl 1.2 is not present
- # ~ pyopencl.enqueue_fill_buffer(
- # ~ self.queue,
- # ~ self.cl_mem["d_axis_corrections"],
- # ~ np.float32(0),
- # ~ 0,
- # ~ self.nprojs*_sizeof(np.float32)
- # ~ )
- # Shorthands
- self._d_sino = self.cl_mem["_d_sino"]
-
- compile_options = None
- if not(self._use_textures):
- compile_options = "-DDONT_USE_TEXTURES"
- OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=compile_options
- )
- # check that workgroup can actually be (16, 16)
- self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu")
-
- def compute_angles(self):
- angles2 = np.zeros(self._dimrecy, dtype=np.float32) # dimrecy != num_projs
- angles2[:self.nprojs] = np.copy(self.angles)
- angles2[self.nprojs:] = angles2[self.nprojs - 1]
- self.angles2 = angles2
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
-
- def allocate_slice(self):
- ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
- ary.fill(0)
- self.add_to_cl_mem({"d_slice": ary})
-
- def allocate_textures(self):
- self.d_image_tex = pyopencl.Image(
- self.ctx,
- mf.READ_ONLY | mf.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ), hostbuf=np.ascontiguousarray(self._tmp_extended_img.T),
- )
-
- def transfer_to_texture(self, image):
- image2 = image
- if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
- image2 = np.ascontiguousarray(image)
- if not(self._use_textures):
- # TODO: create NoneEvent
- return self.transfer_to_slice(image2)
- # ~ return pyopencl.enqueue_copy(
- # ~ self.queue,
- # ~ self.cl_mem["d_slice"].data,
- # ~ image2,
- # ~ origin=(1, 1),
- # ~ region=image.shape[::-1]
- # ~ )
- else:
- return pyopencl.enqueue_copy(
- self.queue,
- self.d_image_tex,
- image2,
- origin=(1, 1),
- region=image.shape[::-1]
- )
-
- def transfer_device_to_texture(self, d_image):
- if not(self._use_textures):
- # TODO this copy should not be necessary
- return self.cpy2d_to_slice(d_image)
- else:
- return pyopencl.enqueue_copy(
- self.queue,
- self.d_image_tex,
- d_image,
- offset=0,
- origin=(1, 1),
- region=(int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl <= 2015.2
- )
-
- def transfer_to_slice(self, image):
- image2 = np.zeros((image.shape[0] + 2, image.shape[1] + 2), dtype=np.float32)
- image2[1:-1, 1:-1] = image.astype(np.float32)
- self.cl_mem["d_slice"].set(image2)
-
- def proj_precomputations(self):
- beginPos = np.zeros((2, self._dimrecy), dtype=np.int32)
- strideJoseph = np.zeros((2, self._dimrecy), dtype=np.int32)
- strideLine = np.zeros((2, self._dimrecy), dtype=np.int32)
- cos_angles = np.cos(self.angles2)
- sin_angles = np.sin(self.angles2)
- dimslice = self.shape[1]
-
- M1 = np.abs(cos_angles) > 0.70710678
- M1b = np.logical_not(M1)
- M2 = cos_angles > 0
- M2b = np.logical_not(M2)
- M3 = sin_angles > 0
- M3b = np.logical_not(M3)
- case1 = M1 * M2
- case2 = M1 * M2b
- case3 = M1b * M3
- case4 = M1b * M3b
-
- beginPos[0][case1] = 0
- beginPos[1][case1] = 0
- strideJoseph[0][case1] = 1
- strideJoseph[1][case1] = 0
- strideLine[0][case1] = 0
- strideLine[1][case1] = 1
-
- beginPos[0][case2] = dimslice - 1
- beginPos[1][case2] = dimslice - 1
- strideJoseph[0][case2] = -1
- strideJoseph[1][case2] = 0
- strideLine[0][case2] = 0
- strideLine[1][case2] = -1
-
- beginPos[0][case3] = dimslice - 1
- beginPos[1][case3] = 0
- strideJoseph[0][case3] = 0
- strideJoseph[1][case3] = 1
- strideLine[0][case3] = -1
- strideLine[1][case3] = 0
-
- beginPos[0][case4] = 0
- beginPos[1][case4] = dimslice - 1
- strideJoseph[0][case4] = 0
- strideJoseph[1][case4] = -1
- strideLine[0][case4] = 1
- strideLine[1][case4] = 0
-
- # For debug purpose
- # ~ self.beginPos = beginPos
- # ~ self.strideJoseph = strideJoseph
- # ~ self.strideLine = strideLine
- #
-
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_beginPos"], beginPos)
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideJoseph"], strideJoseph)
- pyopencl.enqueue_copy(self.queue, self.cl_mem["d_strideLine"], strideLine)
-
- def _get_local_mem(self):
- return pyopencl.LocalMemory(self.local_mem) # constant for all image sizes
-
- def cpy2d_to_sino(self, dst):
- ndrange = (int(self.dwidth), int(self.nprojs)) # pyopencl < 2015.2
- sino_shape_ocl = np.int32(ndrange)
- wg = None
- kernel_args = (
- dst.data,
- self._d_sino,
- np.int32(self.dwidth),
- np.int32(self._dimrecx),
- np.int32((0, 0)),
- np.int32((0, 0)),
- sino_shape_ocl
- )
- return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
-
- def cpy2d_to_slice(self, src):
- """
- Copy an Nx * Ny slice to self.d_slice, which is (Nx+2) * (Ny+2)
- """
- ndrange = (int(self.shape[1]), int(self.shape[0])) # self.shape[::-1] # pyopencl < 2015.2
- wg = None
- slice_shape_ocl = np.int32(ndrange)
- kernel_args = (
- self.cl_mem["d_slice"].data,
- src,
- np.int32(self.shape[1] + 2),
- np.int32(self.shape[1]),
- np.int32((1, 1)),
- np.int32((0, 0)),
- slice_shape_ocl
- )
- return self.kernels.cpy2d(self.queue, ndrange, wg, *kernel_args)
-
- def projection(self, image=None, dst=None):
- """Perform the projection on an input image
-
- :param image: Image to project
- :return: A sinogram
- """
- events = []
- with self.sem:
- if image is not None:
- assert image.ndim == 2, "Only 2D images are supported"
- assert image.shape[0] == self.shape[0], "image height must match the slice height"
- assert image.shape[1] == self.shape[1], "image width must match the slice width"
- if self._use_textures:
- self.transfer_to_texture(image)
- slice_ref = self.d_image_tex
- else:
- self.transfer_to_slice(image)
- slice_ref = self.cl_mem["d_slice"].data
- else:
- if not(self._use_textures):
- slice_ref = self.cl_mem["d_slice"].data
- else:
- slice_ref = self.d_image_tex
-
- kernel_args = (
- self._d_sino,
- slice_ref,
- np.int32(self.shape[1]),
- np.int32(self.dwidth),
- self.cl_mem["d_angles"],
- np.float32(self.axis_pos0),
- self.cl_mem["d_axis_corrections"].data, # TODO custom
- self.cl_mem["d_beginPos"],
- self.cl_mem["d_strideJoseph"],
- self.cl_mem["d_strideLine"],
- np.int32(self.nprojs),
- self._dimrecx,
- self._dimrecy,
- self.offset_x,
- self.offset_y,
- np.int32(1), # josephnoclip, 1 by default
- np.int32(self.normalize)
- )
-
- # Call the kernel
- if not(self._use_textures):
- event_pj = self.kernels.forward_kernel_cpu(
- self.queue,
- self.ndrange,
- self.wg,
- *kernel_args
- )
- else:
- event_pj = self.kernels.forward_kernel(
- self.queue,
- self.ndrange,
- self.wg,
- *kernel_args
- )
- events.append(EventDescription("projection", event_pj))
- if dst is None:
- self._ex_sino[:] = 0
- ev = pyopencl.enqueue_copy(self.queue, self._ex_sino, self._d_sino)
- events.append(EventDescription("copy D->H result", ev))
- ev.wait()
- res = np.copy(self._ex_sino[:self.nprojs, :self.dwidth])
- else:
- ev = self.cpy2d_to_sino(dst)
- events.append(EventDescription("copy D->D result", ev))
- ev.wait()
- res = dst
- # /with self.sem
- if self.profile:
- self.events += events
- # ~ res = self._ex_sino
- return res
-
- __call__ = projection
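-
-# Minimal usage sketch (illustrative; note the warning above that the forward
-# projector is untested):
-#
-#     import numpy as np
-#     from silx.opencl.projection import Projection
-#     slice_ = np.random.random((256, 256)).astype(np.float32)
-#     proj = Projection(slice_.shape, angles=180)  # 180 angles over [0, pi)
-#     sino = proj(slice_)                          # sinogram of shape (180, 256)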
diff --git a/silx/opencl/reconstruction.py b/silx/opencl/reconstruction.py
deleted file mode 100644
index 2c84aee..0000000
--- a/silx/opencl/reconstruction.py
+++ /dev/null
@@ -1,388 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for tomographic reconstruction algorithms"""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "01/08/2019"
-
-import logging
-import numpy as np
-
-from .common import pyopencl
-from .processing import OpenclProcessing
-from .backprojection import Backprojection
-from .projection import Projection
-from .linalg import LinAlg
-
-import pyopencl.array as parray
-from pyopencl.elementwise import ElementwiseKernel
-logger = logging.getLogger(__name__)
-
-cl = pyopencl
-
-
-class ReconstructionAlgorithm(OpenclProcessing):
- """
- A parent class for all iterative tomographic reconstruction algorithms
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins and
- n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice.
- By default, it is a square slice where the dimension
- is the "x dimension" of the sinogram (number of bins).
- :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
- """
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- # Create a backprojector
- self.backprojector = Backprojection(
- sino_shape,
- slice_shape=slice_shape,
- axis_position=axis_position,
- angles=angles,
- ctx=self.ctx,
- profile=profile
- )
- # Create a projector
- self.projector = Projection(
- self.backprojector.slice_shape,
- self.backprojector.angles,
- axis_position=axis_position,
- detector_width=self.backprojector.num_bins,
- normalize=False,
- ctx=self.ctx,
- profile=profile
- )
- self.sino_shape = sino_shape
- self.is_cpu = self.backprojector.is_cpu
- # Arrays
- self.d_data = parray.empty(self.queue, sino_shape, dtype=np.float32)
- self.d_data.fill(0.0)
- self.d_sino = parray.empty_like(self.d_data)
- self.d_sino.fill(0.0)
- self.d_x = parray.empty(self.queue,
- self.backprojector.slice_shape,
- dtype=np.float32)
- self.d_x.fill(0.0)
- self.d_x_old = parray.empty_like(self.d_x)
- self.d_x_old.fill(0.0)
-
- self.add_to_cl_mem({
- "d_data": self.d_data,
- "d_sino": self.d_sino,
- "d_x": self.d_x,
- "d_x_old": self.d_x_old,
- })
-
- def proj(self, d_slice, d_sino):
- """
- Project d_slice to d_sino
- """
- self.projector.transfer_device_to_texture(d_slice.data) #.wait()
- self.projector.projection(dst=d_sino)
-
- def backproj(self, d_sino, d_slice):
- """
- Backproject d_sino to d_slice
- """
- self.backprojector.transfer_device_to_texture(d_sino.data) #.wait()
- self.backprojector.backprojection(dst=d_slice)
-
-
-class SIRT(ReconstructionAlgorithm):
- """
- A class for the SIRT algorithm
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins and
- n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice.
- By default, it is a square slice where the dimension is
- the "x dimension" of the sinogram (number of bins).
- :param axis_position: Optional, axis position. Default is `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
-
- .. warning:: This is a beta version of the SIRT algorithm. Reconstruction
- fails at least on CPU (Xeon E3-1245 v5) when using the AMD OpenCL
- implementation.
- """
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
-
- ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
- axis_position=axis_position, angles=angles,
- ctx=ctx, devicetype=devicetype, platformid=platformid,
- deviceid=deviceid, profile=profile)
- self.compute_preconditioners()
-
- def compute_preconditioners(self):
- """
- Create a diagonal preconditioner for the projection and backprojection
- operator.
- Each term of the diagonal is the sum of the projector/backprojector
- along rows [1], i.e. the projection/backprojection of an array of ones.
-
- [1] Jens Gregor and Thomas Benson,
- Computational Analysis and Improvement of SIRT,
- IEEE transactions on medical imaging, vol. 27, no. 7, 2008
- """
-
- # r_{i,i} = 1/(sum_j a_{i,j})
- slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
- R = 1./self.projector.projection(slice_ones) # could be all done on GPU, but I want extra checks
- R[np.logical_not(np.isfinite(R))] = 1. # In the case where the rotation axis is off-centred
- self.d_R = parray.to_device(self.queue, R)
- # c_{j,j} = 1/(sum_i a_{i,j})
- sino_ones = np.ones(self.sino_shape, dtype=np.float32)
- C = 1./self.backprojector.backprojection(sino_ones)
- C[np.logical_not(np.isfinite(C))] = 1. # In the case where the rotation axis is off-centred
- self.d_C = parray.to_device(self.queue, C)
-
- self.add_to_cl_mem({
- "d_R": self.d_R,
- "d_C": self.d_C
- })
-
- # TODO: compute and possibly return the residual
- def run(self, data, n_it):
- """
- Run n_it iterations of the SIRT algorithm.
- """
- cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
-
- d_x_old = self.d_x_old
- d_x = self.d_x
- d_R = self.d_R
- d_C = self.d_C
- d_sino = self.d_sino
- d_x *= 0
-
- for k in range(n_it):
- d_x_old[:] = d_x[:]
- # x{k+1} = x{k} - C A^T R (A x{k} - b)
- self.proj(d_x, d_sino)
- d_sino -= self.d_data
- d_sino *= d_R
- if self.is_cpu:
- # This sync is necessary when using CPU, while it is not for GPU
- d_sino.finish()
- self.backproj(d_sino, d_x)
- d_x *= -d_C
- d_x += d_x_old
- if self.is_cpu:
- # This sync is necessary when using CPU, while it is not for GPU
- d_x.finish()
-
- return d_x
-
- __call__ = run
-
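-# SIRT usage sketch (illustrative; the sinogram layout follows the class
-# docstring above):
-#
-#     import numpy as np
-#     from silx.opencl.reconstruction import SIRT
-#     sirt = SIRT(sino.shape)   # `sino` is a float32 sinogram
-#     d_rec = sirt(sino, 100)   # 100 iterations, returns a pyopencl array
-#     rec = d_rec.get()         # reconstructed slice as a numpy array
-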
-
-class TV(ReconstructionAlgorithm):
- """
- A class for reconstruction with Total Variation regularization using the
- Chambolle-Pock TV reconstruction algorithm.
-
- :param sino_shape: shape of the sinogram. The sinogram is in the format
- (n_b, n_a) where n_b is the number of detector bins and
- n_a is the number of angles.
- :param slice_shape: Optional, shape of the reconstructed slice. By default,
- it is a square slice where the dimension is the
- "x dimension" of the sinogram (number of bins).
- :param axis_position: Optional, axis position. Default is
- `(shape[1]-1)/2.0`.
- :param angles: Optional, a list of custom angles in radian.
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly slower)
-
- .. warning:: This is a beta version of the Chambolle-Pock TV algorithm.
- Reconstruction fails at least on CPU (Xeon E3-1245 v5) when using
- the AMD OpenCL implementation.
- """
-
- def __init__(self, sino_shape, slice_shape=None, axis_position=None, angles=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- profile=False
- ):
- ReconstructionAlgorithm.__init__(self, sino_shape, slice_shape=slice_shape,
- axis_position=axis_position, angles=angles,
- ctx=ctx, devicetype=devicetype, platformid=platformid,
- deviceid=deviceid, profile=profile)
- self.compute_preconditioners()
-
- # Create a LinAlg instance
- self.linalg = LinAlg(self.backprojector.slice_shape, ctx=self.ctx)
- # Positivity constraint
- self.elwise_clamp = ElementwiseKernel(self.ctx, "float *a", "a[i] = max(a[i], 0.0f);")
- # Projection onto the L-infinity ball of radius Lambda
- self.elwise_proj_linf = ElementwiseKernel(
- self.ctx,
- "float2* a, float Lambda",
- "a[i].x = copysign(min(fabs(a[i].x), Lambda), a[i].x); a[i].y = copysign(min(fabs(a[i].y), Lambda), a[i].y);",
- "elwise_proj_linf"
- )
- # Additional arrays
- self.linalg.gradient(self.d_x)
- self.d_p = parray.empty_like(self.linalg.cl_mem["d_gradient"])
- self.d_q = parray.empty_like(self.d_data)
- self.d_g = self.linalg.d_image
- self.d_tmp = parray.empty_like(self.d_x)
- self.d_p.fill(0)
- self.d_q.fill(0)
- self.d_tmp.fill(0)
- self.add_to_cl_mem({
- "d_p": self.d_p,
- "d_q": self.d_q,
- "d_tmp": self.d_tmp,
- })
-
- self.theta = 1.0
-
- def compute_preconditioners(self):
- """
- Create a diagonal preconditioner for the projection and backprojection
- operator.
- Each term of the diagonal is the sum of the projector/backprojector
- along rows [2],
- i.e. the projection/backprojection of an array of ones.
-
- [2] T. Pock, A. Chambolle,
- Diagonal preconditioning for first order primal-dual algorithms in
- convex optimization,
- International Conference on Computer Vision, 2011
- """
-
- # Compute the diagonal preconditioner "Sigma"
- slice_ones = np.ones(self.backprojector.slice_shape, dtype=np.float32)
- Sigma_k = 1./self.projector.projection(slice_ones)
- Sigma_k[np.logical_not(np.isfinite(Sigma_k))] = 1.
- self.d_Sigma_k = parray.to_device(self.queue, Sigma_k)
- self.d_Sigma_kp1 = self.d_Sigma_k + 1 # TODO: memory vs computation
- self.Sigma_grad = 1/2.0 # For discrete gradient, sum|D_i,j| = 2 along lines or cols
-
- # Compute the diagonal preconditioner "Tau"
- sino_ones = np.ones(self.sino_shape, dtype=np.float32)
- C = self.backprojector.backprojection(sino_ones)
- Tau = 1./(C + 2.)
- self.d_Tau = parray.to_device(self.queue, Tau)
-
- self.add_to_cl_mem({
- "d_Sigma_k": self.d_Sigma_k,
- "d_Sigma_kp1": self.d_Sigma_kp1,
- "d_Tau": self.d_Tau
- })
-
- def run(self, data, n_it, Lambda, pos_constraint=False):
- """
- Run n_it iterations of the TV-regularized reconstruction,
- with the regularization parameter Lambda.
- """
- cl.enqueue_copy(self.queue, self.d_data.data, np.ascontiguousarray(data.astype(np.float32)))
-
- d_x = self.d_x
- d_x_old = self.d_x_old
- d_tmp = self.d_tmp
- d_sino = self.d_sino
- d_p = self.d_p
- d_q = self.d_q
- d_g = self.d_g
-
- d_x *= 0
- d_p *= 0
- d_q *= 0
-
- for k in range(0, n_it):
- # Update primal variables
- d_x_old[:] = d_x[:]
- #~ x = x + Tau*div(p) - Tau*Kadj(q)
- self.backproj(d_q, d_tmp)
- self.linalg.divergence(d_p)
- # TODO: this in less than three ops (one kernel ?)
- d_g -= d_tmp # d_g -> L.d_image
- d_g *= self.d_Tau
- d_x += d_g
-
- if pos_constraint:
- self.elwise_clamp(d_x)
-
- # Update dual variables
- #~ p = proj_linf(p + Sigma_grad*gradient(x + theta*(x - x_old)), Lambda)
- d_tmp[:] = d_x[:]
- # FIXME: mul_add is out of place, put an equivalent thing in linalg...
- #~ d_tmp.mul_add(1 + theta, d_x_old, -theta)
- d_tmp *= 1+self.theta
- d_tmp -= self.theta*d_x_old
- self.linalg.gradient(d_tmp)
- # TODO: out of place mul_add
- #~ d_p.mul_add(1, L.cl_mem["d_gradient"], Sigma_grad)
- self.linalg.cl_mem["d_gradient"] *= self.Sigma_grad
- d_p += self.linalg.cl_mem["d_gradient"]
- self.elwise_proj_linf(d_p, Lambda)
-
- #~ q = (q + Sigma_k*K(x + theta*(x - x_old)) - Sigma_k*data)/(1.0 + Sigma_k)
- self.proj(d_tmp, d_sino)
- # TODO: this in less instructions
- d_sino -= self.d_data
- d_sino *= self.d_Sigma_k
- d_q += d_sino
- d_q /= self.d_Sigma_kp1
- return d_x
-
- __call__ = run
diff --git a/silx/opencl/setup.py b/silx/opencl/setup.py
deleted file mode 100644
index 10fb1be..0000000
--- a/silx/opencl/setup.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# coding: utf-8
-#
-# Copyright (C) 2016-2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-
-from __future__ import division
-
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__authors__ = ["J. Kieffer"]
-__date__ = "16/10/2017"
-
-import os.path
-from numpy.distutils.misc_util import Configuration
-
-
-def configuration(parent_package='', top_path=None):
- config = Configuration('opencl', parent_package, top_path)
- path = os.path.dirname(os.path.abspath(__file__))
- if os.path.exists(os.path.join(path, 'sift')):
- config.add_subpackage('sift')
- config.add_subpackage('codec')
- config.add_subpackage('test')
- return config
-
-
-if __name__ == "__main__":
- from numpy.distutils.core import setup
- setup(configuration=configuration)
diff --git a/silx/opencl/sinofilter.py b/silx/opencl/sinofilter.py
deleted file mode 100644
index d608744..0000000
--- a/silx/opencl/sinofilter.py
+++ /dev/null
@@ -1,435 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016-2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for sinogram filtering on CPU/GPU."""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "07/06/2019"
-
-import numpy as np
-from math import pi
-
-
-import pyopencl.array as parray
-from .common import pyopencl as cl
-from .processing import OpenclProcessing
-from ..math.fft.clfft import CLFFT, __have_clfft__
-from ..math.fft.npfft import NPFFT
-from ..image.tomography import generate_powers, get_next_power, compute_fourier_filter
-from ..utils.deprecation import deprecated
-
-
-
-class SinoFilter(OpenclProcessing):
- """A class for performing sinogram filtering on GPU using OpenCL.
-
- This is a convolution in the Fourier space, along one dimension:
-
- - In 2D: (n_a, d_x): n_a filterings (1D FFT of size d_x)
- - In 3D: (n_z, n_a, d_x): n_z*n_a filterings (1D FFT of size d_x)
- """
- kernel_files = ["array_utils.cl"]
- powers = generate_powers()
-
- def __init__(self, sino_shape, filter_name=None, ctx=None,
- devicetype="all", platformid=None, deviceid=None,
- profile=False, extra_options=None):
- """Constructor of OpenCL FFT-Convolve.
-
- :param sino_shape: shape of the sinogram.
- :param filter_name: Name of the filter. Default is "ram-lak".
- :param ctx: actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param platformid: integer with the platform_identifier, as given by
- clinfo
- :param deviceid: Integer with the device identifier, as given by clinfo
- :param profile: switch on profiling to be able to profile at the kernel
- level, store profiling elements (makes code slightly
- slower)
- :param dict extra_options: Advanced extra options.
- Current options are: cutoff, use_numpy_fft
- """
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- profile=profile)
-
- self._init_extra_options(extra_options)
- self._calculate_shapes(sino_shape)
- self._init_fft()
- self._allocate_memory()
- self._compute_filter(filter_name)
- self._init_kernels()
-
- def _calculate_shapes(self, sino_shape):
- """
-
- :param sino_shape: shape of the sinogram.
- """
- self.ndim = len(sino_shape)
- if self.ndim == 2:
- n_angles, dwidth = sino_shape
- else:
- raise ValueError("Invalid sinogram number of dimensions: "
- "expected 2 dimensions")
- self.sino_shape = sino_shape
- self.n_angles = n_angles
- self.dwidth = dwidth
- self.dwidth_padded = get_next_power(2 * self.dwidth, powers=self.powers)
- self.sino_padded_shape = (n_angles, self.dwidth_padded)
- sino_f_shape = list(self.sino_padded_shape)
- sino_f_shape[-1] = sino_f_shape[-1] // 2 + 1
- self.sino_f_shape = tuple(sino_f_shape)
-
- def _init_extra_options(self, extra_options):
- """
-
- :param dict extra_options: Advanced extra options.
- Current options are: cutoff, use_numpy_fft
- """
- self.extra_options = {
- "cutoff": 1.,
- "use_numpy_fft": False,
- }
- if extra_options is not None:
- self.extra_options.update(extra_options)
-
- def _init_fft(self):
- if __have_clfft__ and not(self.extra_options["use_numpy_fft"]):
- self.fft_backend = "opencl"
- self.fft = CLFFT(
- self.sino_padded_shape,
- dtype=np.float32,
- axes=(-1,),
- ctx=self.ctx,
- )
- else:
- self.fft_backend = "numpy"
- print("The gpyfft module was not found. The Fourier transforms "
- "will be done on CPU. For more performances, it is advised "
- "to install gpyfft.""")
- self.fft = NPFFT(
- template=np.zeros(self.sino_padded_shape, "f"),
- axes=(-1,),
- )
-
- def _allocate_memory(self):
- self.d_filter_f = parray.zeros(self.queue, (self.sino_f_shape[-1],), np.complex64)
- self.is_cpu = (self.device.type == "CPU")
- # These are already allocated by FFT() if using the opencl backend
- if self.fft_backend == "opencl":
- self.d_sino_padded = self.fft.data_in
- self.d_sino_f = self.fft.data_out
- else:
- # When using the numpy backend, arrays are not pre-allocated
- self.d_sino_padded = np.zeros(self.sino_padded_shape, "f")
- self.d_sino_f = np.zeros(self.sino_f_shape, np.complex64)
- # These are needed for rectangular memcpy in certain cases (see below).
- self.tmp_sino_device = parray.zeros(self.queue, self.sino_shape, "f")
- self.tmp_sino_host = np.zeros(self.sino_shape, "f")
-
- def _compute_filter(self, filter_name):
- """
-
- :param str filter_name: filter name
- """
- self.filter_name = filter_name or "ram-lak"
- filter_f = compute_fourier_filter(
- self.dwidth_padded,
- self.filter_name,
- cutoff=self.extra_options["cutoff"],
- )[:self.dwidth_padded // 2 + 1] # R2C
- self.set_filter(filter_f, normalize=True)
-
- def set_filter(self, h_filt, normalize=True):
- """
- Set a filter for sinogram filtering.
-
- :param h_filt: Filter. Each line of the sinogram will be filtered with
- this filter. It has to be the Real-to-Complex Fourier Transform
- of some real filter, padded to 2*sinogram_width.
- :param normalize: Whether to normalize the filter with pi/num_angles.
- """
- if h_filt.size != self.sino_f_shape[-1]:
- raise ValueError(
- """
- Invalid filter size: expected %d, got %d.
- Please check that the filter is the Fourier R2C transform of
- some real 1D filter.
- """
- % (self.sino_f_shape[-1], h_filt.size)
- )
- if not(np.iscomplexobj(h_filt)):
- print("Warning: expected a complex Fourier filter")
- self.filter_f = h_filt
- if normalize:
- self.filter_f *= pi / self.n_angles
- self.filter_f = self.filter_f.astype(np.complex64)
- self.d_filter_f[:] = self.filter_f[:]
-
- def _init_kernels(self):
- OpenclProcessing.compile_kernels(self, self.kernel_files)
- h, w = self.d_sino_f.shape
- self.mult_kern_args = (self.queue, (int(w), (int(h))), None,
- self.d_sino_f.data,
- self.d_filter_f.data,
- np.int32(w),
- np.int32(h))
-
- def check_array(self, arr):
- if arr.dtype != np.float32:
- raise ValueError("Expected data type = numpy.float32")
- if arr.shape != self.sino_shape:
- raise ValueError("Expected sinogram shape %s, got %s" %
- (self.sino_shape, arr.shape))
- if not(isinstance(arr, np.ndarray) or isinstance(arr, parray.Array)):
- raise ValueError("Expected either numpy.ndarray or "
- "pyopencl.array.Array")
-
- def copy2d(self, dst, src, transfer_shape, dst_offset=(0, 0),
- src_offset=(0, 0)):
- """
- Rectangular device-to-device copy of a 2D region.
-
- :param dst: destination pyopencl array
- :param src: source pyopencl array
- :param transfer_shape: 2-tuple, shape (rows, columns) of the region to copy
- :param dst_offset: 2-tuple, (row, column) offset in the destination array
- :param src_offset: 2-tuple, (row, column) offset in the source array
- """
- shape = tuple(int(i) for i in transfer_shape[::-1])
- ev = self.kernels.cpy2d(self.queue, shape, None,
- dst.data,
- src.data,
- np.int32(dst.shape[1]),
- np.int32(src.shape[1]),
- np.int32(dst_offset),
- np.int32(src_offset),
- np.int32(transfer_shape[::-1]))
- ev.wait()
-
- def copy2d_host(self, dst, src, transfer_shape, dst_offset=(0, 0),
- src_offset=(0, 0)):
- """
- Rectangular host-to-host copy of a 2D region (plain numpy slicing).
-
- :param dst: destination numpy array
- :param src: source numpy array
- :param transfer_shape: 2-tuple, shape (rows, columns) of the region to copy
- :param dst_offset: 2-tuple, (row, column) offset in the destination array
- :param src_offset: 2-tuple, (row, column) offset in the source array
- """
- s = transfer_shape
- do = dst_offset
- so = src_offset
- dst[do[0]:do[0] + s[0], do[1]:do[1] + s[1]] = src[so[0]:so[0] + s[0], so[1]:so[1] + s[1]]
-
- def _prepare_input_sino(self, sino):
- """
- :param sino: sinogram
- """
- self.check_array(sino)
- self.d_sino_padded.fill(0)
- if self.fft_backend == "opencl":
- # OpenCL backend: FFT/mult/IFFT are done on device.
- if isinstance(sino, np.ndarray):
- # OpenCL backend + numpy input: copy H->D.
- # As pyopencl does not support rectangular copies, we have to
- # do a copy H->D in a temporary device buffer, and then call a
- # kernel doing the rectangular D-D copy.
- self.tmp_sino_device[:] = sino[:]
- if self.is_cpu:
- self.tmp_sino_device.finish()
- d_sino_ref = self.tmp_sino_device
- else:
- d_sino_ref = sino
- # Rectangular copy D->D
- self.copy2d(self.d_sino_padded, d_sino_ref, self.sino_shape)
- if self.is_cpu:
- self.d_sino_padded.finish() # should not be required here
- else:
- # Numpy backend: FFT/mult/IFFT are done on host.
- if not(isinstance(sino, np.ndarray)):
- # Numpy backend + pyopencl input: need to copy D->H
- self.tmp_sino_host[:] = sino[:]
- h_sino_ref = self.tmp_sino_host
- else:
- h_sino_ref = sino
- # Rectangular copy H->H
- self.copy2d_host(self.d_sino_padded, h_sino_ref, self.sino_shape)
-
- def _get_output_sino(self, output):
- """
- :param output: optional output sinogram (numpy array or pyopencl array).
- :return: sinogram
- """
- if output is None:
- res = np.zeros(self.sino_shape, dtype=np.float32)
- else:
- res = output
- if self.fft_backend == "opencl":
- if isinstance(res, np.ndarray):
- # OpenCL backend + numpy output: copy D->H
- # As pyopencl does not support rectangular copies, we first have
- # to call a kernel doing rectangular copy D->D, then do a copy
- # D->H.
- self.copy2d(dst=self.tmp_sino_device,
- src=self.d_sino_padded,
- transfer_shape=self.sino_shape)
- if self.is_cpu:
- self.tmp_sino_device.finish() # should not be required here
- res[:] = self.tmp_sino_device.get()[:]
- else:
- if self.is_cpu:
- self.d_sino_padded.finish()
- self.copy2d(res, self.d_sino_padded, self.sino_shape)
- if self.is_cpu:
- res.finish() # should not be required here
- else:
- if not(isinstance(res, np.ndarray)):
- # Numpy backend + pyopencl output: rect copy H->H + copy H->D
- self.copy2d_host(dst=self.tmp_sino_host,
- src=self.d_sino_padded,
- transfer_shape=self.sino_shape)
- res[:] = self.tmp_sino_host[:]
- else:
- # Numpy backend + numpy output: rect copy H->H
- self.copy2d_host(res, self.d_sino_padded, self.sino_shape)
- return res
-
- def _do_fft(self):
- if self.fft_backend == "opencl":
- self.fft.fft(self.d_sino_padded, output=self.d_sino_f)
- if self.is_cpu:
- self.d_sino_f.finish()
- else:
- # numpy backend does not support "output=" argument,
- # and rfft always returns a complex128 result.
- res = self.fft.fft(self.d_sino_padded).astype(np.complex64)
- self.d_sino_f[:] = res[:]
-
- def _multiply_fourier(self):
- if self.fft_backend == "opencl":
- # Everything is on device. Call the multiplication kernel.
- ev = self.kernels.inplace_complex_mul_2Dby1D(
- *self.mult_kern_args
- )
- ev.wait()
- if self.is_cpu:
- self.d_sino_f.finish() # should not be required here
- else:
- # Everything is on host.
- self.d_sino_f *= self.filter_f
-
- def _do_ifft(self):
- if self.fft_backend == "opencl":
- if self.is_cpu:
- self.d_sino_padded.fill(0)
- self.d_sino_padded.finish()
- self.fft.ifft(self.d_sino_f, output=self.d_sino_padded)
- if self.is_cpu:
- self.d_sino_padded.finish()
- else:
- # numpy backend does not support "output=" argument,
- # and irfft always returns a float64 result.
- res = self.fft.ifft(self.d_sino_f).astype("f")
- self.d_sino_padded[:] = res[:]
-
- def filter_sino(self, sino, output=None):
- """
-
- :param sino: sinogram
- :param output:
- :return: filtered sinogram
- """
- # Handle input sinogram
- self._prepare_input_sino(sino)
- # FFT
- self._do_fft()
- # multiply with filter in the Fourier domain
- self._multiply_fourier()
- # iFFT
- self._do_ifft()
- # return
- res = self._get_output_sino(output)
- return res
- # ~ return output
-
- __call__ = filter_sino
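[Editor's note] filter_sino() above boils down to a pad / FFT / multiply / inverse-FFT / crop pipeline. The sketch below mirrors the numpy-backend path purely for illustration; the helper name filter_sino_numpy and the even-sized FFT assumption are mine, not part of silx.

    import numpy as np

    def filter_sino_numpy(sino, filter_f):
        # Zero-pad each projection, R2C FFT, multiply by the Fourier filter,
        # inverse FFT, then crop back to the original sinogram width.
        n_angles, n_bins = sino.shape
        fft_size = 2 * (filter_f.size - 1)   # R2C layout: fft_size // 2 + 1 bins
        sino_padded = np.zeros((n_angles, fft_size), dtype=np.float32)
        sino_padded[:, :n_bins] = sino
        sino_f = np.fft.rfft(sino_padded, axis=1) * filter_f
        return np.fft.irfft(sino_f, axis=1)[:, :n_bins].astype(np.float32)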
-
-
-
-
-# -------------------
-# - Compatibility -
-# -------------------
-
-
-def nextpow2(N):
- p = 1
- while p < N:
- p *= 2
- return p
-
-
-@deprecated(replacement="Backprojection.sino_filter", since_version="0.10")
-def fourier_filter(sino, filter_=None, fft_size=None):
- """Simple np based implementation of fourier space filter.
- This function is deprecated, please use silx.opencl.sinofilter.SinoFilter.
-
- :param sino: of shape shape = (num_projs, num_bins)
- :param filter: filter function to apply in fourier space
- :fft_size: size on which perform the fft. May be larger than the sino array
- :return: filtered sinogram
- """
- assert sino.ndim == 2
- num_projs, num_bins = sino.shape
- if fft_size is None:
- fft_size = nextpow2(num_bins * 2 - 1)
- else:
- assert fft_size >= num_bins
- if fft_size == num_bins:
- sino_zeropadded = sino.astype(np.float32)
- else:
- sino_zeropadded = np.zeros((num_projs, fft_size),
- dtype=np.complex64)
- sino_zeropadded[:, :num_bins] = sino.astype(np.float32)
-
- if filter_ is None:
- h = np.zeros(fft_size, dtype=np.float32)
- L2 = fft_size // 2 + 1
- h[0] = 1 / 4.
- j = np.linspace(1, L2, L2 // 2, False)
- h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2)
- h[L2:] = np.copy(h[1:L2 - 1][::-1])
- filter_ = np.fft.fft(h).astype(np.complex64)
-
- # Linear convolution
- sino_f = np.fft.fft(sino, fft_size)
- sino_f = sino_f * filter_
- sino_filtered = np.fft.ifft(sino_f)[:, :num_bins].real
-
- return np.ascontiguousarray(sino_filtered.real, dtype=np.float32)
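[Editor's note] For completeness, here is a hedged sketch of building a Ram-Lak filter in the layout expected by SinoFilter.set_filter() above, reusing the spatial construction of the deprecated fourier_filter(). The fft_size argument is an assumption: it must match the padded width actually used internally, i.e. the returned size must equal sino_f_shape[-1].

    import numpy as np

    def ramlak_filter_rfft(fft_size):
        # Spatial Ram-Lak kernel (same construction as fourier_filter() above),
        # returned as its real-to-complex FFT, as expected by set_filter().
        h = np.zeros(fft_size, dtype=np.float32)
        L2 = fft_size // 2 + 1
        h[0] = 1 / 4.
        j = np.linspace(1, L2, L2 // 2, False)
        h[1:L2:2] = -1. / (np.pi ** 2 * j ** 2)
        h[L2:] = np.copy(h[1:L2 - 1][::-1])
        return np.fft.rfft(h).astype(np.complex64)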
diff --git a/silx/opencl/sparse.py b/silx/opencl/sparse.py
deleted file mode 100644
index 514589a..0000000
--- a/silx/opencl/sparse.py
+++ /dev/null
@@ -1,377 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Module for data sparsification on CPU/GPU."""
-
-from __future__ import absolute_import, print_function, with_statement, division
-
-__authors__ = ["P. Paleo"]
-__license__ = "MIT"
-__date__ = "07/06/2019"
-
-import numpy
-import pyopencl.array as parray
-from collections import namedtuple
-from pyopencl.scan import GenericScanKernel
-from pyopencl.tools import dtype_to_ctype
-from .common import pyopencl as cl
-from .processing import OpenclProcessing, EventDescription, BufferDescription
-mf = cl.mem_flags
-
-
-CSRData = namedtuple("CSRData", ["data", "indices", "indptr"])
-
-def tuple_to_csrdata(arrs):
- """
- Converts a 3-tuple to a CSRData namedtuple.
- """
- if arrs is None:
- return None
- return CSRData(data=arrs[0], indices=arrs[1], indptr=arrs[2])
-
-
-
-class CSR(OpenclProcessing):
- kernel_files = ["sparse.cl"]
-
- def __init__(self, shape, dtype="f", max_nnz=None, idx_dtype=numpy.int32,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, memory=None, profile=False):
- """
- Compute Compressed Sparse Row format of an image (2D matrix).
- It is designed to be compatible with scipy.sparse.csr_matrix.
-
- :param shape: tuple
- Matrix shape.
- :param dtype: str or numpy.dtype, optional
- Numeric data type. By default, sparse matrix data will be float32.
- :param max_nnz: int, optional
- Maximum number of non-zero elements. By default, the arrays "data"
- and "indices" are allocated with prod(shape) elements, but
- in practice much less space is needed.
- The number of non-zero items cannot be known in advance, but one can
- pass an estimated upper bound via this parameter to save memory.
-
- OpenCL processing parameters
- -----------------------------
- Please refer to the documentation of silx.opencl.processing.OpenclProcessing
- for information on the other parameters.
- """
-
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, memory=memory,
- profile=profile)
- self._set_parameters(shape, dtype, max_nnz, idx_dtype)
- self._allocate_memory()
- self._setup_kernels()
-
- # --------------------------------------------------------------------------
- # -------------------------- Initialization --------------------------------
- # --------------------------------------------------------------------------
-
- def _set_parameters(self, shape, dtype, max_nnz, idx_dtype):
- self.shape = shape
- self.size = numpy.prod(shape)
- self._set_idx_dtype(idx_dtype)
- assert len(shape) == 2 #
- if max_nnz is None:
- self.max_nnz = numpy.prod(shape) # worst case
- else:
- self.max_nnz = int(max_nnz)
- self._set_dtype(dtype)
-
-
- def _set_idx_dtype(self, idx_dtype):
- idx_dtype = numpy.dtype(idx_dtype)
- if idx_dtype.kind not in ["i", "u"]:
- raise ValueError("Not an integer type: %s" % idx_dtype)
- # scan value type must have size divisible by 4 bytes
- if idx_dtype.itemsize % 4 != 0:
- raise ValueError("Due to an internal pyopencl limitation, idx_dtype type must have size divisible by 4 bytes")
- self.indice_dtype = idx_dtype #
-
-
- def _set_dtype(self, dtype):
- self.dtype = numpy.dtype(dtype)
- if self.dtype.kind == "c":
- raise ValueError("Complex data is not supported")
- if self.dtype == numpy.dtype(numpy.float32):
- self._c_zero_str = "0.0f"
- elif self.dtype == numpy.dtype(numpy.float64):
- self._c_zero_str = "0.0"
- else: # assuming integer
- self._c_zero_str = "0"
- self.c_dtype = dtype_to_ctype(self.dtype)
- self.idx_c_dtype = dtype_to_ctype(self.indice_dtype)
-
-
- def _allocate_memory(self):
- self.is_cpu = (self.device.type == "CPU") # move to OpenclProcessing ?
- self.buffers = [
- BufferDescription("array", (self.size,), self.dtype, mf.READ_ONLY),
- BufferDescription("data", (self.max_nnz,), self.dtype, mf.READ_WRITE),
- BufferDescription("indices", (self.max_nnz,), self.indice_dtype, mf.READ_WRITE),
- BufferDescription("indptr", (self.shape[0]+1,), self.indice_dtype, mf.READ_WRITE),
- ]
- self.allocate_buffers(use_array=True)
- for arr_name in ["array", "data", "indices", "indptr"]:
- setattr(self, arr_name, self.cl_mem[arr_name])
- self.cl_mem[arr_name].fill(0) # allocate_buffers() uses empty()
- self._old_array = self.array
- self._old_data = self.data
- self._old_indices = self.indices
- self._old_indptr = self.indptr
-
-
- def _setup_kernels(self):
- self._setup_compaction_kernel()
- self._setup_decompaction_kernel()
-
-
- def _setup_compaction_kernel(self):
- kernel_signature = str(
- "__global %s *data, \
- __global %s *data_compacted, \
- __global %s *indices, \
- __global %s* indptr \
- """ % (self.c_dtype, self.c_dtype, self.idx_c_dtype, self.idx_c_dtype)
- )
- if self.dtype.kind == "f":
- map_nonzero_expr = "(fabs(data[i]) > %s) ? 1 : 0" % self._c_zero_str
- elif self.dtype.kind in ["u", "i"]:
- map_nonzero_expr = "(data[i] != %s) ? 1 : 0" % self._c_zero_str
- else:
- raise ValueError("Unknown data type")
-
- self.scan_kernel = GenericScanKernel(
- self.ctx, self.indice_dtype,
- arguments=kernel_signature,
- input_expr=map_nonzero_expr,
- scan_expr="a+b", neutral="0",
- output_statement="""
- // item is the running sum of input_expr(i), i.e. the cumsum of "nonzero"
- if (prev_item != item) {
- data_compacted[item-1] = data[i];
- indices[item-1] = GET_INDEX(i);
- }
- // The last cumsum element of each line of "nonzero" goes to indptr[(i/IMAGE_WIDTH)+1]
- if ((i+1) % IMAGE_WIDTH == 0) {
- indptr[(i/IMAGE_WIDTH)+1] = item;
- }
- """,
- options=["-DIMAGE_WIDTH=%d" % self.shape[1]],
- preamble="#define GET_INDEX(i) (i % IMAGE_WIDTH)",
- )
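[Editor's note] For readers unfamiliar with pyopencl's GenericScanKernel: the output_statement above uses the running cumulative sum ("item") of the non-zero indicator to scatter values and column indices, and writes the per-row totals into indptr. A host-side numpy equivalent, given purely as an illustration, is:

    import numpy as np

    def csr_compaction_reference(image):
        flat = image.ravel()
        width = image.shape[1]
        nonzero = (flat != 0)
        cumsum = np.cumsum(nonzero)              # plays the role of "item"
        idx = np.nonzero(nonzero)[0]
        data = flat[idx]                         # data_compacted
        indices = idx % width                    # GET_INDEX(i)
        indptr = np.zeros(image.shape[0] + 1, dtype=np.int64)
        indptr[1:] = cumsum[width - 1::width]    # last cumsum of each row
        return data, indices, indptr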
-
-
- def _setup_decompaction_kernel(self):
- OpenclProcessing.compile_kernels(
- self,
- self.kernel_files,
- compile_options=[
- "-DIMAGE_WIDTH=%d" % self.shape[1],
- "-DDTYPE=%s" % self.c_dtype,
- "-DIDX_DTYPE=%s" % self.idx_c_dtype,
- ]
- )
- device = self.ctx.devices[0]
- wg_x = min(
- device.max_work_group_size,
- 32,
- self.kernels.max_workgroup_size("densify_csr")
- )
- self._decomp_wg = (wg_x, 1)
- self._decomp_grid = (self._decomp_wg[0], self.shape[0])
-
-
- # --------------------------------------------------------------------------
- # -------------------------- Array utils -----------------------------------
- # --------------------------------------------------------------------------
-
- # TODO handle pyopencl Buffer
- def check_array(self, arr):
- """
- Check that the provided array is compatible with the current context.
-
- :param arr: numpy.ndarray or pyopencl.array.Array
- 2D array in dense format.
- """
- assert arr.size == self.size
- assert arr.dtype == self.dtype
-
-
- # TODO handle pyopencl Buffer
- def check_sparse_arrays(self, csr_data):
- """
- Check that the provided sparse arrays are compatible with the current
- context.
-
- :param csr_data: CSRData namedtuple.
- It contains the arrays "data", "indices" and "indptr".
- """
- assert isinstance(csr_data, CSRData)
- for arr in [csr_data.data, csr_data.indices, csr_data.indptr]:
- assert arr.ndim == 1
- assert csr_data.data.size <= self.max_nnz
- assert csr_data.indices.size <= self.max_nnz
- assert csr_data.indptr.size == self.shape[0]+1
- assert csr_data.data.dtype == self.dtype
- assert csr_data.indices.dtype == self.indice_dtype
- assert csr_data.indptr.dtype == self.indice_dtype
-
-
- def set_array(self, arr):
- """
- Set the provided array as the current context 2D matrix.
-
- :param arr: numpy.ndarray or pyopencl.array.Array
- 2D array in dense format.
- """
- if arr is None:
- return
- self.check_array(arr)
- # GenericScanKernel only supports 1D data
- if isinstance(arr, parray.Array):
- self._old_array = self.array
- self.array = arr
- elif isinstance(arr, numpy.ndarray):
- self.array[:] = arr.ravel()[:]
- else:
- raise ValueError("Expected pyopencl array or numpy array")
-
-
- def set_sparse_arrays(self, csr_data):
- if csr_data is None:
- return
- self.check_sparse_arrays(csr_data)
- for name, arr in {"data": csr_data.data, "indices": csr_data.indices, "indptr": csr_data.indptr}.items():
- # The current array is a device array. Don't copy, use it directly
- if isinstance(arr, parray.Array):
- setattr(self, "_old_" + name, getattr(self, name))
- setattr(self, name, arr)
- # The current array is a numpy.ndarray: copy H2D
- elif isinstance(arr, numpy.ndarray):
- getattr(self, name)[:arr.size] = arr[:]
- else:
- raise ValueError("Unsupported array type: %s" % type(arr))
-
-
- def _recover_arrays_references(self):
- """
- Recover the previous arrays references, and return the references of the
- "current" arrays.
- """
- array = self.array
- data = self.data
- indices = self.indices
- indptr = self.indptr
- for name in ["array", "data", "indices", "indptr"]:
- # self.X = self._old_X
- setattr(self, name, getattr(self, "_old_" + name))
- return array, (data, indices, indptr)
-
-
- def get_sparse_arrays(self, output):
- """
- Get the sparse CSR arrays (data, indices, indptr) of the current context.
-
- :param output: tuple or None
- tuple in the form (data, indices, indptr). These arrays have to be
- compatible with the current context (size and data type).
- The content of these arrays will be overwritten with the result of
- the previous computation.
- """
- numels = self.max_nnz
- if output is None:
- data = self.data.get()[:numels]
- ind = self.indices.get()[:numels]
- indptr = self.indptr.get()
- res = (data, ind, indptr)
- else:
- res = output
- return res
-
-
- def get_array(self, output):
- if output is None:
- res = self.array.get().reshape(self.shape)
- else:
- res = output
- return res
-
- # --------------------------------------------------------------------------
- # -------------------------- Compaction ------------------------------------
- # --------------------------------------------------------------------------
-
- def sparsify(self, arr, output=None):
- """
- Convert an image (2D matrix) into a CSR representation.
-
- :param arr: numpy.ndarray or pyopencl.array.Array
- Input array.
- :param output: tuple of pyopencl.array.Array, optional
- If provided, this must be a tuple of 3 arrays (data, indices, indptr).
- The content of each array is overwritten by the computation result.
- """
- self.set_array(arr)
- self.set_sparse_arrays(tuple_to_csrdata(output))
- evt = self.scan_kernel(
- self.array,
- self.data,
- self.indices,
- self.indptr,
- )
- #~ evt.wait()
- self.profile_add(evt, "sparsification kernel")
- res = self.get_sparse_arrays(output)
- self._recover_arrays_references()
- return res
-
- # --------------------------------------------------------------------------
- # -------------------------- Decompaction ----------------------------------
- # --------------------------------------------------------------------------
-
- def densify(self, data, indices, indptr, output=None):
- """Convert a CSR representation (data, indices, indptr) back to a dense 2D array."""
- self.set_sparse_arrays(
- CSRData(data=data, indices=indices, indptr=indptr)
- )
- self.set_array(output)
- evt = self.kernels.densify_csr(
- self.queue,
- self._decomp_grid,
- self._decomp_wg,
- self.data.data,
- self.indices.data,
- self.indptr.data,
- self.array.data,
- numpy.int32(self.shape[0]),
- )
- #~ evt.wait()
- self.profile_add(evt, "desparsification kernel")
- res = self.get_array(output)
- self._recover_arrays_references()
- return res
-
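[Editor's note] To make the intent of this removed class concrete, here is a small hypothetical usage sketch cross-checked against scipy.sparse.csr_matrix; the image content and shapes are made up for illustration, and the import refers to the module as it existed before this removal.

    import numpy as np
    import scipy.sparse
    from silx.opencl.sparse import CSR   # module shown (removed) above

    image = np.zeros((64, 64), dtype=np.float32)
    image[10:20, 30:40] = 1.0

    csr = CSR(image.shape, dtype="f")
    data, indices, indptr = csr.sparsify(image)

    ref = scipy.sparse.csr_matrix(image)
    nnz = ref.data.size
    assert np.allclose(data[:nnz], ref.data)
    assert np.array_equal(indices[:nnz], ref.indices)
    assert np.array_equal(indptr, ref.indptr)

    dense = csr.densify(data, indices, indptr)   # round-trip back to dense
    assert np.allclose(dense, image)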
diff --git a/silx/opencl/statistics.py b/silx/opencl/statistics.py
deleted file mode 100644
index a96ee33..0000000
--- a/silx/opencl/statistics.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: SILX
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2019 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-# .
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-# .
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""A module for performing basic statistical analysis (min, max, mean, std) on
-large data where numpy is not very efficient.
-"""
-
-__author__ = "Jerome Kieffer"
-__license__ = "MIT"
-__date__ = "19/05/2021"
-__copyright__ = "2012-2019, ESRF, Grenoble"
-__contact__ = "jerome.kieffer@esrf.fr"
-
-import logging
-import numpy
-from collections import OrderedDict, namedtuple
-from math import sqrt
-
-from .common import pyopencl
-from .processing import EventDescription, OpenclProcessing, BufferDescription
-from .utils import concatenate_cl_kernel
-
-if pyopencl:
- mf = pyopencl.mem_flags
- from pyopencl.reduction import ReductionKernel
- try:
- from pyopencl import cltypes
- except ImportError:
- v = pyopencl.array.vec()
- float8 = v.float8
- else:
- float8 = cltypes.float8
-
-else:
- raise ImportError("pyopencl is not installed")
-logger = logging.getLogger(__name__)
-
-StatResults = namedtuple("StatResults", ["min", "max", "cnt", "sum", "mean",
- "var", "std"])
-zero8 = "(float8)(FLT_MAX, -FLT_MAX, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)"
-# min max cnt cnt_e sum sum_e var var_e
-
-
-class Statistics(OpenclProcessing):
- """A class for doing statistical analysis using OpenCL
-
- :param int size: Number of elements in the input data to treat
- :param numpy.dtype dtype: Input data type
- :param numpy.ndarray template: Data template to extract size & dtype
- :param ctx: Actual working context, left to None for automatic
- initialization from device type or platformid/deviceid
- :param str devicetype: Type of device, can be "CPU", "GPU", "ACC" or "ALL"
- :param int platformid: Platform identifier as given by clinfo
- :param int deviceid: Device identifier as given by clinfo
- :param int block_size:
- Preferred workgroup size, may vary depending on the outcome of the compilation
- :param bool profile:
- Switch on profiling to be able to profile at the kernel level,
- store profiling elements (makes code slightly slower)
- """
- buffers = [
- BufferDescription("raw", 1, numpy.float32, mf.READ_ONLY),
- BufferDescription("converted", 1, numpy.float32, mf.READ_WRITE),
- ]
- kernel_files = ["preprocess.cl"]
- mapping = {numpy.int8: "s8_to_float",
- numpy.uint8: "u8_to_float",
- numpy.int16: "s16_to_float",
- numpy.uint16: "u16_to_float",
- numpy.uint32: "u32_to_float",
- numpy.int32: "s32_to_float"}
-
- def __init__(self, size=None, dtype=None, template=None,
- ctx=None, devicetype="all", platformid=None, deviceid=None,
- block_size=None, profile=False
- ):
- OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype,
- platformid=platformid, deviceid=deviceid,
- block_size=block_size, profile=profile)
- self.size = size
- self.dtype = dtype
- if template is not None:
- self.size = template.size
- self.dtype = template.dtype
-
- self.buffers = [BufferDescription(i.name, i.size * self.size, i.dtype, i.flags)
- for i in self.__class__.buffers]
-
- self.allocate_buffers(use_array=True)
- self.compile_kernels()
- self.set_kernel_arguments()
-
- def set_kernel_arguments(self):
- """Parametrize all kernel arguments"""
- for val in self.mapping.values():
- self.cl_kernel_args[val] = OrderedDict(((i, self.cl_mem[i]) for i in ("raw", "converted")))
-
- def compile_kernels(self):
- """Compile the kernel"""
- OpenclProcessing.compile_kernels(self,
- self.kernel_files,
- "-D NIMAGE=%i" % self.size)
- compiler_options = self.get_compiler_options(x87_volatile=True)
- src = concatenate_cl_kernel(("doubleword.cl", "statistics.cl"))
- self.reduction_comp = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
- self.reduction_simple = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics_simple(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
-
- if "cl_khr_fp64" in self.device.extensions:
- self.reduction_double = ReductionKernel(self.ctx,
- dtype_out=float8,
- neutral=zero8,
- map_expr="map_statistics(data, i)",
- reduce_expr="reduce_statistics_double(a,b)",
- arguments="__global float *data",
- preamble=src,
- options=compiler_options)
- else:
- logger.info("Device %s does not support double-precision arithmetics, fall-back on compensated one", self.device)
- self.reduction_double = self.reduction_comp
-
- def send_buffer(self, data, dest):
- """
- Send a numpy array to the device, including the cast on the device if
- possible
-
- :param numpy.ndarray data: numpy array with data
- :param dest: name of the buffer as registered in the class
- """
- logger.info("send data to %s", dest)
- dest_type = numpy.dtype([i.dtype for i in self.buffers if i.name == dest][0])
- events = []
- if (data.dtype == dest_type) or (data.dtype.itemsize > dest_type.itemsize):
- copy_image = pyopencl.enqueue_copy(self.queue,
- self.cl_mem[dest].data,
- numpy.ascontiguousarray(data, dest_type))
- events.append(EventDescription("copy H->D %s" % dest, copy_image))
- else:
- copy_image = pyopencl.enqueue_copy(self.queue,
- self.cl_mem["raw"].data,
- numpy.ascontiguousarray(data))
- kernel = getattr(self.program, self.mapping[data.dtype.type])
- cast_to_float = kernel(self.queue,
- (self.size,),
- None,
- self.cl_mem["raw"].data,
- self.cl_mem[dest].data)
- events += [
- EventDescription("copy H->D raw", copy_image),
- EventDescription(f"cast to float {dest}", cast_to_float)
- ]
- if self.profile:
- self.events += events
- return events
-
- def process(self, data, comp=True):
- """Actually calculate the statics on the data
-
- :param numpy.ndarray data: numpy array with the image
- :param comp: use Kahan compensated arithmetics for the calculation
- :return: Statistics named tuple
- :rtype: StatResults
- """
- if data.ndim != 1:
- data = data.ravel()
- size = data.size
- assert size <= self.size, "data size exceeds the allocated buffer size"
- events = []
- if comp is True:
- comp = "comp"
- elif comp is False:
- comp = "single"
- else:
- comp = comp.lower()
- with self.sem:
- self.send_buffer(data, "converted")
- if comp in ("single", "fp32", "float32"):
- reduction = self.reduction_simple
- elif comp in ("double", "fp64", "float64"):
- reduction = self.reduction_double
- else:
- reduction = self.reduction_comp
- res_d, evt = reduction(self.cl_mem["converted"][:self.size],
- queue=self.queue,
- return_event=True)
- events.append(EventDescription(f"statistical reduction {comp}", evt))
- if self.profile:
- self.events += events
- res_h = res_d.get()
- min_ = 1.0 * res_h["s0"]
- max_ = 1.0 * res_h["s1"]
- count = 1.0 * res_h["s2"] + res_h["s3"]
- sum_ = 1.0 * res_h["s4"] + res_h["s5"]
- m2 = 1.0 * res_h["s6"] + res_h["s7"]
- var = m2 / (count - 1.0)
- res = StatResults(min_,
- max_,
- count,
- sum_,
- sum_ / count,
- var,
- sqrt(var))
- return res
-
- __call__ = process
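[Editor's note] A short, hypothetical usage sketch of the class above; the data values are made up and the import refers to the module as it existed before this removal.

    import numpy as np
    from silx.opencl.statistics import Statistics   # module shown (removed) above

    data = (1000.0 * np.random.random((2048, 2048))).astype(np.float32)

    stats = Statistics(template=data)
    res = stats(data)       # StatResults(min, max, cnt, sum, mean, var, std)

    # Cross-check against numpy; ddof=1 matches var = M2 / (cnt - 1) above
    assert np.isclose(res.mean, data.mean(), rtol=1e-5)
    assert np.isclose(res.std, data.std(ddof=1), rtol=1e-4)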
diff --git a/silx/opencl/test/__init__.py b/silx/opencl/test/__init__.py
deleted file mode 100644
index 928dbaf..0000000
--- a/silx/opencl/test/__init__.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Project: silx
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2016 European Synchrotron Radiation Facility, Grenoble, France
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-__authors__ = ["J. Kieffer"]
-__license__ = "MIT"
-__date__ = "17/05/2021"
-
-import os
-import unittest
-from . import test_addition
-from . import test_medfilt
-from . import test_backprojection
-from . import test_projection
-from . import test_linalg
-from . import test_array_utils
-from ..codec import test as test_codec
-from . import test_image
-from . import test_kahan
-from . import test_doubleword
-from . import test_stats
-from . import test_convolution
-from . import test_sparse
-
-
-def suite():
- test_suite = unittest.TestSuite()
- test_suite.addTests(test_addition.suite())
- test_suite.addTests(test_medfilt.suite())
- test_suite.addTests(test_backprojection.suite())
- test_suite.addTests(test_projection.suite())
- test_suite.addTests(test_linalg.suite())
- test_suite.addTests(test_array_utils.suite())
- test_suite.addTests(test_codec.suite())
- test_suite.addTests(test_image.suite())
- test_suite.addTests(test_kahan.suite())
- test_suite.addTests(test_doubleword.suite())
- test_suite.addTests(test_stats.suite())
- test_suite.addTests(test_convolution.suite())
- test_suite.addTests(test_sparse.suite())
- # Allow removing sift from the project
- test_base_dir = os.path.dirname(__file__)
- sift_dir = os.path.join(test_base_dir, "..", "sift")
- if os.path.exists(sift_dir):
- from ..sift import test as test_sift
- test_suite.addTests(test_sift.suite())
-
- return test_suite
diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py
deleted file mode 100644
index 19dfdf0..0000000
--- a/silx/opencl/test/test_addition.py
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Sift implementation in Python + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of an addition
-"""
-
-__authors__ = ["Henri Payno, Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "30/11/2020"
-
-import logging
-import numpy
-
-import unittest
-from ..common import ocl, _measure_workgroup_size, query_kernel_info
-if ocl:
- import pyopencl
- import pyopencl.array
-from ..utils import get_opencl_code
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl, "PyOpenCl is missing")
-class TestAddition(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- super(TestAddition, cls).setUpClass()
- if ocl:
- cls.ctx = ocl.create_context()
- if logger.getEffectiveLevel() <= logging.INFO:
- cls.PROFILE = True
- cls.queue = pyopencl.CommandQueue(
- cls.ctx,
- properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
- else:
- cls.PROFILE = False
- cls.queue = pyopencl.CommandQueue(cls.ctx)
- cls.max_valid_wg = 0
-
- @classmethod
- def tearDownClass(cls):
- super(TestAddition, cls).tearDownClass()
- print("Maximum valid workgroup size %s on device %s" % (cls.max_valid_wg, cls.ctx.devices[0]))
- cls.ctx = None
- cls.queue = None
-
- def setUp(self):
- if ocl is None:
- return
- self.shape = 4096
- self.data = numpy.random.random(self.shape).astype(numpy.float32)
- self.d_array_img = pyopencl.array.to_device(self.queue, self.data)
- self.d_array_5 = pyopencl.array.empty_like(self.d_array_img)
- self.d_array_5.fill(-5)
- self.program = pyopencl.Program(self.ctx, get_opencl_code("addition")).build()
-
- def tearDown(self):
- self.img = self.data = None
- self.d_array_img = self.d_array_5 = self.program = None
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_add(self):
- """
- tests the addition kernel
- """
- maxi = int(round(numpy.log2(self.shape)))
- for i in range(maxi):
- d_array_result = pyopencl.array.empty_like(self.d_array_img)
- wg = 1 << i
- try:
- evt = self.program.addition(self.queue, (self.shape,), (wg,),
- self.d_array_img.data, self.d_array_5.data, d_array_result.data, numpy.int32(self.shape))
- evt.wait()
- except Exception as error:
- max_valid_wg = self.program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, self.ctx.devices[0])
- msg = "Error %s on WG=%s: %s" % (error, wg, max_valid_wg)
- self.assertLess(max_valid_wg, wg, msg)
- break
- else:
- res = d_array_result.get()
- good = numpy.allclose(res, self.data - 5)
- if good and wg > self.max_valid_wg:
- self.__class__.max_valid_wg = wg
- self.assertTrue(good, "calculation is correct for WG=%s" % wg)
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_measurement(self):
- """
- tests that all devices are working properly ... lengthy and error prone
- """
- for platform in ocl.platforms:
- for did, device in enumerate(platform.devices):
- meas = _measure_workgroup_size((platform.id, device.id))
- self.assertEqual(meas, device.max_work_group_size,
- "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size))
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_query(self):
- """
- queries work-group related properties of the addition kernel
- """
- for what in ("COMPILE_WORK_GROUP_SIZE",
- "LOCAL_MEM_SIZE",
- "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
- "PRIVATE_MEM_SIZE",
- "WORK_GROUP_SIZE"):
- logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what))
-
- # Not all ICDs work properly ....
- #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
-
- min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
- max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE")
- self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestAddition("test_add"))
- # testSuite.addTest(TestAddition("test_measurement"))
- testSuite.addTest(TestAddition("test_query"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
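[Editor's note] The work-group introspection exercised by test_query above can be reproduced with a few lines of pyopencl; the kernel source here is a stub written for this illustration only.

    import pyopencl as cl

    ctx = cl.create_some_context()
    src = "__kernel void addition(__global float *a) { a[get_global_id(0)] += 1.0f; }"
    kernel = cl.Program(ctx, src).build().addition
    dev = ctx.devices[0]
    wg_info = cl.kernel_work_group_info
    print("WORK_GROUP_SIZE:", kernel.get_work_group_info(wg_info.WORK_GROUP_SIZE, dev))
    print("PREFERRED_WORK_GROUP_SIZE_MULTIPLE:",
          kernel.get_work_group_info(wg_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, dev))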
diff --git a/silx/opencl/test/test_array_utils.py b/silx/opencl/test/test_array_utils.py
deleted file mode 100644
index 833d828..0000000
--- a/silx/opencl/test/test_array_utils.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the OpenCL array_utils"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre Paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "14/06/2017"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- import pyopencl as cl
- import pyopencl.array as parray
- from .. import linalg
-from ..utils import get_opencl_code
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-try:
- from scipy.ndimage.filters import laplace
- _has_scipy = True
-except ImportError:
- _has_scipy = False
-
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestCpy2d(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.ctx = ocl.create_context()
- if logger.getEffectiveLevel() <= logging.INFO:
- self.PROFILE = True
- self.queue = cl.CommandQueue(
- self.ctx,
- properties=cl.command_queue_properties.PROFILING_ENABLE)
- else:
- self.PROFILE = False
- self.queue = cl.CommandQueue(self.ctx)
- self.allocate_arrays()
- self.program = cl.Program(self.ctx, get_opencl_code("array_utils")).build()
-
- def allocate_arrays(self):
- """
- Allocate various types of arrays for the tests
- """
- self.prng_state = np.random.get_state()
- # Generate arrays of random shape
- self.shape1 = np.random.randint(20, high=512, size=(2,))
- self.shape2 = np.random.randint(20, high=512, size=(2,))
- self.array1 = np.random.rand(*self.shape1).astype(np.float32)
- self.array2 = np.random.rand(*self.shape2).astype(np.float32)
- self.d_array1 = parray.to_device(self.queue, self.array1)
- self.d_array2 = parray.to_device(self.queue, self.array2)
- # Generate random offsets
- offset1_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
- offset1_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
- offset2_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - 10)
- offset2_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - 10)
- self.offset1 = (offset1_y, offset1_x)
- self.offset2 = (offset2_y, offset2_x)
- # Compute the size of the rectangle to transfer
- size_y = np.random.randint(2, high=min(self.shape1[0], self.shape2[0]) - max(offset1_y, offset2_y) + 1)
- size_x = np.random.randint(2, high=min(self.shape1[1], self.shape2[1]) - max(offset1_x, offset2_x) + 1)
- self.transfer_shape = (size_y, size_x)
-
- def tearDown(self):
- self.array1 = None
- self.array2 = None
- self.d_array1.data.release()
- self.d_array2.data.release()
- self.d_array1 = None
- self.d_array2 = None
- self.ctx = None
- self.queue = None
-
- def compare(self, result, reference):
- errmax = np.max(np.abs(result - reference))
- logger.info("Max error = %e" % (errmax))
- self.assertTrue(errmax == 0, "Max error is too high")
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_cpy2d(self):
- """
- Test rectangular transfer of self.d_array1 to self.d_array2
- """
- # Reference
- o1 = self.offset1
- o2 = self.offset2
- T = self.transfer_shape
- logger.info("""Testing D->D rectangular copy with (N1_y, N1_x) = %s,
- (N2_y, N2_x) = %s:
- array2[%d:%d, %d:%d] = array1[%d:%d, %d:%d]""" %
- (
- str(self.shape1), str(self.shape2),
- o2[0], o2[0] + T[0],
- o2[1], o2[1] + T[1],
- o1[0], o1[0] + T[0],
- o1[1], o1[1] + T[1]
- )
- )
- self.array2[o2[0]:o2[0] + T[0], o2[1]:o2[1] + T[1]] = self.array1[o1[0]:o1[0] + T[0], o1[1]:o1[1] + T[1]]
- kernel_args = (
- self.d_array2.data,
- self.d_array1.data,
- np.int32(self.shape2[1]),
- np.int32(self.shape1[1]),
- np.int32(self.offset2[::-1]),
- np.int32(self.offset1[::-1]),
- np.int32(self.transfer_shape[::-1])
- )
- wg = None
- ndrange = self.transfer_shape[::-1]
- self.program.cpy2d(self.queue, ndrange, wg, *kernel_args)
- res = self.d_array2.get()
- self.compare(res, self.array2)
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestCpy2d("test_cpy2d"))
- return testSuite
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py
deleted file mode 100644
index 9dfdd3a..0000000
--- a/silx/opencl/test/test_backprojection.py
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the filtered backprojection module"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre Paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "19/01/2018"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-from math import pi
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- from .. import backprojection
- from ...image.tomography import compute_fourier_filter
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-
-
-def generate_coords(img_shp, center=None):
- """
- Return two 2D arrays containing the indexes of an image.
- The zero is at the center of the image.
- """
- l_r, l_c = float(img_shp[0]), float(img_shp[1])
- R, C = np.mgrid[:l_r, :l_c]
- if center is None:
- center0, center1 = l_r / 2., l_c / 2.
- else:
- center0, center1 = center
- R = R + 0.5 - center0
- C = C + 0.5 - center1
- return R, C
-
-
-def clip_circle(img, center=None, radius=None):
- """
- Puts zeros outside the inscribed circle of the image support.
- """
- R, C = generate_coords(img.shape, center)
- M = R * R + C * C
- res = np.zeros_like(img)
- if radius is None:
- radius = img.shape[0] / 2. - 1
- mask = M < radius * radius
- res[mask] = img[mask]
- return res
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestFBP(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.getfiles()
- self.fbp = backprojection.Backprojection(self.sino.shape, profile=True)
- if self.fbp.compiletime_workgroup_size < 16 * 16:
- self.skipTest("Current implementation of OpenCL backprojection is "
- "not supported on this platform yet")
- # Astra does not use the same backprojector implementation.
- # Therefore, we cannot expect results to be the "same" (up to float32
- # numerical error)
- self.tol = 5e-2
- if not(self.fbp._use_textures) or self.fbp.device.type == "CPU":
- # Precision is less when using CPU
- # (either CPU textures or "manual" linear interpolation)
- self.tol *= 2
-
- def tearDown(self):
- self.sino = None
- # self.fbp.log_profile()
- self.fbp = None
-
- def getfiles(self):
- # load sinogram of 512x512 MRI phantom
- self.sino = np.load(utilstest.getfile("sino500.npz"))["data"]
- # load reconstruction made with ASTRA FBP (with filter designed in spatial domain)
- self.reference_rec = np.load(utilstest.getfile("rec_astra_500.npz"))["data"]
-
- def measure(self):
- "Common measurement of timings"
- t1 = time.time()
- try:
- result = self.fbp.filtered_backprojection(self.sino)
- except RuntimeError as msg:
- logger.error(msg)
- return
- t2 = time.time()
- return t2 - t1, result
-
- def compare(self, res):
- """
- Compare a result with the reference reconstruction.
- Only the valid reconstruction zone (inscribed circle) is taken into
- account
- """
- res_clipped = clip_circle(res)
- ref_clipped = clip_circle(self.reference_rec)
- delta = abs(res_clipped - ref_clipped)
- bad = delta > 1
- logger.debug("Absolute difference: %s with %s outlier pixels out of %s"
- "", delta.max(), bad.sum(), np.prod(bad.shape))
- return delta.max()
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_fbp(self):
- """
- tests FBP
- """
- # Test single reconstruction
- # --------------------------
- t, res = self.measure()
- if t is None:
- logger.info("test_fp: skipped")
- else:
- logger.info("test_backproj: time = %.3fs" % t)
- err = self.compare(res)
- msg = str("Max error = %e" % err)
- logger.info(msg)
- self.assertTrue(err < self.tol, "Max error is too high")
-
- # Test multiple reconstructions
- # -----------------------------
- res0 = np.copy(res)
- for i in range(10):
- res = self.fbp.filtered_backprojection(self.sino)
- errmax = np.max(np.abs(res - res0))
- self.assertTrue(errmax < 1.e-6, "Max error is too high")
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_fbp_filters(self):
- """
- Test the different available filters of silx FBP.
- """
- avail_filters = [
- "ramlak", "shepp-logan", "cosine", "hamming",
- "hann"
- ]
- # Create a Dirac delta function at a single angle view.
- # As the filters are radially invariant:
- # - backprojection yields an image where each line is a Dirac.
- # - FBP yields an image where each line is the spatial filter
- # One can simply filter "dirac" without backprojecting it, but this
- # test will also ensure that backprojection behaves well.
- dirac = np.zeros_like(self.sino)
- na, dw = dirac.shape
- dirac[0, dw//2] = na / pi * 2
-
- for filter_name in avail_filters:
- B = backprojection.Backprojection(dirac.shape, filter_name=filter_name)
- r = B(dirac)
- # Check that radial invariance is kept
- std0 = np.max(np.abs(np.std(r, axis=0)))
- self.assertTrue(
- std0 < 5.e-6,
- "Something wrong with FBP(filter=%s)" % filter_name
- )
- # Check that the filter is retrieved
- r_f = np.fft.fft(np.fft.fftshift(r[0])).real / 2. # filter factor
- ref_filter_f = compute_fourier_filter(dw, filter_name)
- errmax = np.max(np.abs(r_f - ref_filter_f))
- logger.info("FBP filter %s: max error=%e" % (filter_name, errmax))
- self.assertTrue(
- errmax < 1.e-3,
- "Something wrong with FBP(filter=%s)" % filter_name
- )
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_fbp_oddsize(self):
- # Generate a 513-sinogram.
- # The padded width will be nextpow2(513*2).
- # silx [0.10, 0.10.1] will give 1029, which makes R2C transform fail.
- sino = np.pad(self.sino, ((0, 0), (1, 0)), mode='edge')
- B = backprojection.Backprojection(sino.shape, axis_position=self.fbp.axis_pos+1)
- res = B(sino)
- # Compare with self.reference_rec. Tolerance is high as backprojector
- # is not fully shift-invariant.
- errmax = np.max(np.abs(clip_circle(res[1:, 1:] - self.reference_rec)))
- self.assertLess(
- errmax, 1.e-1,
- "Something wrong with FBP on odd-sized sinogram"
- )
-
-
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestFBP("test_fbp"))
- testSuite.addTest(TestFBP("test_fbp_filters"))
- testSuite.addTest(TestFBP("test_fbp_oddsize"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py
deleted file mode 100644
index 7bceb0d..0000000
--- a/silx/opencl/test/test_convolution.py
+++ /dev/null
@@ -1,265 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-
-"""
-Test of the Convolution class.
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre Paleo"]
-__contact__ = "pierre.paleo@esrf.fr"
-__license__ = "MIT"
-__copyright__ = "2019 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
-
-import logging
-from itertools import product
-import numpy as np
-from silx.utils.testutils import parameterize
-from silx.image.utils import gaussian_kernel
-
-try:
- from scipy.ndimage import convolve, convolve1d
- from scipy.misc import ascent
-
- scipy_convolve = convolve
- scipy_convolve1d = convolve1d
-except ImportError:
- scipy_convolve = None
-import unittest
-from ..common import ocl, check_textures_availability
-
-if ocl:
- import pyopencl as cl
- import pyopencl.array as parray
- from silx.opencl.convolution import Convolution
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl and scipy_convolve, "PyOpenCl/scipy is missing")
-class TestConvolution(unittest.TestCase):
- @classmethod
- def setUpClass(cls):
- super(TestConvolution, cls).setUpClass()
- cls.image = np.ascontiguousarray(ascent()[:, :511], dtype="f")
- cls.data1d = cls.image[0]
- cls.data2d = cls.image
- cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1))
- cls.kernel1d = gaussian_kernel(1.0)
- cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d)
- cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d)
- cls.ctx = ocl.create_context()
- cls.tol = {
- "1D": 1e-4,
- "2D": 1e-3,
- "3D": 1e-3,
- }
-
- @classmethod
- def tearDownClass(cls):
- cls.data1d = cls.data2d = cls.data3d = cls.image = None
- cls.kernel1d = cls.kernel2d = cls.kernel3d = None
-
- @staticmethod
- def compare(arr1, arr2):
- return np.max(np.abs(arr1 - arr2))
-
- @staticmethod
- def print_err(conv):
- errmsg = str(
- """
- Something wrong with %s
- mode=%s, texture=%s
- """
- % (conv.use_case_desc, conv.mode, conv.use_textures)
- )
- return errmsg
-
- def __init__(self, methodName="runTest", param=None):
- unittest.TestCase.__init__(self, methodName)
- self.param = param
- self.mode = param["boundary_handling"]
- logger.debug(
- """
- Testing convolution with boundary_handling=%s,
- use_textures=%s, input_device=%s, output_device=%s
- """
- % (
- self.mode,
- param["use_textures"],
- param["input_on_device"],
- param["output_on_device"],
- )
- )
-
- def instantiate_convol(self, shape, kernel, axes=None):
- if self.mode == "constant":
- if not (self.param["use_textures"]) or (
- self.param["use_textures"]
- and not (check_textures_availability(self.ctx))
- ):
- self.skipTest("mode=constant not implemented without textures")
- C = Convolution(
- shape,
- kernel,
- mode=self.mode,
- ctx=self.ctx,
- axes=axes,
- extra_options={"dont_use_textures": not (self.param["use_textures"])},
- )
- return C
-
- def get_data_and_kernel(self, test_name):
- dims = {
- "test_1D": (1, 1),
- "test_separable_2D": (2, 1),
- "test_separable_3D": (3, 1),
- "test_nonseparable_2D": (2, 2),
- "test_nonseparable_3D": (3, 3),
- }
- dim_data = {1: self.data1d, 2: self.data2d, 3: self.data3d}
- dim_kernel = {
- 1: self.kernel1d,
- 2: self.kernel2d,
- 3: self.kernel3d,
- }
- dd, kd = dims[test_name]
- return dim_data[dd], dim_kernel[kd]
-
- def get_reference_function(self, test_name):
- ref_func = {
- "test_1D": lambda x, y: scipy_convolve1d(x, y, mode=self.mode),
- "test_separable_2D": lambda x, y: scipy_convolve1d(
- scipy_convolve1d(x, y, mode=self.mode, axis=1),
- y,
- mode=self.mode,
- axis=0,
- ),
- "test_separable_3D": lambda x, y: scipy_convolve1d(
- scipy_convolve1d(
- scipy_convolve1d(x, y, mode=self.mode, axis=2),
- y,
- mode=self.mode,
- axis=1,
- ),
- y,
- mode=self.mode,
- axis=0,
- ),
- "test_nonseparable_2D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
- "test_nonseparable_3D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
- }
- return ref_func[test_name]
-
- def template_test(self, test_name):
- data, kernel = self.get_data_and_kernel(test_name)
- conv = self.instantiate_convol(data.shape, kernel)
- if self.param["input_on_device"]:
- data_ref = parray.to_device(conv.queue, data)
- else:
- data_ref = data
- if self.param["output_on_device"]:
- d_res = parray.empty_like(conv.data_out)
- d_res.fill(0)
- res = d_res
- else:
- res = None
- res = conv(data_ref, output=res)
- if self.param["output_on_device"]:
- res = res.get()
- ref_func = self.get_reference_function(test_name)
- ref = ref_func(data, kernel)
- metric = self.compare(res, ref)
- logger.info("%s: max error = %.2e" % (test_name, metric))
- tol = self.tol[str("%dD" % kernel.ndim)]
- self.assertLess(metric, tol, self.print_err(conv))
-
- def test_1D(self):
- self.template_test("test_1D")
-
- def test_separable_2D(self):
- self.template_test("test_separable_2D")
-
- def test_separable_3D(self):
- self.template_test("test_separable_3D")
-
- def test_nonseparable_2D(self):
- self.template_test("test_nonseparable_2D")
-
- def test_nonseparable_3D(self):
- self.template_test("test_nonseparable_3D")
-
- def test_batched_2D(self):
- """
- Test batched (nonseparable) 2D convolution on 3D data.
- In this test: batch along "z" (axis 0)
- """
- data = self.data3d
- kernel = self.kernel2d
- conv = self.instantiate_convol(data.shape, kernel, axes=(0,))
- res = conv(data) # 3D
- ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D
-
- std = np.std(res, axis=0)
- std_max = np.max(np.abs(std))
- self.assertLess(std_max, self.tol["2D"], self.print_err(conv))
- metric = self.compare(res[0], ref)
- logger.info("test_nonseparable_3D: max error = %.2e" % metric)
- self.assertLess(metric, self.tol["2D"], self.print_err(conv))
-
-
-def test_convolution():
- boundary_handling_ = ["reflect", "nearest", "wrap", "constant"]
- use_textures_ = [True, False]
- input_on_device_ = [True, False]
- output_on_device_ = [True, False]
- testSuite = unittest.TestSuite()
-
- param_vals = list(
- product(boundary_handling_, use_textures_, input_on_device_, output_on_device_)
- )
- for boundary_handling, use_textures, input_dev, output_dev in param_vals:
- testcase = parameterize(
- TestConvolution,
- param={
- "boundary_handling": boundary_handling,
- "input_on_device": input_dev,
- "output_on_device": output_dev,
- "use_textures": use_textures,
- },
- )
- testSuite.addTest(testcase)
- return testSuite
-
-
-def suite():
- testSuite = test_convolution()
- return testSuite
-
-
-if __name__ == "__main__":
- unittest.main(defaultTest="suite")
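[Editor's note] As a companion to the parameterized tests above, here is a minimal usage sketch of the Convolution class against its scipy reference (separable 2D case, mode="reflect"); array sizes are arbitrary.

    import numpy as np
    from scipy.ndimage import convolve1d
    from silx.image.utils import gaussian_kernel
    from silx.opencl.convolution import Convolution

    image = np.random.rand(256, 256).astype(np.float32)
    kernel = gaussian_kernel(1.0)

    conv = Convolution(image.shape, kernel, mode="reflect")   # separable 2D
    res = conv(image)

    ref = convolve1d(convolve1d(image, kernel, mode="reflect", axis=1),
                     kernel, mode="reflect", axis=0)
    assert np.max(np.abs(res - ref)) < 1e-3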
diff --git a/silx/opencl/test/test_doubleword.py b/silx/opencl/test/test_doubleword.py
deleted file mode 100644
index ca947e0..0000000
--- a/silx/opencl/test/test_doubleword.py
+++ /dev/null
@@ -1,258 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-#
-# Project: The silx project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2021-2021 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"test suite for OpenCL code"
-
-__author__ = "Jérôme Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "31/05/2021"
-
-import unittest
-import numpy
-import logging
-import platform
-
-logger = logging.getLogger(__name__)
-try:
- import pyopencl
-except ImportError as error:
- logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
- pyopencl = None
-
-from .. import ocl
-if ocl is not None:
- from ..utils import read_cl_file
- from .. import pyopencl
- import pyopencl.array
- from pyopencl.elementwise import ElementwiseKernel
-from ...test.utils import test_options
-
-EPS32 = numpy.finfo("float32").eps
-EPS64 = numpy.finfo("float64").eps
-
-
-class TestDoubleWord(unittest.TestCase):
- """
- Test the kernels for compensated math in OpenCL
- """
-
- @classmethod
- def setUpClass(cls):
- if not test_options.WITH_OPENCL_TEST:
- raise unittest.SkipTest("User request to skip OpenCL tests")
- if pyopencl is None or ocl is None:
- raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
-
- cls.ctx = ocl.create_context(devicetype="GPU")
- cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
-
- # this is running 32-bit OpenCL with POCL
- if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
- cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
- cls.args = "-DX87_VOLATILE=volatile"
- else:
- cls.args = ""
- size = 1024
- cls.a = 1.0 + numpy.random.random(size)
- cls.b = 1.0 + numpy.random.random(size)
- cls.ah = cls.a.astype(numpy.float32)
- cls.bh = cls.b.astype(numpy.float32)
- cls.al = (cls.a - cls.ah).astype(numpy.float32)
- cls.bl = (cls.b - cls.bh).astype(numpy.float32)
- cls.doubleword = read_cl_file("doubleword.cl")
-
- @classmethod
- def tearDownClass(cls):
- cls.queue = None
- cls.ctx = None
- cls.a = cls.al = cls.ah = None
- cls.b = cls.bl = cls.bh = None
- cls.doubleword = None
-
- def test_fast_sum2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fast_fp_plus_fp(a[i], b[i]); res_h[i] = tmp.s0; res_l[i] = tmp.s1",
- preamble=self.doubleword)
- a_g = pyopencl.array.to_device(self.queue, self.ah)
- b_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(a_g)
- res_h = pyopencl.array.empty_like(a_g)
- test_kernel(a_g, b_g, res_h, res_l)
- self.assertEqual(abs(self.ah + self.bl - res_h.get()).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bl - res_h.get()).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bl - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
-
- def test_sum2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fp_plus_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- a_g = pyopencl.array.to_device(self.queue, self.ah)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(a_g)
- res_h = pyopencl.array.empty_like(a_g)
- test_kernel(a_g, b_g, res_h, res_l)
- self.assertEqual(abs(self.ah + self.bh - res_h.get()).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) + self.bh - res_h.get()).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) + self.bh - (res_h.get().astype(numpy.float64) + res_l.get())).max(), 0, "Exact matches")
-
- def test_prod2(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *a, float *b, float *res_h, float *res_l",
- "float2 tmp = fp_times_fp(a[i],b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- a_g = pyopencl.array.to_device(self.queue, self.ah)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(a_g)
- res_h = pyopencl.array.empty_like(a_g)
- test_kernel(a_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertEqual(abs(self.ah * self.bh - res_m).max(), 0, "Major matches")
- self.assertGreater(abs(self.ah.astype(numpy.float64) * self.bh - res_m).max(), 0, "Exact mismatches")
- self.assertEqual(abs(self.ah.astype(numpy.float64) * self.bh - res).max(), 0, "Exact matches")
-
- def test_dw_plus_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_plus_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(b_g)
- res_h = pyopencl.array.empty_like(b_g)
- test_kernel(ah_g, al_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a + self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a + self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.ah.astype(numpy.float64) + self.al + self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
-
- def test_dw_plus_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_plus_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- bh_g = pyopencl.array.to_device(self.queue, self.bh)
- bl_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(bh_g)
- res_h = pyopencl.array.empty_like(bh_g)
- test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a + self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a + self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a + self.b - res).max(), 3 * EPS32 ** 2, "Exact matches")
-
- def test_dw_times_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_times_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(b_g)
- res_h = pyopencl.array.empty_like(b_g)
- test_kernel(ah_g, al_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a * self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a * self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a * self.bh - res).max(), 2 * EPS32 ** 2, "Exact matches")
-
- def test_dw_times_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_times_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- bh_g = pyopencl.array.to_device(self.queue, self.bh)
- bl_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(bh_g)
- res_h = pyopencl.array.empty_like(bh_g)
- test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a * self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a * self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a * self.b - res).max(), 5 * EPS32 ** 2, "Exact matches")
-
- def test_dw_div_fp(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *b, float *res_h, float *res_l",
- "float2 tmp = dw_div_fp((float2)(ah[i], al[i]),b[i]); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- b_g = pyopencl.array.to_device(self.queue, self.bh)
- res_l = pyopencl.array.empty_like(b_g)
- res_h = pyopencl.array.empty_like(b_g)
- test_kernel(ah_g, al_g, b_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a / self.bh - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a / self.bh - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a / self.bh - res).max(), 3 * EPS32 ** 2, "Exact matches")
-
- def test_dw_div_dw(self):
- test_kernel = ElementwiseKernel(self.ctx,
- "float *ah, float *al, float *bh, float *bl, float *res_h, float *res_l",
- "float2 tmp = dw_div_dw((float2)(ah[i], al[i]),(float2)(bh[i], bl[i])); res_h[i]=tmp.s0; res_l[i]=tmp.s1;",
- preamble=self.doubleword)
- ah_g = pyopencl.array.to_device(self.queue, self.ah)
- al_g = pyopencl.array.to_device(self.queue, self.al)
- bh_g = pyopencl.array.to_device(self.queue, self.bh)
- bl_g = pyopencl.array.to_device(self.queue, self.bl)
- res_l = pyopencl.array.empty_like(bh_g)
- res_h = pyopencl.array.empty_like(bh_g)
- test_kernel(ah_g, al_g, bh_g, bl_g, res_h, res_l)
- res_m = res_h.get()
- res = res_h.get().astype(numpy.float64) + res_l.get()
- self.assertLess(abs(self.a / self.b - res_m).max(), EPS32, "Major matches")
- self.assertGreater(abs(self.a / self.b - res_m).max(), EPS64, "Exact mismatches")
- self.assertLess(abs(self.a / self.b - res).max(), 6 * EPS32 ** 2, "Exact matches")
-
-
-def suite():
- testsuite = unittest.TestSuite()
- loader = unittest.defaultTestLoader.loadTestsFromTestCase
- testsuite.addTest(loader(TestDoubleWord))
- return testsuite
-
-
-if __name__ == '__main__':
- runner = unittest.TextTestRunner()
- runner.run(suite())
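The double-word tests above operate on values split, in setUpClass, into a high float32 word plus a low float32 word that stores the rounding error, so that high + low carries almost the full float64 precision. A minimal NumPy sketch of that splitting and of the accuracy it buys, for values in [1, 2) as generated above:

import numpy as np

a = 1.0 + np.random.random(1024)            # float64 values in [1, 2)
ah = a.astype(np.float32)                   # "high" word: nearest float32
al = (a - ah).astype(np.float32)            # "low" word: the rounding error

# A single float32 word is only accurate to about eps32...
assert abs(a - ah).max() < np.finfo(np.float32).eps
# ...while the two-word sum recovers the float64 value almost exactly
assert abs(a - (ah.astype(np.float64) + al)).max() < np.finfo(np.float32).eps ** 2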
diff --git a/silx/opencl/test/test_image.py b/silx/opencl/test/test_image.py
deleted file mode 100644
index d73a854..0000000
--- a/silx/opencl/test/test_image.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: image manipulation in OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of image manipulation
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "13/02/2018"
-
-import logging
-import numpy
-
-import unittest
-from ..common import ocl, _measure_workgroup_size
-if ocl:
- import pyopencl
- import pyopencl.array
-from ...test.utils import utilstest
-from ..image import ImageProcessing
-logger = logging.getLogger(__name__)
-try:
- from PIL import Image
-except ImportError:
- Image = None
-
-
-@unittest.skipUnless(ocl and Image, "PyOpenCl/Image is missing")
-class TestImage(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- super(TestImage, cls).setUpClass()
- if ocl:
- cls.ctx = ocl.create_context()
- cls.lena = utilstest.getfile("lena.png")
- cls.data = numpy.asarray(Image.open(cls.lena))
- cls.ip = ImageProcessing(ctx=cls.ctx, template=cls.data, profile=True)
-
- @classmethod
- def tearDownClass(cls):
- super(TestImage, cls).tearDownClass()
- cls.ctx = None
- cls.lena = None
- cls.data = None
- if logger.level <= logging.INFO:
- logger.warning("\n".join(cls.ip.log_profile()))
- cls.ip = None
-
- def setUp(self):
- if ocl is None:
- return
- self.data = numpy.asarray(Image.open(self.lena))
-
- def tearDown(self):
- self.img = self.data = None
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_cast(self):
- """
- tests the cast kernel
- """
- res = self.ip.to_float(self.data)
- self.assertEqual(res.shape, self.data.shape, "shape")
- self.assertEqual(res.dtype, numpy.float32, "dtype")
- self.assertEqual(abs(res - self.data).max(), 0, "content")
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_normalize(self):
- """
- tests the cast to float followed by the normalize kernel
- """
- tmp = pyopencl.array.empty(self.ip.ctx, self.data.shape, "float32")
- res = self.ip.to_float(self.data, out=tmp)
- res2 = self.ip.normalize(tmp, -100, 100, copy=False)
- norm = (self.data.astype(numpy.float32) - self.data.min()) / (self.data.max() - self.data.min())
- ref2 = 200 * norm - 100
- self.assertLess(abs(res2 - ref2).max(), 3e-5, "content")
-
- @unittest.skipUnless(ocl, "pyopencl is missing")
- def test_histogram(self):
- """
- Test the histogram on a greyscale image ... of Lena :)
- """
- lena_bw = (0.2126 * self.data[:, :, 0] +
- 0.7152 * self.data[:, :, 1] +
- 0.0722 * self.data[:, :, 2]).astype("int32")
- ref = numpy.histogram(lena_bw, 255)
- ip = ImageProcessing(ctx=self.ctx, template=lena_bw, profile=True)
- res = ip.histogram(lena_bw, 255)
- ip.log_profile()
- delta = (ref[0] - res[0])
- deltap = (ref[1] - res[1])
- self.assertEqual(delta.sum(), 0, "errors are self-compensated")
- self.assertLessEqual(abs(delta).max(), 1, "errors are small")
- self.assertLessEqual(abs(deltap).max(), 3e-5, "errors on position are small: %s" % (abs(deltap).max()))
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestImage("test_cast"))
- testSuite.addTest(TestImage("test_normalize"))
- testSuite.addTest(TestImage("test_histogram"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_kahan.py b/silx/opencl/test/test_kahan.py
deleted file mode 100644
index 6ea599b..0000000
--- a/silx/opencl/test/test_kahan.py
+++ /dev/null
@@ -1,269 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-#
-# Project: OpenCL numerical library
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2015-2021 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"test suite for OpenCL code"
-
-__author__ = "Jérôme Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "17/05/2021"
-
-
-import unittest
-import numpy
-import logging
-import platform
-
-logger = logging.getLogger(__name__)
-try:
- import pyopencl
-except ImportError as error:
- logger.warning("OpenCL module (pyopencl) is not present, skip tests. %s.", error)
- pyopencl = None
-
-from .. import ocl
-if ocl is not None:
- from ..utils import read_cl_file
- from .. import pyopencl
- import pyopencl.array
-from ...test.utils import test_options
-
-
-class TestKahan(unittest.TestCase):
- """
- Test the kernels for compensated math in OpenCL
- """
-
- @classmethod
- def setUpClass(cls):
- if not test_options.WITH_OPENCL_TEST:
- raise unittest.SkipTest("User request to skip OpenCL tests")
- if pyopencl is None or ocl is None:
- raise unittest.SkipTest("OpenCL module (pyopencl) is not present or no device available")
-
- cls.ctx = ocl.create_context(devicetype="GPU")
- cls.queue = pyopencl.CommandQueue(cls.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
-
- # this is running 32-bit OpenCL with POCL
- if (platform.machine() in ("i386", "i686", "x86_64") and (tuple.__itemsize__ == 4) and
- cls.ctx.devices[0].platform.name == 'Portable Computing Language'):
- cls.args = "-DX87_VOLATILE=volatile"
- else:
- cls.args = ""
-
- @classmethod
- def tearDownClass(cls):
- cls.queue = None
- cls.ctx = None
-
- @staticmethod
- def dummy_sum(ary, dtype=None):
- "perform the actual sum in a dummy way "
- if dtype is None:
- dtype = ary.dtype.type
- sum_ = dtype(0)
- for i in ary:
- sum_ += i
- return sum_
-
- def test_kahan(self):
- # simple test
- N = 26
- data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
-
- ref64 = numpy.sum(data, dtype=numpy.float64)
- ref32 = self.dummy_sum(data)
- if (ref64 == ref32):
- logger.warning("Kahan: invalid tests as float32 provides the same result as float64")
- # Dummy kernel to evaluate
- src = """
- kernel void summation(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- for (int i=0; i<size; i++)
- {
- acc = kahan_sum(acc, data[i]);
- }
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
- """
- prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
- ones_d = pyopencl.array.to_device(self.queue, data)
- res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
- res_d.fill(0)
- evt = prg.summation(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype=numpy.float64)
- self.assertEqual(ref64, res, "test_kahan")
-
- def test_dot16(self):
- # simple test
- N = 16
- data = (1 << (N - 1 - numpy.arange(N))).astype(numpy.float32)
-
- ref64 = numpy.dot(data.astype(numpy.float64), data.astype(numpy.float64))
- ref32 = numpy.dot(data, data)
- if (ref64 == ref32):
- logger.warning("dot16: invalid tests as float32 provides the same result as float64")
- # Dummy kernel to evaluate
- src = """
- kernel void test_dot16(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float16 data16 = (float16) (data[0],data[1],data[2],data[3],data[4],
- data[5],data[6],data[7],data[8],data[9],
- data[10],data[11],data[12],data[13],data[14],data[15]);
- acc = comp_dot16(data16, data16);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot8(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float8 data0 = (float8) (data[0],data[2],data[4],data[6],data[8],data[10],data[12],data[14]);
- float8 data1 = (float8) (data[1],data[3],data[5],data[7],data[9],data[11],data[13],data[15]);
- acc = comp_dot8(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot4(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float4 data0 = (float4) (data[0],data[4],data[8],data[12]);
- float4 data1 = (float4) (data[3],data[7],data[11],data[15]);
- acc = comp_dot4(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot3(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float3 data0 = (float3) (data[0],data[4],data[12]);
- float3 data1 = (float3) (data[3],data[11],data[15]);
- acc = comp_dot3(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- kernel void test_dot2(global float* data,
- int size,
- global float* result)
- {
- float2 acc = (float2)(0.0f, 0.0f);
- float2 data0 = (float2) (data[0],data[14]);
- float2 data1 = (float2) (data[1],data[15]);
- acc = comp_dot2(data0, data1);
- result[0] = acc.s0;
- result[1] = acc.s1;
- }
-
- """
-
- prg = pyopencl.Program(self.ctx, read_cl_file("kahan.cl") + src).build(self.args)
- ones_d = pyopencl.array.to_device(self.queue, data)
- res_d = pyopencl.array.empty(self.queue, 2, numpy.float32)
- res_d.fill(0)
- evt = prg.test_dot16(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot16")
-
- res_d.fill(0)
- data0 = data[0::2]
- data1 = data[1::2]
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot8: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot8(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot8")
-
- res_d.fill(0)
- data0 = data[0::4]
- data1 = data[3::4]
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot4: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot4(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot4")
-
- res_d.fill(0)
- data0 = numpy.array([data[0], data[4], data[12]])
- data1 = numpy.array([data[3], data[11], data[15]])
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot3: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot3(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot3")
-
- res_d.fill(0)
- data0 = numpy.array([data[0], data[14]])
- data1 = numpy.array([data[1], data[15]])
- ref64 = numpy.dot(data0.astype(numpy.float64), data1.astype(numpy.float64))
- ref32 = numpy.dot(data0, data1)
- if (ref64 == ref32):
- logger.warning("dot2: invalid tests as float32 provides the same result as float64")
- evt = prg.test_dot2(self.queue, (1,), (1,), ones_d.data, numpy.int32(N), res_d.data)
- evt.wait()
- res = res_d.get().sum(dtype="float64")
- self.assertEqual(ref64, res, "test_dot2")
-
-
-def suite():
- testsuite = unittest.TestSuite()
- loader = unittest.defaultTestLoader.loadTestsFromTestCase
- testsuite.addTest(loader(TestKahan))
- return testsuite
-
-
-if __name__ == '__main__':
- runner = unittest.TextTestRunner()
- runner.run(suite())
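For reference, the kahan_sum kernel exercised above maintains a float2 accumulator: the running sum plus a correction term holding the bits lost to float32 rounding, and the test reads the result back as the float64 sum of the two components. A pure-Python sketch of such a compensated accumulator; the exact sign convention inside kahan.cl is an assumption here, and this variant is written so that sum + correction reconstructs the exact value, matching how the test combines res_d:

import numpy as np

def kahan_sum_pair(values):
    "Compensated (Kahan) float32 summation returning the (sum, correction) pair."
    total = np.float32(0.0)
    comp = np.float32(0.0)                  # low-order bits lost so far
    for v in values:
        y = np.float32(v) + comp            # re-inject the previously lost bits
        t = np.float32(total + y)           # rounded new total
        comp = np.float32(y - (t - total))  # bits lost in this rounding step
        total = t
    return total, comp

# Same pathological input as test_kahan: descending powers of two
N = 26
data = (1 << (N - 1 - np.arange(N))).astype(np.float32)
total, comp = kahan_sum_pair(data)
assert float(total) + float(comp) == np.sum(data, dtype=np.float64)  # exact: 2**26 - 1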
diff --git a/silx/opencl/test/test_linalg.py b/silx/opencl/test/test_linalg.py
deleted file mode 100644
index 0b6c730..0000000
--- a/silx/opencl/test/test_linalg.py
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the linalg module"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- import pyopencl as cl
- import pyopencl.array as parray
- from .. import linalg
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-try:
- from scipy.ndimage.filters import laplace
- _has_scipy = True
-except ImportError:
- _has_scipy = False
-
-
-# TODO move this function in math or image ?
-def gradient(img):
- '''
- Compute the gradient of an image as a numpy array
- Code from https://github.com/emmanuelle/tomo-tv/
- '''
- shape = [img.ndim, ] + list(img.shape)
- gradient = np.zeros(shape, dtype=img.dtype)
- slice_all = [0, slice(None, -1),]
- for d in range(img.ndim):
- gradient[tuple(slice_all)] = np.diff(img, axis=d)
- slice_all[0] = d + 1
- slice_all.insert(1, slice(None))
- return gradient
-
-
-# TODO move this function in math or image ?
-def divergence(grad):
- '''
- Compute the divergence of a gradient
- Code from https://github.com/emmanuelle/tomo-tv/
- '''
- res = np.zeros(grad.shape[1:])
- for d in range(grad.shape[0]):
- this_grad = np.rollaxis(grad[d], d)
- this_res = np.rollaxis(res, d)
- this_res[:-1] += this_grad[:-1]
- this_res[1:-1] -= this_grad[:-2]
- this_res[-1] -= this_grad[-2]
- return res
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestLinAlg(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.getfiles()
- self.la = linalg.LinAlg(self.image.shape)
- self.allocate_arrays()
-
- def allocate_arrays(self):
- """
- Allocate various types of arrays for the tests
- """
- # numpy images
- self.grad = np.zeros(self.image.shape, dtype=np.complex64)
- self.grad2 = np.zeros((2,) + self.image.shape, dtype=np.float32)
- self.grad_ref = gradient(self.image)
- self.div_ref = divergence(self.grad_ref)
- self.image2 = np.zeros_like(self.image)
- # Device images
- self.gradient_parray = parray.empty(self.la.queue, self.image.shape, np.complex64)
- self.gradient_parray.fill(0)
- # we should be using cl.Buffer(self.la.ctx, cl.mem_flags.READ_WRITE, size=self.image.nbytes*2),
- # but platforms not supporting OpenCL 1.2 have a problem with enqueue_fill_buffer,
- # so we use the parray "fill" utility
- self.gradient_buffer = self.gradient_parray.data
- # Do the same for image
- self.image_parray = parray.to_device(self.la.queue, self.image)
- self.image_buffer = self.image_parray.data
- # Refs
- tmp = np.zeros(self.image.shape, dtype=np.complex64)
- tmp.real = np.copy(self.grad_ref[0])
- tmp.imag = np.copy(self.grad_ref[1])
- self.grad_ref_parray = parray.to_device(self.la.queue, tmp)
- self.grad_ref_buffer = self.grad_ref_parray.data
-
- def tearDown(self):
- self.image = None
- self.image2 = None
- self.grad = None
- self.grad2 = None
- self.grad_ref = None
- self.div_ref = None
- self.gradient_parray.data.release()
- self.gradient_parray = None
- self.gradient_buffer = None
- self.image_parray.data.release()
- self.image_parray = None
- self.image_buffer = None
- self.grad_ref_parray.data.release()
- self.grad_ref_parray = None
- self.grad_ref_buffer = None
-
- def getfiles(self):
- # load 512x512 MRI phantom - TODO include Lena or ascent once a .npz is available
- self.image = np.load(utilstest.getfile("Brain512.npz"))["data"]
-
- def compare(self, result, reference, abstol, name):
- errmax = np.max(np.abs(result - reference))
- logger.info("%s: Max error = %e" % (name, errmax))
- self.assertTrue(errmax < abstol, str("%s: Max error is too high" % name))
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_gradient(self):
- arrays = {
- "numpy.ndarray": self.image,
- "buffer": self.image_buffer,
- "parray": self.image_parray
- }
- for desc, image in arrays.items():
- # Test with dst on host (numpy.ndarray)
- res = self.la.gradient(image, return_to_host=True)
- self.compare(res, self.grad_ref, 1e-6, str("gradient[src=%s, dst=numpy.ndarray]" % desc))
- # Test with dst on device (pyopencl.Buffer)
- self.la.gradient(image, dst=self.gradient_buffer)
- cl.enqueue_copy(self.la.queue, self.grad, self.gradient_buffer)
- self.grad2[0] = self.grad.real
- self.grad2[1] = self.grad.imag
- self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=buffer]" % desc))
- # Test with dst on device (pyopencl.Array)
- self.la.gradient(image, dst=self.gradient_parray)
- self.grad = self.gradient_parray.get()
- self.grad2[0] = self.grad.real
- self.grad2[1] = self.grad.imag
- self.compare(self.grad2, self.grad_ref, 1e-6, str("gradient[src=%s, dst=parray]" % desc))
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_divergence(self):
- arrays = {
- "numpy.ndarray": self.grad_ref,
- "buffer": self.grad_ref_buffer,
- "parray": self.grad_ref_parray
- }
- for desc, grad in arrays.items():
- # Test with dst on host (numpy.ndarray)
- res = self.la.divergence(grad, return_to_host=True)
- self.compare(res, self.div_ref, 1e-6, str("divergence[src=%s, dst=numpy.ndarray]" % desc))
- # Test with dst on device (pyopencl.Buffer)
- self.la.divergence(grad, dst=self.image_buffer)
- cl.enqueue_copy(self.la.queue, self.image2, self.image_buffer)
- self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=buffer]" % desc))
- # Test with dst on device (pyopencl.Array)
- self.la.divergence(grad, dst=self.image_parray)
- self.image2 = self.image_parray.get()
- self.compare(self.image2, self.div_ref, 1e-6, str("divergence[src=%s, dst=parray]" % desc))
-
- @unittest.skipUnless(ocl and mako and _has_scipy, "pyopencl and/or scipy is missing")
- def test_laplacian(self):
- laplacian_ref = laplace(self.image)
- # Laplacian = div(grad)
- self.la.gradient(self.image)
- laplacian_ocl = self.la.divergence(self.la.d_gradient, return_to_host=True)
- self.compare(laplacian_ocl, laplacian_ref, 1e-6, "laplacian")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestLinAlg("test_gradient"))
- testSuite.addTest(TestLinAlg("test_divergence"))
- testSuite.addTest(TestLinAlg("test_laplacian"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
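test_laplacian above rests on the identity Laplacian = div(grad) for the particular forward/backward difference scheme implemented by the gradient() and divergence() helpers. A small self-contained NumPy check of that identity against scipy.ndimage.laplace (whose default boundary mode is "reflect"), with 2D-only re-implementations of the helpers for brevity; grad2d and div2d are names introduced here, not silx API:

import numpy as np
from scipy.ndimage import laplace

def grad2d(img):
    # Forward differences along each axis; the last row/column of each component stays zero
    gx = np.zeros_like(img)
    gy = np.zeros_like(img)
    gx[:-1] = np.diff(img, axis=0)
    gy[:, :-1] = np.diff(img, axis=1)
    return gx, gy

def div2d(gx, gy):
    # Backward differences with boundary terms matching divergence() above
    d = np.zeros_like(gx)
    d[0] += gx[0]
    d[1:] += np.diff(gx, axis=0)
    d[:, 0] += gy[:, 0]
    d[:, 1:] += np.diff(gy, axis=1)
    return d

img = np.random.rand(64, 64)
assert np.allclose(div2d(*grad2d(img)), laplace(img))  # Laplacian == div(grad)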
diff --git a/silx/opencl/test/test_medfilt.py b/silx/opencl/test/test_medfilt.py
deleted file mode 100644
index 976b199..0000000
--- a/silx/opencl/test/test_medfilt.py
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Median filter of images + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of the median filter
-"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "05/07/2018"
-
-
-import sys
-import time
-import logging
-import numpy
-import unittest
-from collections import namedtuple
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- import pyopencl
- import pyopencl.array
- from .. import medfilt
-
-logger = logging.getLogger(__name__)
-
-Result = namedtuple("Result", ["size", "error", "sp_time", "oc_time"])
-
-try:
- from scipy.misc import ascent
-except ImportError:
- def ascent():
- """Dummy image from random data"""
- return numpy.random.random((512, 512))
-try:
- from scipy.ndimage import filters
- median_filter = filters.median_filter
- HAS_SCIPY = True
-except ImportError:
- HAS_SCIPY = False
- from silx.math import medfilt2d as median_filter
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestMedianFilter(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- self.data = ascent().astype(numpy.float32)
- self.medianfilter = medfilt.MedianFilter2D(self.data.shape, devicetype="gpu")
-
- def tearDown(self):
- self.data = None
- self.medianfilter = None
-
- def measure(self, size):
- "Common measurement of accuracy and timings"
- t0 = time.time()
- if HAS_SCIPY:
- ref = median_filter(self.data, size, mode="nearest")
- else:
- ref = median_filter(self.data, size)
- t1 = time.time()
- try:
- got = self.medianfilter.medfilt2d(self.data, size)
- except RuntimeError as msg:
- logger.error(msg)
- return
- t2 = time.time()
- delta = abs(got - ref).max()
- return Result(size, delta, t1 - t0, t2 - t1)
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_medfilt(self):
- """
- tests the median filter kernel
- """
- r = self.measure(size=11)
- if r is None:
- logger.info("test_medfilt: size: %s: skipped")
- else:
- logger.info("test_medfilt: size: %s error %s, t_ref: %.3fs, t_ocl: %.3fs" % r)
- self.assertEqual(r.error, 0, 'Results are correct')
-
- def benchmark(self, limit=36):
- "Run some benchmarking"
- try:
- import PyQt5
- from ...gui.matplotlib import pylab
- from ...gui.utils import update_fig
- except ImportError:
- pylab = None
-
- def update_fig(*ag, **kwarg):
- pass
-
- fig = pylab.figure()
- fig.suptitle("Median filter of an image 512x512")
- sp = fig.add_subplot(1, 1, 1)
- sp.set_title(self.medianfilter.ctx.devices[0].name)
- sp.set_xlabel("Window width & height")
- sp.set_ylabel("Execution time (s)")
- sp.set_xlim(2, limit + 1)
- sp.set_ylim(0, 4)
- data_size = []
- data_scipy = []
- data_opencl = []
- plot_sp = sp.plot(data_size, data_scipy, "-or", label="scipy")[0]
- plot_opencl = sp.plot(data_size, data_opencl, "-ob", label="opencl")[0]
- sp.legend(loc=2)
- fig.show()
- update_fig(fig)
- for s in range(3, limit, 2):
- r = self.measure(s)
- print(r)
- if r.error == 0:
- data_size.append(s)
- data_scipy.append(r.sp_time)
- data_opencl.append(r.oc_time)
- plot_sp.set_data(data_size, data_scipy)
- plot_opencl.set_data(data_size, data_opencl)
- update_fig(fig)
- fig.show()
- if sys.version_info[0] < 3:
- raw_input()
- else:
- input()
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestMedianFilter("test_medfilt"))
- return testSuite
-
-
-def benchmark():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestMedianFilter("benchmark"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_projection.py b/silx/opencl/test/test_projection.py
deleted file mode 100644
index 7631128..0000000
--- a/silx/opencl/test/test_projection.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the forward projection module"""
-
-from __future__ import division, print_function
-
-__authors__ = ["Pierre paleo"]
-__license__ = "MIT"
-__copyright__ = "2013-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "19/01/2018"
-
-
-import time
-import logging
-import numpy as np
-import unittest
-try:
- import mako
-except ImportError:
- mako = None
-from ..common import ocl
-if ocl:
- from .. import projection
-from silx.test.utils import utilstest
-
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl and mako, "PyOpenCl is missing")
-class TestProj(unittest.TestCase):
-
- def setUp(self):
- if ocl is None:
- return
- # ~ if sys.platform.startswith('darwin'):
- # ~ self.skipTest("Projection is not implemented on CPU for OS X yet")
- self.getfiles()
- n_angles = self.sino.shape[0]
- self.proj = projection.Projection(self.phantom.shape, n_angles)
- if self.proj.compiletime_workgroup_size < 16 * 16:
- self.skipTest("Current implementation of OpenCL projection is not supported on this platform yet")
-
- def tearDown(self):
- self.phantom = None
- self.sino = None
- self.proj = None
-
- def getfiles(self):
- # load 512x512 MRI phantom
- self.phantom = np.load(utilstest.getfile("Brain512.npz"))["data"]
- # load sinogram computed with PyHST
- self.sino = np.load(utilstest.getfile("sino500_pyhst.npz"))["data"]
-
- def measure(self):
- "Common measurement of timings"
- t1 = time.time()
- try:
- result = self.proj.projection(self.phantom)
- except RuntimeError as msg:
- logger.error(msg)
- return
- t2 = time.time()
- return t2 - t1, result
-
- def compare(self, res):
- """
- Compare a computed projection with the reference sinogram (computed with PyHST)
- """
- # Compare with the reference sinogram loaded in getfiles()
- # TODO: compare a standard projection
- ref = self.sino
- return np.max(np.abs(res - ref))
-
- @unittest.skipUnless(ocl and mako, "pyopencl is missing")
- def test_proj(self):
- """
- tests Projection
- """
- # Test single reconstruction
- # --------------------------
- t, res = self.measure()
- if t is None:
- logger.info("test_proj: skipped")
- else:
- logger.info("test_proj: time = %.3fs" % t)
- err = self.compare(res)
- msg = str("Max error = %e" % err)
- logger.info(msg)
- # Interpolation differs at some lines, giving relative error of 10/50000
- self.assertTrue(err < 20., "Max error is too high")
- # Test multiple reconstructions
- # -----------------------------
- res0 = np.copy(res)
- for i in range(10):
- res = self.proj.projection(self.phantom)
- errmax = np.max(np.abs(res - res0))
- self.assertTrue(errmax < 1.e-6, "Max error is too high")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestProj("test_proj"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/test/test_sparse.py b/silx/opencl/test/test_sparse.py
deleted file mode 100644
index 76a6a0a..0000000
--- a/silx/opencl/test/test_sparse.py
+++ /dev/null
@@ -1,203 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-# /*##########################################################################
-#
-# Copyright (c) 2018-2019 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ###########################################################################*/
-"""Test of the sparse module"""
-
-import numpy as np
-import unittest
-import logging
-from itertools import product
-from ..common import ocl
-if ocl:
- import pyopencl.array as parray
- from silx.opencl.sparse import CSR
-try:
- import scipy.sparse as sp
-except ImportError:
- sp = None
-logger = logging.getLogger(__name__)
-
-
-
-def generate_sparse_random_data(
- shape=(1000,),
- data_min=0, data_max=100,
- density=0.1,
- use_only_integers=True,
- dtype="f"):
- """
- Generate random sparse data.
-
- Parameters
- ------------
- shape: tuple
- Output data shape.
- data_min: int or float
- Minimum value of data
- data_max: int or float
- Maximum value of data
- density: float
- Density of non-zero elements in the output data.
- A low density means few non-zero elements.
- use_only_integers: bool
- If set to True, the output data items will be primarily integers,
- possibly cast to float if dtype is a floating-point type.
- This can be used for ease of debugging.
- dtype: str or numpy.dtype
- Output data type
- """
- mask = np.random.binomial(1, density, size=shape)
- if use_only_integers:
- d = np.random.randint(data_min, high=data_max, size=shape)
- else:
- d = data_min + (data_max - data_min) * np.random.rand(*shape)
- return (d * mask).astype(dtype)
-
-
-
-@unittest.skipUnless(ocl and sp, "PyOpenCl/scipy is missing")
-class TestCSR(unittest.TestCase):
- """Test CSR format"""
-
- def setUp(self):
- # Test possible configurations
- input_on_device = [False, True]
- output_on_device = [False, True]
- dtypes = [np.float32, np.int32, np.uint16]
- self._test_configs = list(product(input_on_device, output_on_device, dtypes))
-
-
- def compute_ref_sparsification(self, array):
- ref_sparse = sp.csr_matrix(array)
- return ref_sparse
-
-
- def test_sparsification(self):
- for input_on_device, output_on_device, dtype in self._test_configs:
- self._test_sparsification(input_on_device, output_on_device, dtype)
-
-
- def _test_sparsification(self, input_on_device, output_on_device, dtype):
- current_config = "input on device: %s, output on device: %s, dtype: %s" % (
- str(input_on_device), str(output_on_device), str(dtype)
- )
- logger.debug("CSR: %s" % current_config)
- # Generate data and reference CSR
- array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
- ref_sparse = self.compute_ref_sparsification(array)
- # Sparsify on device
- csr = CSR(array.shape, dtype=dtype)
- if input_on_device:
- # The array has to be flattened
- arr = parray.to_device(csr.queue, array.ravel())
- else:
- arr = array
- if output_on_device:
- d_data = parray.empty_like(csr.data)
- d_indices = parray.empty_like(csr.indices)
- d_indptr = parray.empty_like(csr.indptr)
- d_data.fill(0)
- d_indices.fill(0)
- d_indptr.fill(0)
- output = (d_data, d_indices, d_indptr)
- else:
- output = None
- data, indices, indptr = csr.sparsify(arr, output=output)
- if output_on_device:
- data = data.get()
- indices = indices.get()
- indptr = indptr.get()
- # Compare
- nnz = ref_sparse.nnz
- self.assertTrue(
- np.allclose(data[:nnz], ref_sparse.data),
- "something wrong with sparsified data (%s)"
- % current_config
- )
- self.assertTrue(
- np.allclose(indices[:nnz], ref_sparse.indices),
- "something wrong with sparsified indices (%s)"
- % current_config
- )
- self.assertTrue(
- np.allclose(indptr, ref_sparse.indptr),
- "something wrong with sparsified indices pointers (indptr) (%s)"
- % current_config
- )
-
-
- def test_desparsification(self):
- for input_on_device, output_on_device, dtype in self._test_configs:
- self._test_desparsification(input_on_device, output_on_device, dtype)
-
-
- def _test_desparsification(self, input_on_device, output_on_device, dtype):
- current_config = "input on device: %s, output on device: %s, dtype: %s" % (
- str(input_on_device), str(output_on_device), str(dtype)
- )
- logger.debug("CSR: %s" % current_config)
- # Generate data and reference CSR
- array = generate_sparse_random_data(shape=(512, 511), dtype=dtype)
- ref_sparse = self.compute_ref_sparsification(array)
- # De-sparsify on device
- csr = CSR(array.shape, dtype=dtype, max_nnz=ref_sparse.nnz)
- if input_on_device:
- data = parray.to_device(csr.queue, ref_sparse.data)
- indices = parray.to_device(csr.queue, ref_sparse.indices)
- indptr = parray.to_device(csr.queue, ref_sparse.indptr)
- else:
- data = ref_sparse.data
- indices = ref_sparse.indices
- indptr = ref_sparse.indptr
- if output_on_device:
- d_arr = parray.empty_like(csr.array)
- d_arr.fill(0)
- output = d_arr
- else:
- output = None
- arr = csr.densify(data, indices, indptr, output=output)
- if output_on_device:
- arr = arr.get()
- # Compare
- self.assertTrue(
- np.allclose(arr.reshape(array.shape), array),
- "something wrong with densified data (%s)"
- % current_config
- )
-
-
-
-def suite():
- suite = unittest.TestSuite()
- suite.addTest(
- unittest.defaultTestLoader.loadTestsFromTestCase(TestCSR)
- )
- return suite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
-
-
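The CSR tests above reproduce on the GPU what scipy.sparse does on the host: a dense array is compressed into three arrays (data, indices, indptr) and can be densified back without loss. A minimal scipy sketch of that round trip on data shaped like the test's, with roughly 10% non-zeros:

import numpy as np
import scipy.sparse as sp

# Dense float32 array with ~10% non-zero entries, as generate_sparse_random_data produces
dense = np.random.rand(512, 511).astype(np.float32)
dense[np.random.rand(512, 511) > 0.1] = 0

csr = sp.csr_matrix(dense)
data, indices, indptr = csr.data, csr.indices, csr.indptr
# indptr has one entry per row plus one; row i occupies data[indptr[i]:indptr[i+1]]
assert len(indptr) == dense.shape[0] + 1
assert indptr[-1] == csr.nnz == len(data) == len(indices)
# Densifying reproduces the original array exactly
assert np.array_equal(csr.toarray(), dense)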
diff --git a/silx/opencl/test/test_stats.py b/silx/opencl/test/test_stats.py
deleted file mode 100644
index 8baf05e..0000000
--- a/silx/opencl/test/test_stats.py
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: Sift implementation in Python + OpenCL
-# https://github.com/silx-kit/silx
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-"""
-Simple test of the statistics (min, max, mean, std) calculation
-"""
-__authors__ = ["Henri Payno, Jérôme Kieffer"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "19/05/2021"
-
-import logging
-import time
-import numpy
-
-import unittest
-from ..common import ocl
-if ocl:
- import pyopencl
- import pyopencl.array
- from ..statistics import StatResults, Statistics
-from ..utils import get_opencl_code
-logger = logging.getLogger(__name__)
-
-
-@unittest.skipUnless(ocl, "PyOpenCl is missing")
-class TestStatistics(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- cls.size = 1 << 20 # 1 million elements
- cls.data = numpy.random.randint(0, 65000, cls.size).astype("uint16")
- fdata = cls.data.astype("float64")
- t0 = time.perf_counter()
- std = fdata.std()
- cls.ref = StatResults(fdata.min(), fdata.max(), float(fdata.size),
- fdata.sum(), fdata.mean(), std ** 2,
- std)
- t1 = time.perf_counter()
- cls.ref_time = t1 - t0
-
- @classmethod
- def tearDownClass(cls):
- cls.size = cls.ref = cls.data = cls.ref_time = None
-
- @classmethod
- def validate(cls, res):
- return (
- (res.min == cls.ref.min) and
- (res.max == cls.ref.max) and
- (res.cnt == cls.ref.cnt) and
- abs(res.mean - cls.ref.mean) < 0.01 and
- abs(res.std - cls.ref.std) < 0.1)
-
- def test_measurement(self):
- """
- tests that all devices are working properly ...
- """
- logger.info("Reference results: %s", self.ref)
- for pid, platform in enumerate(ocl.platforms):
- for did, device in enumerate(platform.devices):
- try:
- s = Statistics(template=self.data, platformid=pid, deviceid=did)
- except Exception as err:
- failed_init = True
- res = StatResults(0, 0, 0, 0, 0, 0, 0)
- print(err)
- else:
- failed_init = False
- for comp in ("single", "double", "comp"):
- t0 = time.perf_counter()
- res = s(self.data, comp=comp)
- t1 = time.perf_counter()
- logger.info("Runtime on %s/%s : %.3fms x%.1f", platform, device, 1000 * (t1 - t0), self.ref_time / (t1 - t0))
-
- if failed_init or not self.validate(res):
- logger.error("failed_init %s; Computation modes %s", failed_init, comp)
- logger.error("Failed on platform %s device %s", platform, device)
- logger.error("Reference results: %s", self.ref)
- logger.error("Faulty results: %s", res)
- self.assertTrue(False, f"Stat calculation failed on {platform},{device} in mode {comp}")
-
-
-def suite():
- testSuite = unittest.TestSuite()
- testSuite.addTest(TestStatistics("test_measurement"))
- return testSuite
-
-
-if __name__ == '__main__':
- unittest.main(defaultTest="suite")
diff --git a/silx/opencl/utils.py b/silx/opencl/utils.py
deleted file mode 100644
index 575e018..0000000
--- a/silx/opencl/utils.py
+++ /dev/null
@@ -1,214 +0,0 @@
-# -*- coding: utf-8 -*-
-# /*##########################################################################
-# Copyright (C) 2017 European Synchrotron Radiation Facility
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# ############################################################################*/
-"""
-Project: Sift implementation in Python + OpenCL
- https://github.com/silx-kit/silx
-"""
-
-from __future__ import division
-
-__authors__ = ["Jérôme Kieffer", "Pierre Paleo"]
-__contact__ = "jerome.kieffer@esrf.eu"
-__license__ = "MIT"
-__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "06/09/2017"
-__status__ = "Production"
-
-import os
-import numpy
-from .. import resources
-from math import log, ceil
-
-
-def calc_size(shape, blocksize):
- """
- Calculate the global work size for a kernel: round each dimension of shape
- up to a multiple of the workgroup size (blocksize is expected to be a power of two)
- """
- if "__len__" in dir(blocksize):
- return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize))
- else:
- return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape)
-
-
-def nextpower(n):
- """Calculate the power of two
-
- :param n: an integer, for example 100
- :return: another integer, 100-> 128
- """
- return 1 << int(ceil(log(n, 2)))
-
-
-def sizeof(shape, dtype="uint8"):
- """
- Calculate the number of bytes needed to allocate for a given structure
-
- :param shape: size or tuple of sizes
- :param dtype: data type
- """
- itemsize = numpy.dtype(dtype).itemsize
- cnt = 1
- if "__len__" in dir(shape):
- for dim in shape:
- cnt *= dim
- else:
- cnt = int(shape)
- return cnt * itemsize
-
-
-def get_cl_file(resource):
- """get the full path of a openCL resource file
-
- The resource name can be prefixed by the name of a resource directory. For
- example "silx:foo.png" identify the resource "foo.png" from the resource
- directory "silx".
- See also :func:`silx.resources.register_resource_directory`.
-
- :param str resource: Resource name. File name contained if the `opencl`
- directory of the resources.
- :return: the full path of the openCL source file
- """
- if not resource.endswith(".cl"):
- resource += ".cl"
- return resources._resource_filename(resource,
- default_directory="opencl")
-
-
-def read_cl_file(filename):
- """
- :param filename: read an OpenCL file and apply a preprocessor
- :return: preprocessed source code
- """
- with open(get_cl_file(filename), "r") as f:
- # Dummy preprocessor which removes the #include
- lines = [i for i in f.readlines() if not i.startswith("#include ")]
- return "".join(lines)
-
-
-get_opencl_code = read_cl_file
-
-
-def concatenate_cl_kernel(filenames):
- """Concatenates all the kernel from the list of files
-
- :param filenames: filenames containing the kernels
- :type filenames: list of str which can be filename of kernel as a string.
- :return: a string with all kernels concatenated
-
- this method concatenates all the kernel from the list
- """
- return os.linesep.join(read_cl_file(fn) for fn in filenames)
-
-
-
-
-class ConvolutionInfos(object):
- allowed_axes = {
- "1D": [None],
- "separable_2D_1D_2D": [None, (0, 1), (1, 0)],
- "batched_1D_2D": [(0,), (1,)],
- "separable_3D_1D_3D": [
- None,
- (0, 1, 2),
- (1, 2, 0),
- (2, 0, 1),
- (2, 1, 0),
- (1, 0, 2),
- (0, 2, 1)
- ],
- "batched_1D_3D": [(0,), (1,), (2,)],
- "batched_separable_2D_1D_3D": [(0,), (1,), (2,)], # unsupported (?)
- "2D": [None],
- "batched_2D_3D": [(0,), (1,), (2,)],
- "separable_3D_2D_3D": [
- (1, 0),
- (0, 1),
- (2, 0),
- (0, 2),
- (1, 2),
- (2, 1),
- ],
- "3D": [None],
- }
- use_cases = {
- (1, 1): {
- "1D": {
- "name": "1D convolution on 1D data",
- "kernels": ["convol_1D_X"],
- },
- },
- (2, 2): {
- "2D": {
- "name": "2D convolution on 2D data",
- "kernels": ["convol_2D_XY"],
- },
- },
- (3, 3): {
- "3D": {
- "name": "3D convolution on 3D data",
- "kernels": ["convol_3D_XYZ"],
- },
- },
- (2, 1): {
- "separable_2D_1D_2D": {
- "name": "Separable (2D->1D) convolution on 2D data",
- "kernels": ["convol_1D_X", "convol_1D_Y"],
- },
- "batched_1D_2D": {
- "name": "Batched 1D convolution on 2D data",
- "kernels": ["convol_1D_X", "convol_1D_Y"],
- },
- },
- (3, 1): {
- "separable_3D_1D_3D": {
- "name": "Separable (3D->1D) convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- "batched_1D_3D": {
- "name": "Batched 1D convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- "batched_separable_2D_1D_3D": {
- "name": "Batched separable (2D->1D) convolution on 3D data",
- "kernels": ["convol_1D_X", "convol_1D_Y", "convol_1D_Z"],
- },
- },
- (3, 2): {
- "separable_3D_2D_3D": {
- "name": "Separable (3D->2D) convolution on 3D data",
- "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
- },
- "batched_2D_3D": {
- "name": "Batched 2D convolution on 3D data",
- "kernels": ["convol_2D_XY", "convol_2D_XZ", "convol_2D_YZ"],
- },
- },
- }
-
-
-
-
-
-
-
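As a quick illustration of the two rounding helpers defined in utils.py above: calc_size rounds every dimension of a problem shape up to the next multiple of the workgroup size (the bit-masking trick assumes a power-of-two workgroup size, which is how it is used in silx.opencl), and nextpower returns the next power of two at or above its argument. A standalone sketch, with the helpers restated verbatim so the snippet runs on its own:

from math import ceil, log

def calc_size(shape, blocksize):
    # Round each dimension up to a multiple of the (power-of-two) workgroup size
    if "__len__" in dir(blocksize):
        return tuple((int(i) + int(j) - 1) & ~(int(j) - 1) for i, j in zip(shape, blocksize))
    else:
        return tuple((int(i) + int(blocksize) - 1) & ~(int(blocksize) - 1) for i in shape)

def nextpower(n):
    # Next power of two at or above n
    return 1 << int(ceil(log(n, 2)))

assert calc_size((2048, 2000), (32, 32)) == (2048, 2016)  # 2000 -> 63 * 32
assert calc_size((513,), 64) == (576,)                    # 513  -> 9 * 64
assert nextpower(100) == 128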