Diffstat (limited to 'silx/opencl')
-rw-r--r--  silx/opencl/backprojection.py            33
-rw-r--r--  silx/opencl/common.py                    90
-rw-r--r--  silx/opencl/convolution.py               11
-rw-r--r--  silx/opencl/processing.py                54
-rw-r--r--  silx/opencl/projection.py                33
-rw-r--r--  silx/opencl/test/test_addition.py        28
-rw-r--r--  silx/opencl/test/test_backprojection.py   3
-rw-r--r--  silx/opencl/test/test_convolution.py     99
8 files changed, 211 insertions, 140 deletions
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py
index 5a4087b..65a9836 100644
--- a/silx/opencl/backprojection.py
+++ b/silx/opencl/backprojection.py
@@ -164,9 +164,7 @@ class Backprojection(OpenclProcessing):
def _allocate_memory(self):
# Host memory
self.slice = np.zeros(self.dimrec_shape, dtype=np.float32)
- self.is_cpu = False
- if self.device.type == "CPU":
- self.is_cpu = True
+ self._use_textures = self.check_textures_availability()
# Device memory
self.buffers = [
@@ -180,7 +178,7 @@ class Backprojection(OpenclProcessing):
self.d_sino = self.cl_mem["d_sino"] # shorthand
# Texture memory (if relevant)
- if not(self.is_cpu):
+ if self._use_textures:
self._allocate_textures()
# Local memory
@@ -199,7 +197,14 @@ class Backprojection(OpenclProcessing):
self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos
def _init_kernels(self):
- OpenclProcessing.compile_kernels(self, self.kernel_files)
+ compile_options = None
+ if not(self._use_textures):
+ compile_options = "-DDONT_USE_TEXTURES"
+ OpenclProcessing.compile_kernels(
+ self,
+ self.kernel_files,
+ compile_options=compile_options
+ )
# check that workgroup can actually be (16, 16)
self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel")
# Workgroup and ndrange sizes are always the same
@@ -209,7 +214,7 @@ class Backprojection(OpenclProcessing):
_idivup(int(self.dimrec_shape[0]), 32) * self.wg[1]
)
# Prepare arguments for the kernel call
- if self.is_cpu:
+ if not(self._use_textures):
d_sino_ref = self.d_sino.data
else:
d_sino_ref = self.d_sino_tex
@@ -242,15 +247,7 @@ class Backprojection(OpenclProcessing):
"""
Allocate the texture for the sinogram.
"""
- self.d_sino_tex = pyopencl.Image(
- self.ctx,
- mf.READ_ONLY | mf.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ),
- hostbuf=np.zeros(self.shape[::-1], dtype=np.float32)
- )
+ self.d_sino_tex = self.allocate_texture(self.shape)
def _init_filter(self, filter_name):
"""Filter initialization
@@ -289,7 +286,7 @@ class Backprojection(OpenclProcessing):
sino2 = sino
if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32):
sino2 = np.ascontiguousarray(sino, dtype=np.float32)
- if self.is_cpu:
+ if not(self._use_textures):
ev = pyopencl.enqueue_copy(
self.queue,
self.d_sino.data,
@@ -309,7 +306,7 @@ class Backprojection(OpenclProcessing):
return EventDescription(what, ev)
def _transfer_device_to_texture(self, d_sino):
- if self.is_cpu:
+ if not(self._use_textures):
if id(self.d_sino) == id(d_sino):
return
ev = pyopencl.enqueue_copy(
@@ -343,7 +340,7 @@ class Backprojection(OpenclProcessing):
with self.sem:
events.append(self._transfer_to_texture(sino))
# Call the backprojection kernel
- if self.is_cpu:
+ if not(self._use_textures):
kernel_to_call = self.kernels.backproj_cpu_kernel
else:
kernel_to_call = self.kernels.backproj_kernel
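
The backprojection change above replaces the device-type test (`is_cpu`) with an explicit texture probe (`_use_textures`), which drives both the `-DDONT_USE_TEXTURES` compile option and the kernel choice. A minimal sketch of that pattern, assuming an already-constructed `OpenclProcessing` subclass instance; the helper name `build_backprojector` is illustrative, not part of silx:

# Sketch only: probe texture support once, then derive the compile options and the
# kernel selection from the same flag (mirrors the diff above, not the actual silx code).
def build_backprojector(processing, kernel_files):
    use_textures = processing.check_textures_availability()
    compile_options = None if use_textures else "-DDONT_USE_TEXTURES"
    processing.compile_kernels(kernel_files, compile_options=compile_options)
    # Use the texture-based kernel only when images are actually usable on this device
    if use_textures:
        return processing.kernels.backproj_kernel
    return processing.kernels.backproj_cpu_kernel
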
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 110d941..002c15d 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "28/11/2019"
+__date__ = "30/11/2020"
__status__ = "stable"
__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
"measure_workgroup_size", "kernel_workgroup_size"]
@@ -46,10 +46,8 @@ import numpy
from .utils import get_opencl_code
-
logger = logging.getLogger(__name__)
-
if os.environ.get("SILX_OPENCL") in ["0", "False"]:
logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
pyopencl = None
@@ -70,13 +68,13 @@ else:
mf = pyopencl.mem_flags
if pyopencl is None:
+
# Define default mem flags
class mf(object):
WRITE_ONLY = 1
READ_ONLY = 1
READ_WRITE = 1
-
FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
"CPU": 4, # CPU, at least intel's have 4 operation per cycle
"ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core
@@ -108,6 +106,7 @@ class Device(object):
"""
Simple class that contains the structure of an OpenCL device
"""
+
def __init__(self, name="None", dtype=None, version=None, driver_version=None,
extensions="", memory=None, available=None,
cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
@@ -174,6 +173,7 @@ class Platform(object):
"""
Simple class that contains the structure of an OpenCL platform
"""
+
def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
"""
Class containing all descriptions of a platform and all devices description within that platform.
@@ -225,6 +225,8 @@ class Platform(object):
def _measure_workgroup_size(device_or_context, fast=False):
"""Mesure the maximal work group size of the given device
+ DEPRECATED since not perfectly correct !
+
:param device_or_context: instance of pyopencl.Device or pyopencl.Context
or 2-tuple (platformid,deviceid)
:param fast: ask the kernel the valid value, don't probe it
@@ -318,7 +320,7 @@ class OpenCL(object):
####################################################
extensions = device.extensions
if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
- extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
+ extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
try:
devtype = pyopencl.device_type.to_string(device.type).upper()
except ValueError:
@@ -573,6 +575,53 @@ def allocate_cl_buffers(buffers, device=None, context=None):
return mem
+def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
+ """
+ Allocate an OpenCL image ("texture").
+
+ :param ctx: OpenCL context
+ :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
+ do not support 1D images, so 1D images are handled as 2D with one row
+ :param support_1D: force the image to be 1D if the shape has only one dim
+ """
+ if len(shape) == 1 and not(support_1D):
+ shape = (1,) + shape
+ return pyopencl.Image(
+ ctx,
+ pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
+ pyopencl.ImageFormat(
+ pyopencl.channel_order.INTENSITY,
+ pyopencl.channel_type.FLOAT
+ ),
+ hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
+ )
+
+
+def check_textures_availability(ctx):
+ """
+ Check whether textures are supported on the current OpenCL context.
+
+ :param ctx: OpenCL context
+ """
+ try:
+ dummy_texture = allocate_texture(ctx, (16, 16))
+ # Need to further access some attributes (pocl)
+ dummy_height = dummy_texture.height
+ textures_available = True
+ del dummy_texture, dummy_height
+ except (pyopencl.RuntimeError, pyopencl.LogicError):
+ textures_available = False
+ # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
+ # There is no way to detect this until a kernel is compiled
+ try:
+ cc = ctx.devices[0].compute_capability_major_nv
+ textures_available &= (cc >= 3)
+ except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU
+ pass
+ #
+ return textures_available
+
+
def measure_workgroup_size(device):
"""Measure the actual size of the workgroup
@@ -599,12 +648,25 @@ def measure_workgroup_size(device):
return res
-def kernel_workgroup_size(program, kernel):
- """Extract the compile time maximum workgroup size
+def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
+ """Extract the compile time information from a kernel
:param program: OpenCL program
:param kernel: kernel or name of the kernel
- :return: the maximum acceptable workgroup size for the given kernel
+ :param what: what is the query about ?
+ :return: int or 3-int for the workgroup size.
+
+ Possible information available are:
+    * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
+ * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1!
+ * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel
+ * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
+ * 'PRIVATE_MEM_SIZE' Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel
+ * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
+
+ Further information on:
+ https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
+
"""
assert isinstance(program, pyopencl.Program)
if not isinstance(kernel, pyopencl.Kernel):
@@ -613,5 +675,15 @@ def kernel_workgroup_size(program, kernel):
kernel = program.__getattr__(kernel_name)
device = program.devices[0]
- query_wg = pyopencl.kernel_work_group_info.WORK_GROUP_SIZE
+ query_wg = getattr(pyopencl.kernel_work_group_info, what)
return kernel.get_work_group_info(query_wg, device)
+
+
+def kernel_workgroup_size(program, kernel):
+ """Extract the compile time maximum workgroup size
+
+ :param program: OpenCL program
+ :param kernel: kernel or name of the kernel
+ :return: the maximum acceptable workgroup size for the given kernel
+ """
+ return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE")
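
The new module-level helpers in common.py can also be used on their own, outside of OpenclProcessing. A short usage sketch, assuming pyopencl is installed and at least one OpenCL device is available; the trivial kernel source is a throwaway example, not part of silx:

# Usage sketch for the helpers added to silx.opencl.common (assumes a working
# pyopencl installation with at least one OpenCL device).
import pyopencl
from silx.opencl.common import (ocl, allocate_texture, check_textures_availability,
                                query_kernel_info, kernel_workgroup_size)

ctx = ocl.create_context()                      # device selection via silx's OpenCL singleton
if check_textures_availability(ctx):
    # 1D shapes are promoted to (1, n): pyopencl and OpenCL < 1.2 lack image1d_t
    sino_tex = allocate_texture(ctx, (512,))

# Compile a trivial kernel and query its compile-time characteristics
src = "kernel void dummy(global float *a) { a[get_global_id(0)] += 1.0f; }"
program = pyopencl.Program(ctx, src).build()
max_wg = query_kernel_info(program, "dummy", what="WORK_GROUP_SIZE")
pref_wg = query_kernel_info(program, "dummy", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
assert kernel_workgroup_size(program, "dummy") == max_wg    # old name kept as a thin wrapper
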
diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py
index 138b985..15ef931 100644
--- a/silx/opencl/convolution.py
+++ b/silx/opencl/convolution.py
@@ -91,17 +91,8 @@ class Convolution(OpenclProcessing):
}
extra_opts = extra_options or {}
self.extra_options.update(extra_opts)
- self.is_cpu = (self.device.type == "CPU")
self.use_textures = not(self.extra_options["dont_use_textures"])
- self.use_textures *= not(self.is_cpu)
- # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
- try:
- cc = self.ctx.devices[0].compute_capability_major_nv
- self.use_textures *= (cc >= 3)
- except cl.LogicError: # probably not a Nvidia GPU
- pass
- except AttributeError: # probably not a Nvidia GPU
- pass
+ self.use_textures &= self.check_textures_availability()
def _get_dimensions(self, shape, kernel):
self.shape = shape
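
For Convolution, texture usage is now a single flag: the user option combined with the runtime probe. A hedged usage sketch that forces the no-texture code path; the test image and Gaussian kernel mirror what test_convolution.py below uses:

# Sketch: run a separable Gaussian convolution with textures explicitly disabled.
import numpy as np
from scipy.misc import ascent                    # same test image as the unit tests below
from silx.image.utils import gaussian_kernel
from silx.opencl.convolution import Convolution

image = ascent().astype(np.float32)
kernel = gaussian_kernel(1.0)                    # 1D kernel -> separable convolution
conv = Convolution(image.shape, kernel,
                   extra_options={"dont_use_textures": True})
smoothed = conv(image)
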
diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py
index 6b475b9..470b141 100644
--- a/silx/opencl/processing.py
+++ b/silx/opencl/processing.py
@@ -36,26 +36,23 @@ Common OpenCL abstract base classe for different processing
from __future__ import absolute_import, print_function, division
-
__author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "05/08/2019"
+__date__ = "04/12/2020"
__status__ = "stable"
-
import os
import logging
import gc
from collections import namedtuple
import numpy
import threading
-from .common import ocl, pyopencl, release_cl_buffers, kernel_workgroup_size
+from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability
from .utils import concatenate_cl_kernel
import platform
-
BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"])
EventDescription = namedtuple("EventDescription", ["name", "event"])
@@ -85,13 +82,22 @@ class KernelContainer(object):
return self.__dict__.get(name)
def max_workgroup_size(self, kernel_name):
- "Retrieve the compile time max_workgroup_size for a given kernel"
+ "Retrieve the compile time WORK_GROUP_SIZE for a given kernel"
if isinstance(kernel_name, pyopencl.Kernel):
kernel = kernel_name
else:
kernel = self.get_kernel(kernel_name)
- return kernel_workgroup_size(self._program, kernel)
+ return query_kernel_info(self._program, kernel, "WORK_GROUP_SIZE")
+
+ def min_workgroup_size(self, kernel_name):
+ "Retrieve the compile time PREFERRED_WORK_GROUP_SIZE_MULTIPLE for a given kernel"
+ if isinstance(kernel_name, pyopencl.Kernel):
+ kernel = kernel_name
+ else:
+ kernel = self.get_kernel(kernel_name)
+
+ return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
class OpenclProcessing(object):
@@ -149,6 +155,9 @@ class OpenclProcessing(object):
self.program = None
self.kernels = None
+ def check_textures_availability(self):
+ return check_textures_availability(self.ctx)
+
def __del__(self):
"""Destructor: release all buffers and programs
"""
@@ -156,8 +165,10 @@ class OpenclProcessing(object):
self.reset_log()
self.free_kernels()
self.free_buffers()
- except Exception:
- pass
+ if self.queue is not None:
+ self.queue.finish()
+ except Exception as err:
+ logger.warning("%s: %s", type(err), err)
self.queue = None
self.device = None
self.ctx = None
@@ -287,6 +298,8 @@ class OpenclProcessing(object):
if bool(value) != self.profile:
with self.sem:
self.profile = bool(value)
+ if self.queue is not None:
+ self.queue.finish()
if self.profile:
self.queue = pyopencl.CommandQueue(self.ctx,
properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
@@ -304,24 +317,7 @@ class OpenclProcessing(object):
self.events.append(EventDescription(desc, event))
def allocate_texture(self, shape, hostbuf=None, support_1D=False):
- """
- Allocate an OpenCL image ("texture").
-
- :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
- do not support 1D images, so 1D images are handled as 2D with one row
- :param support_1D: force the image to be 1D if the shape has only one dim
- """
- if len(shape) == 1 and not(support_1D):
- shape = (1,) + shape
- return pyopencl.Image(
- self.ctx,
- pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ),
- hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32)
- )
+ return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D)
def transfer_to_texture(self, arr, tex_ref):
"""
@@ -336,10 +332,10 @@ class OpenclProcessing(object):
if ndim == 1:
# pyopencl and OpenCL < 1.2 do not support image1d_t
# force 2D with one row in this case
- #~ ndim = 2
+ # ~ ndim = 2
shp = (1,) + shp
copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]}
- if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
+ if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array
# D->D copy
copy_args[2] = arr.data
copy_kwargs["offset"] = 0
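
KernelContainer now exposes both the maximum ("WORK_GROUP_SIZE") and the preferred ("PREFERRED_WORK_GROUP_SIZE_MULTIPLE") workgroup sizes, and OpenclProcessing.allocate_texture delegates to the shared helper in common.py. A minimal subclass sketch; the kernel file and kernel name are hypothetical placeholders:

# Hypothetical subclass sketch exercising the new OpenclProcessing/KernelContainer API;
# "my_filter.cl" and "my_kernel" are placeholders, not actual silx kernels.
from silx.opencl.processing import OpenclProcessing

class MyFilter(OpenclProcessing):
    kernel_files = ["my_filter.cl"]

    def setup(self, shape=(256, 256)):
        use_textures = self.check_textures_availability()
        opts = None if use_textures else "-DDONT_USE_TEXTURES"
        self.compile_kernels(self.kernel_files, compile_options=opts)
        wg_max = self.kernels.max_workgroup_size("my_kernel")    # compile-time maximum
        wg_pref = self.kernels.min_workgroup_size("my_kernel")   # preferred multiple
        if use_textures:
            self.d_tex = self.allocate_texture(shape)            # wraps common.allocate_texture
        return wg_max, wg_pref
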
diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py
index da8752f..c02faf6 100644
--- a/silx/opencl/projection.py
+++ b/silx/opencl/projection.py
@@ -2,7 +2,7 @@
# coding: utf-8
# /*##########################################################################
#
-# Copyright (c) 2016 European Synchrotron Radiation Facility
+# Copyright (c) 2016-2020 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -115,7 +115,7 @@ class Projection(OpenclProcessing):
self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos) # TODO: custom
self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos) # TODO: custom
# Reset axis_pos once offset are computed
- self.axis_pos0 = np.float((self.shape[1] - 1) / 2.)
+ self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.)
# Workgroup, ndrange and shared size
self.dimgrid_x = _idivup(self.dwidth, 16)
@@ -129,9 +129,7 @@ class Projection(OpenclProcessing):
int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1
)
- self.is_cpu = False
- if self.device.type == "CPU":
- self.is_cpu = True
+ self._use_textures = self.check_textures_availability()
# Allocate memory
self.buffers = [
@@ -150,14 +148,14 @@ class Projection(OpenclProcessing):
)
self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2),
dtype=np.float32)
- if self.is_cpu:
+ if not(self._use_textures):
self.allocate_slice()
else:
self.allocate_textures()
self.allocate_buffers()
self._ex_sino = np.zeros((self._dimrecy, self._dimrecx),
dtype=np.float32)
- if self.is_cpu:
+ if not(self._use_textures):
self.cl_mem["d_slice"].fill(0.)
# enqueue_fill_buffer has issues if opencl 1.2 is not present
# ~ pyopencl.enqueue_fill_buffer(
@@ -182,7 +180,14 @@ class Projection(OpenclProcessing):
# Shorthands
self._d_sino = self.cl_mem["_d_sino"]
- OpenclProcessing.compile_kernels(self, self.kernel_files)
+ compile_options = None
+ if not(self._use_textures):
+ compile_options = "-DDONT_USE_TEXTURES"
+ OpenclProcessing.compile_kernels(
+ self,
+ self.kernel_files,
+ compile_options=compile_options
+ )
# check that workgroup can actually be (16, 16)
self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu")
@@ -194,7 +199,7 @@ class Projection(OpenclProcessing):
pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2)
def allocate_slice(self):
- ary = parray.zeros(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
+ ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32)
ary.fill(0)
self.add_to_cl_mem({"d_slice": ary})
@@ -212,7 +217,7 @@ class Projection(OpenclProcessing):
image2 = image
if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32):
image2 = np.ascontiguousarray(image)
- if self.is_cpu:
+ if not(self._use_textures):
# TODO: create NoneEvent
return self.transfer_to_slice(image2)
# ~ return pyopencl.enqueue_copy(
@@ -232,7 +237,7 @@ class Projection(OpenclProcessing):
)
def transfer_device_to_texture(self, d_image):
- if self.is_cpu:
+ if not(self._use_textures):
# TODO this copy should not be necessary
return self.cpy2d_to_slice(d_image)
else:
@@ -355,14 +360,14 @@ class Projection(OpenclProcessing):
assert image.ndim == 2, "Treat only 2D images"
assert image.shape[0] == self.shape[0], "image shape is OK"
assert image.shape[1] == self.shape[1], "image shape is OK"
- if not(self.is_cpu):
+ if self._use_textures:
self.transfer_to_texture(image)
slice_ref = self.d_image_tex
else:
self.transfer_to_slice(image)
slice_ref = self.cl_mem["d_slice"].data
else:
- if self.is_cpu:
+ if not(self._use_textures):
slice_ref = self.cl_mem["d_slice"].data
else:
slice_ref = self.d_image_tex
@@ -388,7 +393,7 @@ class Projection(OpenclProcessing):
)
# Call the kernel
- if self.is_cpu:
+ if not(self._use_textures):
event_pj = self.kernels.forward_kernel_cpu(
self.queue,
self.ndrange,
diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py
index 49cc0b4..19dfdf0 100644
--- a/silx/opencl/test/test_addition.py
+++ b/silx/opencl/test/test_addition.py
@@ -29,19 +29,17 @@
Simple test of an addition
"""
-from __future__ import division, print_function
-
__authors__ = ["Henri Payno, Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
__copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "01/08/2019"
+__date__ = "30/11/2020"
import logging
import numpy
import unittest
-from ..common import ocl, _measure_workgroup_size
+from ..common import ocl, _measure_workgroup_size, query_kernel_info
if ocl:
import pyopencl
import pyopencl.array
@@ -116,7 +114,7 @@ class TestAddition(unittest.TestCase):
@unittest.skipUnless(ocl, "pyopencl is missing")
def test_measurement(self):
"""
- tests that all devices are working properly ...
+ tests that all devices are working properly ... lengthy and error prone
"""
for platform in ocl.platforms:
for did, device in enumerate(platform.devices):
@@ -124,11 +122,31 @@ class TestAddition(unittest.TestCase):
self.assertEqual(meas, device.max_work_group_size,
"Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size))
+ @unittest.skipUnless(ocl, "pyopencl is missing")
+ def test_query(self):
+ """
+ tests that all devices are working properly ... lengthy and error prone
+ """
+ for what in ("COMPILE_WORK_GROUP_SIZE",
+ "LOCAL_MEM_SIZE",
+ "PREFERRED_WORK_GROUP_SIZE_MULTIPLE",
+ "PRIVATE_MEM_SIZE",
+ "WORK_GROUP_SIZE"):
+ logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what))
+
+ # Not all ICD work properly ....
+ #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel")
+
+ min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")
+ max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE")
+ self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg")
+
def suite():
testSuite = unittest.TestSuite()
testSuite.addTest(TestAddition("test_add"))
# testSuite.addTest(TestAddition("test_measurement"))
+ testSuite.addTest(TestAddition("test_query"))
return testSuite
diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py
index b2f2070..9dfdd3a 100644
--- a/silx/opencl/test/test_backprojection.py
+++ b/silx/opencl/test/test_backprojection.py
@@ -96,8 +96,9 @@ class TestFBP(unittest.TestCase):
# Therefore, we cannot expect results to be the "same" (up to float32
# numerical error)
self.tol = 5e-2
- if self.fbp.is_cpu:
+ if not(self.fbp._use_textures) or self.fbp.device.type == "CPU":
# Precision is less when using CPU
+ # (either CPU textures or "manual" linear interpolation)
self.tol *= 2
def tearDown(self):
diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py
index 27cb8a9..7bceb0d 100644
--- a/silx/opencl/test/test_convolution.py
+++ b/silx/opencl/test/test_convolution.py
@@ -41,15 +41,18 @@ from itertools import product
import numpy as np
from silx.utils.testutils import parameterize
from silx.image.utils import gaussian_kernel
+
try:
from scipy.ndimage import convolve, convolve1d
from scipy.misc import ascent
+
scipy_convolve = convolve
scipy_convolve1d = convolve1d
except ImportError:
scipy_convolve = None
import unittest
-from ..common import ocl
+from ..common import ocl, check_textures_availability
+
if ocl:
import pyopencl as cl
import pyopencl.array as parray
@@ -59,7 +62,6 @@ logger = logging.getLogger(__name__)
@unittest.skipUnless(ocl and scipy_convolve, "PyOpenCl/scipy is missing")
class TestConvolution(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
super(TestConvolution, cls).setUpClass()
@@ -67,7 +69,7 @@ class TestConvolution(unittest.TestCase):
cls.data1d = cls.image[0]
cls.data2d = cls.image
cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1))
- cls.kernel1d = gaussian_kernel(1.)
+ cls.kernel1d = gaussian_kernel(1.0)
cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d)
cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d)
cls.ctx = ocl.create_context()
@@ -97,7 +99,7 @@ class TestConvolution(unittest.TestCase):
)
return errmsg
- def __init__(self, methodName='runTest', param=None):
+ def __init__(self, methodName="runTest", param=None):
unittest.TestCase.__init__(self, methodName)
self.param = param
self.mode = param["boundary_handling"]
@@ -107,32 +109,27 @@ class TestConvolution(unittest.TestCase):
use_textures=%s, input_device=%s, output_device=%s
"""
% (
- self.mode, param["use_textures"],
- param["input_on_device"], param["output_on_device"]
+ self.mode,
+ param["use_textures"],
+ param["input_on_device"],
+ param["output_on_device"],
)
)
def instantiate_convol(self, shape, kernel, axes=None):
- def is_fermi_device(dev):
- try:
- res = (dev.compute_capability_major_nv < 3)
- except cl.LogicError:
- res = False
- except AttributeError:
- res = False
- return res
- if (self.mode == "constant") and (
- not(self.param["use_textures"])
- or (self.ctx.devices[0].type == cl._cl.device_type.CPU)
- or (is_fermi_device(self.ctx.devices[0]))
+ if self.mode == "constant":
+ if not (self.param["use_textures"]) or (
+ self.param["use_textures"]
+ and not (check_textures_availability(self.ctx))
):
self.skipTest("mode=constant not implemented without textures")
C = Convolution(
- shape, kernel,
+ shape,
+ kernel,
mode=self.mode,
ctx=self.ctx,
axes=axes,
- extra_options={"dont_use_textures": not(self.param["use_textures"])}
+ extra_options={"dont_use_textures": not (self.param["use_textures"])},
)
return C
@@ -142,13 +139,9 @@ class TestConvolution(unittest.TestCase):
"test_separable_2D": (2, 1),
"test_separable_3D": (3, 1),
"test_nonseparable_2D": (2, 2),
- "test_nonseparable_3D": (3, 3),
- }
- dim_data = {
- 1: self.data1d,
- 2: self.data2d,
- 3: self.data3d
+ "test_nonseparable_3D": (3, 3),
}
+ dim_data = {1: self.data1d, 2: self.data2d, 3: self.data3d}
dim_kernel = {
1: self.kernel1d,
2: self.kernel2d,
@@ -159,24 +152,26 @@ class TestConvolution(unittest.TestCase):
def get_reference_function(self, test_name):
ref_func = {
- "test_1D":
- lambda x, y : scipy_convolve1d(x, y, mode=self.mode),
- "test_separable_2D":
- lambda x, y : scipy_convolve1d(
- scipy_convolve1d(x, y, mode=self.mode, axis=1),
- y, mode=self.mode, axis=0
- ),
- "test_separable_3D":
- lambda x, y: scipy_convolve1d(
- scipy_convolve1d(
- scipy_convolve1d(x, y, mode=self.mode, axis=2),
- y, mode=self.mode, axis=1),
- y, mode=self.mode, axis=0
+ "test_1D": lambda x, y: scipy_convolve1d(x, y, mode=self.mode),
+ "test_separable_2D": lambda x, y: scipy_convolve1d(
+ scipy_convolve1d(x, y, mode=self.mode, axis=1),
+ y,
+ mode=self.mode,
+ axis=0,
+ ),
+ "test_separable_3D": lambda x, y: scipy_convolve1d(
+ scipy_convolve1d(
+ scipy_convolve1d(x, y, mode=self.mode, axis=2),
+ y,
+ mode=self.mode,
+ axis=1,
),
- "test_nonseparable_2D":
- lambda x, y: scipy_convolve(x, y, mode=self.mode),
- "test_nonseparable_3D":
- lambda x, y : scipy_convolve(x, y, mode=self.mode),
+ y,
+ mode=self.mode,
+ axis=0,
+ ),
+ "test_nonseparable_2D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
+ "test_nonseparable_3D": lambda x, y: scipy_convolve(x, y, mode=self.mode),
}
return ref_func[test_name]
@@ -226,8 +221,8 @@ class TestConvolution(unittest.TestCase):
data = self.data3d
kernel = self.kernel2d
conv = self.instantiate_convol(data.shape, kernel, axes=(0,))
- res = conv(data) # 3D
- ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D
+ res = conv(data) # 3D
+ ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D
std = np.std(res, axis=0)
std_max = np.max(np.abs(std))
@@ -244,12 +239,9 @@ def test_convolution():
output_on_device_ = [True, False]
testSuite = unittest.TestSuite()
- param_vals = list(product(
- boundary_handling_,
- use_textures_,
- input_on_device_,
- output_on_device_
- ))
+ param_vals = list(
+ product(boundary_handling_, use_textures_, input_on_device_, output_on_device_)
+ )
for boundary_handling, use_textures, input_dev, output_dev in param_vals:
testcase = parameterize(
TestConvolution,
@@ -258,17 +250,16 @@ def test_convolution():
"input_on_device": input_dev,
"output_on_device": output_dev,
"use_textures": use_textures,
- }
+ },
)
testSuite.addTest(testcase)
return testSuite
-
def suite():
testSuite = test_convolution()
return testSuite
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main(defaultTest="suite")