diff options
Diffstat (limited to 'silx/opencl')
-rw-r--r-- | silx/opencl/backprojection.py | 33 | ||||
-rw-r--r-- | silx/opencl/common.py | 90 | ||||
-rw-r--r-- | silx/opencl/convolution.py | 11 | ||||
-rw-r--r-- | silx/opencl/processing.py | 54 | ||||
-rw-r--r-- | silx/opencl/projection.py | 33 | ||||
-rw-r--r-- | silx/opencl/test/test_addition.py | 28 | ||||
-rw-r--r-- | silx/opencl/test/test_backprojection.py | 3 | ||||
-rw-r--r-- | silx/opencl/test/test_convolution.py | 99 |
8 files changed, 211 insertions, 140 deletions
diff --git a/silx/opencl/backprojection.py b/silx/opencl/backprojection.py index 5a4087b..65a9836 100644 --- a/silx/opencl/backprojection.py +++ b/silx/opencl/backprojection.py @@ -164,9 +164,7 @@ class Backprojection(OpenclProcessing): def _allocate_memory(self): # Host memory self.slice = np.zeros(self.dimrec_shape, dtype=np.float32) - self.is_cpu = False - if self.device.type == "CPU": - self.is_cpu = True + self._use_textures = self.check_textures_availability() # Device memory self.buffers = [ @@ -180,7 +178,7 @@ class Backprojection(OpenclProcessing): self.d_sino = self.cl_mem["d_sino"] # shorthand # Texture memory (if relevant) - if not(self.is_cpu): + if self._use_textures: self._allocate_textures() # Local memory @@ -199,7 +197,14 @@ class Backprojection(OpenclProcessing): self.cl_mem["d_axes"][:] = np.ones(self.num_projs, dtype="f") * self.axis_pos def _init_kernels(self): - OpenclProcessing.compile_kernels(self, self.kernel_files) + compile_options = None + if not(self._use_textures): + compile_options = "-DDONT_USE_TEXTURES" + OpenclProcessing.compile_kernels( + self, + self.kernel_files, + compile_options=compile_options + ) # check that workgroup can actually be (16, 16) self.compiletime_workgroup_size = self.kernels.max_workgroup_size("backproj_cpu_kernel") # Workgroup and ndrange sizes are always the same @@ -209,7 +214,7 @@ class Backprojection(OpenclProcessing): _idivup(int(self.dimrec_shape[0]), 32) * self.wg[1] ) # Prepare arguments for the kernel call - if self.is_cpu: + if not(self._use_textures): d_sino_ref = self.d_sino.data else: d_sino_ref = self.d_sino_tex @@ -242,15 +247,7 @@ class Backprojection(OpenclProcessing): """ Allocate the texture for the sinogram. """ - self.d_sino_tex = pyopencl.Image( - self.ctx, - mf.READ_ONLY | mf.USE_HOST_PTR, - pyopencl.ImageFormat( - pyopencl.channel_order.INTENSITY, - pyopencl.channel_type.FLOAT - ), - hostbuf=np.zeros(self.shape[::-1], dtype=np.float32) - ) + self.d_sino_tex = self.allocate_texture(self.shape) def _init_filter(self, filter_name): """Filter initialization @@ -289,7 +286,7 @@ class Backprojection(OpenclProcessing): sino2 = sino if not(sino.flags["C_CONTIGUOUS"] and sino.dtype == np.float32): sino2 = np.ascontiguousarray(sino, dtype=np.float32) - if self.is_cpu: + if not(self._use_textures): ev = pyopencl.enqueue_copy( self.queue, self.d_sino.data, @@ -309,7 +306,7 @@ class Backprojection(OpenclProcessing): return EventDescription(what, ev) def _transfer_device_to_texture(self, d_sino): - if self.is_cpu: + if not(self._use_textures): if id(self.d_sino) == id(d_sino): return ev = pyopencl.enqueue_copy( @@ -343,7 +340,7 @@ class Backprojection(OpenclProcessing): with self.sem: events.append(self._transfer_to_texture(sino)) # Call the backprojection kernel - if self.is_cpu: + if not(self._use_textures): kernel_to_call = self.kernels.backproj_cpu_kernel else: kernel_to_call = self.kernels.backproj_kernel diff --git a/silx/opencl/common.py b/silx/opencl/common.py index 110d941..002c15d 100644 --- a/silx/opencl/common.py +++ b/silx/opencl/common.py @@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "28/11/2019" +__date__ = "30/11/2020" __status__ = "stable" __all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers", "measure_workgroup_size", "kernel_workgroup_size"] @@ -46,10 +46,8 @@ import numpy from .utils import get_opencl_code - logger = logging.getLogger(__name__) - if os.environ.get("SILX_OPENCL") in ["0", "False"]: logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0") pyopencl = None @@ -70,13 +68,13 @@ else: mf = pyopencl.mem_flags if pyopencl is None: + # Define default mem flags class mf(object): WRITE_ONLY = 1 READ_ONLY = 1 READ_WRITE = 1 - FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ... "CPU": 4, # CPU, at least intel's have 4 operation per cycle "ACC": 8} # ACC: the Xeon-phi (MIC) appears to be able to process 8 Flops per hyperthreaded-core @@ -108,6 +106,7 @@ class Device(object): """ Simple class that contains the structure of an OpenCL device """ + def __init__(self, name="None", dtype=None, version=None, driver_version=None, extensions="", memory=None, available=None, cores=None, frequency=None, flop_core=None, idx=0, workgroup=1): @@ -174,6 +173,7 @@ class Platform(object): """ Simple class that contains the structure of an OpenCL platform """ + def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0): """ Class containing all descriptions of a platform and all devices description within that platform. @@ -225,6 +225,8 @@ class Platform(object): def _measure_workgroup_size(device_or_context, fast=False): """Mesure the maximal work group size of the given device + DEPRECATED since not perfectly correct ! + :param device_or_context: instance of pyopencl.Device or pyopencl.Context or 2-tuple (platformid,deviceid) :param fast: ask the kernel the valid value, don't probe it @@ -318,7 +320,7 @@ class OpenCL(object): #################################################### extensions = device.extensions if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions): - extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics' + extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics' try: devtype = pyopencl.device_type.to_string(device.type).upper() except ValueError: @@ -573,6 +575,53 @@ def allocate_cl_buffers(buffers, device=None, context=None): return mem +def allocate_texture(ctx, shape, hostbuf=None, support_1D=False): + """ + Allocate an OpenCL image ("texture"). + + :param ctx: OpenCL context + :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2 + do not support 1D images, so 1D images are handled as 2D with one row + :param support_1D: force the image to be 1D if the shape has only one dim + """ + if len(shape) == 1 and not(support_1D): + shape = (1,) + shape + return pyopencl.Image( + ctx, + pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR, + pyopencl.ImageFormat( + pyopencl.channel_order.INTENSITY, + pyopencl.channel_type.FLOAT + ), + hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32) + ) + + +def check_textures_availability(ctx): + """ + Check whether textures are supported on the current OpenCL context. + + :param ctx: OpenCL context + """ + try: + dummy_texture = allocate_texture(ctx, (16, 16)) + # Need to further access some attributes (pocl) + dummy_height = dummy_texture.height + textures_available = True + del dummy_texture, dummy_height + except (pyopencl.RuntimeError, pyopencl.LogicError): + textures_available = False + # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef + # There is no way to detect this until a kernel is compiled + try: + cc = ctx.devices[0].compute_capability_major_nv + textures_available &= (cc >= 3) + except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU + pass + # + return textures_available + + def measure_workgroup_size(device): """Measure the actual size of the workgroup @@ -599,12 +648,25 @@ def measure_workgroup_size(device): return res -def kernel_workgroup_size(program, kernel): - """Extract the compile time maximum workgroup size +def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"): + """Extract the compile time information from a kernel :param program: OpenCL program :param kernel: kernel or name of the kernel - :return: the maximum acceptable workgroup size for the given kernel + :param what: what is the query about ? + :return: int or 3-int for the workgroup size. + + Possible information available are: + * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_gr oup_size(X, Y, Z)))) + * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1! + * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel + * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint. + * 'PRIVATE_MEM_SIZE' Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel + * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device + + Further information on: + https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html + """ assert isinstance(program, pyopencl.Program) if not isinstance(kernel, pyopencl.Kernel): @@ -613,5 +675,15 @@ def kernel_workgroup_size(program, kernel): kernel = program.__getattr__(kernel_name) device = program.devices[0] - query_wg = pyopencl.kernel_work_group_info.WORK_GROUP_SIZE + query_wg = getattr(pyopencl.kernel_work_group_info, what) return kernel.get_work_group_info(query_wg, device) + + +def kernel_workgroup_size(program, kernel): + """Extract the compile time maximum workgroup size + + :param program: OpenCL program + :param kernel: kernel or name of the kernel + :return: the maximum acceptable workgroup size for the given kernel + """ + return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE") diff --git a/silx/opencl/convolution.py b/silx/opencl/convolution.py index 138b985..15ef931 100644 --- a/silx/opencl/convolution.py +++ b/silx/opencl/convolution.py @@ -91,17 +91,8 @@ class Convolution(OpenclProcessing): } extra_opts = extra_options or {} self.extra_options.update(extra_opts) - self.is_cpu = (self.device.type == "CPU") self.use_textures = not(self.extra_options["dont_use_textures"]) - self.use_textures *= not(self.is_cpu) - # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef - try: - cc = self.ctx.devices[0].compute_capability_major_nv - self.use_textures *= (cc >= 3) - except cl.LogicError: # probably not a Nvidia GPU - pass - except AttributeError: # probably not a Nvidia GPU - pass + self.use_textures &= self.check_textures_availability() def _get_dimensions(self, shape, kernel): self.shape = shape diff --git a/silx/opencl/processing.py b/silx/opencl/processing.py index 6b475b9..470b141 100644 --- a/silx/opencl/processing.py +++ b/silx/opencl/processing.py @@ -36,26 +36,23 @@ Common OpenCL abstract base classe for different processing from __future__ import absolute_import, print_function, division - __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "05/08/2019" +__date__ = "04/12/2020" __status__ = "stable" - import os import logging import gc from collections import namedtuple import numpy import threading -from .common import ocl, pyopencl, release_cl_buffers, kernel_workgroup_size +from .common import ocl, pyopencl, release_cl_buffers, query_kernel_info, allocate_texture, check_textures_availability from .utils import concatenate_cl_kernel import platform - BufferDescription = namedtuple("BufferDescription", ["name", "size", "dtype", "flags"]) EventDescription = namedtuple("EventDescription", ["name", "event"]) @@ -85,13 +82,22 @@ class KernelContainer(object): return self.__dict__.get(name) def max_workgroup_size(self, kernel_name): - "Retrieve the compile time max_workgroup_size for a given kernel" + "Retrieve the compile time WORK_GROUP_SIZE for a given kernel" if isinstance(kernel_name, pyopencl.Kernel): kernel = kernel_name else: kernel = self.get_kernel(kernel_name) - return kernel_workgroup_size(self._program, kernel) + return query_kernel_info(self._program, kernel, "WORK_GROUP_SIZE") + + def min_workgroup_size(self, kernel_name): + "Retrieve the compile time PREFERRED_WORK_GROUP_SIZE_MULTIPLE for a given kernel" + if isinstance(kernel_name, pyopencl.Kernel): + kernel = kernel_name + else: + kernel = self.get_kernel(kernel_name) + + return query_kernel_info(self._program, kernel, "PREFERRED_WORK_GROUP_SIZE_MULTIPLE") class OpenclProcessing(object): @@ -149,6 +155,9 @@ class OpenclProcessing(object): self.program = None self.kernels = None + def check_textures_availability(self): + return check_textures_availability(self.ctx) + def __del__(self): """Destructor: release all buffers and programs """ @@ -156,8 +165,10 @@ class OpenclProcessing(object): self.reset_log() self.free_kernels() self.free_buffers() - except Exception: - pass + if self.queue is not None: + self.queue.finish() + except Exception as err: + logger.warning("%s: %s", type(err), err) self.queue = None self.device = None self.ctx = None @@ -287,6 +298,8 @@ class OpenclProcessing(object): if bool(value) != self.profile: with self.sem: self.profile = bool(value) + if self.queue is not None: + self.queue.finish() if self.profile: self.queue = pyopencl.CommandQueue(self.ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE) @@ -304,24 +317,7 @@ class OpenclProcessing(object): self.events.append(EventDescription(desc, event)) def allocate_texture(self, shape, hostbuf=None, support_1D=False): - """ - Allocate an OpenCL image ("texture"). - - :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2 - do not support 1D images, so 1D images are handled as 2D with one row - :param support_1D: force the image to be 1D if the shape has only one dim - """ - if len(shape) == 1 and not(support_1D): - shape = (1,) + shape - return pyopencl.Image( - self.ctx, - pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR, - pyopencl.ImageFormat( - pyopencl.channel_order.INTENSITY, - pyopencl.channel_type.FLOAT - ), - hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32) - ) + return allocate_texture(self.ctx, shape, hostbuf=hostbuf, support_1D=support_1D) def transfer_to_texture(self, arr, tex_ref): """ @@ -336,10 +332,10 @@ class OpenclProcessing(object): if ndim == 1: # pyopencl and OpenCL < 1.2 do not support image1d_t # force 2D with one row in this case - #~ ndim = 2 + # ~ ndim = 2 shp = (1,) + shp copy_kwargs = {"origin":(0,) * ndim, "region": shp[::-1]} - if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array + if not(isinstance(arr, numpy.ndarray)): # assuming pyopencl.array.Array # D->D copy copy_args[2] = arr.data copy_kwargs["offset"] = 0 diff --git a/silx/opencl/projection.py b/silx/opencl/projection.py index da8752f..c02faf6 100644 --- a/silx/opencl/projection.py +++ b/silx/opencl/projection.py @@ -2,7 +2,7 @@ # coding: utf-8 # /*########################################################################## # -# Copyright (c) 2016 European Synchrotron Radiation Facility +# Copyright (c) 2016-2020 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -115,7 +115,7 @@ class Projection(OpenclProcessing): self.offset_x = -np.float32((self.shape[1] - 1) / 2. - self.axis_pos) # TODO: custom self.offset_y = -np.float32((self.shape[0] - 1) / 2. - self.axis_pos) # TODO: custom # Reset axis_pos once offset are computed - self.axis_pos0 = np.float((self.shape[1] - 1) / 2.) + self.axis_pos0 = np.float64((self.shape[1] - 1) / 2.) # Workgroup, ndrange and shared size self.dimgrid_x = _idivup(self.dwidth, 16) @@ -129,9 +129,7 @@ class Projection(OpenclProcessing): int(self.dimgrid_y) * self.wg[1] # int(): pyopencl <= 2015.1 ) - self.is_cpu = False - if self.device.type == "CPU": - self.is_cpu = True + self._use_textures = self.check_textures_availability() # Allocate memory self.buffers = [ @@ -150,14 +148,14 @@ class Projection(OpenclProcessing): ) self._tmp_extended_img = np.zeros((self.shape[0] + 2, self.shape[1] + 2), dtype=np.float32) - if self.is_cpu: + if not(self._use_textures): self.allocate_slice() else: self.allocate_textures() self.allocate_buffers() self._ex_sino = np.zeros((self._dimrecy, self._dimrecx), dtype=np.float32) - if self.is_cpu: + if not(self._use_textures): self.cl_mem["d_slice"].fill(0.) # enqueue_fill_buffer has issues if opencl 1.2 is not present # ~ pyopencl.enqueue_fill_buffer( @@ -182,7 +180,14 @@ class Projection(OpenclProcessing): # Shorthands self._d_sino = self.cl_mem["_d_sino"] - OpenclProcessing.compile_kernels(self, self.kernel_files) + compile_options = None + if not(self._use_textures): + compile_options = "-DDONT_USE_TEXTURES" + OpenclProcessing.compile_kernels( + self, + self.kernel_files, + compile_options=compile_options + ) # check that workgroup can actually be (16, 16) self.compiletime_workgroup_size = self.kernels.max_workgroup_size("forward_kernel_cpu") @@ -194,7 +199,7 @@ class Projection(OpenclProcessing): pyopencl.enqueue_copy(self.queue, self.cl_mem["d_angles"], angles2) def allocate_slice(self): - ary = parray.zeros(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32) + ary = parray.empty(self.queue, (self.shape[1] + 2, self.shape[1] + 2), np.float32) ary.fill(0) self.add_to_cl_mem({"d_slice": ary}) @@ -212,7 +217,7 @@ class Projection(OpenclProcessing): image2 = image if not(image.flags["C_CONTIGUOUS"] and image.dtype == np.float32): image2 = np.ascontiguousarray(image) - if self.is_cpu: + if not(self._use_textures): # TODO: create NoneEvent return self.transfer_to_slice(image2) # ~ return pyopencl.enqueue_copy( @@ -232,7 +237,7 @@ class Projection(OpenclProcessing): ) def transfer_device_to_texture(self, d_image): - if self.is_cpu: + if not(self._use_textures): # TODO this copy should not be necessary return self.cpy2d_to_slice(d_image) else: @@ -355,14 +360,14 @@ class Projection(OpenclProcessing): assert image.ndim == 2, "Treat only 2D images" assert image.shape[0] == self.shape[0], "image shape is OK" assert image.shape[1] == self.shape[1], "image shape is OK" - if not(self.is_cpu): + if self._use_textures: self.transfer_to_texture(image) slice_ref = self.d_image_tex else: self.transfer_to_slice(image) slice_ref = self.cl_mem["d_slice"].data else: - if self.is_cpu: + if not(self._use_textures): slice_ref = self.cl_mem["d_slice"].data else: slice_ref = self.d_image_tex @@ -388,7 +393,7 @@ class Projection(OpenclProcessing): ) # Call the kernel - if self.is_cpu: + if not(self._use_textures): event_pj = self.kernels.forward_kernel_cpu( self.queue, self.ndrange, diff --git a/silx/opencl/test/test_addition.py b/silx/opencl/test/test_addition.py index 49cc0b4..19dfdf0 100644 --- a/silx/opencl/test/test_addition.py +++ b/silx/opencl/test/test_addition.py @@ -29,19 +29,17 @@ Simple test of an addition """ -from __future__ import division, print_function - __authors__ = ["Henri Payno, Jérôme Kieffer"] __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" __copyright__ = "2013 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "01/08/2019" +__date__ = "30/11/2020" import logging import numpy import unittest -from ..common import ocl, _measure_workgroup_size +from ..common import ocl, _measure_workgroup_size, query_kernel_info if ocl: import pyopencl import pyopencl.array @@ -116,7 +114,7 @@ class TestAddition(unittest.TestCase): @unittest.skipUnless(ocl, "pyopencl is missing") def test_measurement(self): """ - tests that all devices are working properly ... + tests that all devices are working properly ... lengthy and error prone """ for platform in ocl.platforms: for did, device in enumerate(platform.devices): @@ -124,11 +122,31 @@ class TestAddition(unittest.TestCase): self.assertEqual(meas, device.max_work_group_size, "Workgroup size for %s/%s: %s == %s" % (platform, device, meas, device.max_work_group_size)) + @unittest.skipUnless(ocl, "pyopencl is missing") + def test_query(self): + """ + tests that all devices are working properly ... lengthy and error prone + """ + for what in ("COMPILE_WORK_GROUP_SIZE", + "LOCAL_MEM_SIZE", + "PREFERRED_WORK_GROUP_SIZE_MULTIPLE", + "PRIVATE_MEM_SIZE", + "WORK_GROUP_SIZE"): + logger.info("%s: %s", what, query_kernel_info(program=self.program, kernel="addition", what=what)) + + # Not all ICD work properly .... + #self.assertEqual(3, len(query_kernel_info(program=self.program, kernel="addition", what="COMPILE_WORK_GROUP_SIZE")), "3D kernel") + + min_wg = query_kernel_info(program=self.program, kernel="addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE") + max_wg = query_kernel_info(program=self.program, kernel="addition", what="WORK_GROUP_SIZE") + self.assertEqual(max_wg % min_wg, 0, msg="max_wg is a multiple of min_wg") + def suite(): testSuite = unittest.TestSuite() testSuite.addTest(TestAddition("test_add")) # testSuite.addTest(TestAddition("test_measurement")) + testSuite.addTest(TestAddition("test_query")) return testSuite diff --git a/silx/opencl/test/test_backprojection.py b/silx/opencl/test/test_backprojection.py index b2f2070..9dfdd3a 100644 --- a/silx/opencl/test/test_backprojection.py +++ b/silx/opencl/test/test_backprojection.py @@ -96,8 +96,9 @@ class TestFBP(unittest.TestCase): # Therefore, we cannot expect results to be the "same" (up to float32 # numerical error) self.tol = 5e-2 - if self.fbp.is_cpu: + if not(self.fbp._use_textures) or self.fbp.device.type == "CPU": # Precision is less when using CPU + # (either CPU textures or "manual" linear interpolation) self.tol *= 2 def tearDown(self): diff --git a/silx/opencl/test/test_convolution.py b/silx/opencl/test/test_convolution.py index 27cb8a9..7bceb0d 100644 --- a/silx/opencl/test/test_convolution.py +++ b/silx/opencl/test/test_convolution.py @@ -41,15 +41,18 @@ from itertools import product import numpy as np from silx.utils.testutils import parameterize from silx.image.utils import gaussian_kernel + try: from scipy.ndimage import convolve, convolve1d from scipy.misc import ascent + scipy_convolve = convolve scipy_convolve1d = convolve1d except ImportError: scipy_convolve = None import unittest -from ..common import ocl +from ..common import ocl, check_textures_availability + if ocl: import pyopencl as cl import pyopencl.array as parray @@ -59,7 +62,6 @@ logger = logging.getLogger(__name__) @unittest.skipUnless(ocl and scipy_convolve, "PyOpenCl/scipy is missing") class TestConvolution(unittest.TestCase): - @classmethod def setUpClass(cls): super(TestConvolution, cls).setUpClass() @@ -67,7 +69,7 @@ class TestConvolution(unittest.TestCase): cls.data1d = cls.image[0] cls.data2d = cls.image cls.data3d = np.tile(cls.image[224:-224, 224:-224], (62, 1, 1)) - cls.kernel1d = gaussian_kernel(1.) + cls.kernel1d = gaussian_kernel(1.0) cls.kernel2d = np.outer(cls.kernel1d, cls.kernel1d) cls.kernel3d = np.multiply.outer(cls.kernel2d, cls.kernel1d) cls.ctx = ocl.create_context() @@ -97,7 +99,7 @@ class TestConvolution(unittest.TestCase): ) return errmsg - def __init__(self, methodName='runTest', param=None): + def __init__(self, methodName="runTest", param=None): unittest.TestCase.__init__(self, methodName) self.param = param self.mode = param["boundary_handling"] @@ -107,32 +109,27 @@ class TestConvolution(unittest.TestCase): use_textures=%s, input_device=%s, output_device=%s """ % ( - self.mode, param["use_textures"], - param["input_on_device"], param["output_on_device"] + self.mode, + param["use_textures"], + param["input_on_device"], + param["output_on_device"], ) ) def instantiate_convol(self, shape, kernel, axes=None): - def is_fermi_device(dev): - try: - res = (dev.compute_capability_major_nv < 3) - except cl.LogicError: - res = False - except AttributeError: - res = False - return res - if (self.mode == "constant") and ( - not(self.param["use_textures"]) - or (self.ctx.devices[0].type == cl._cl.device_type.CPU) - or (is_fermi_device(self.ctx.devices[0])) + if self.mode == "constant": + if not (self.param["use_textures"]) or ( + self.param["use_textures"] + and not (check_textures_availability(self.ctx)) ): self.skipTest("mode=constant not implemented without textures") C = Convolution( - shape, kernel, + shape, + kernel, mode=self.mode, ctx=self.ctx, axes=axes, - extra_options={"dont_use_textures": not(self.param["use_textures"])} + extra_options={"dont_use_textures": not (self.param["use_textures"])}, ) return C @@ -142,13 +139,9 @@ class TestConvolution(unittest.TestCase): "test_separable_2D": (2, 1), "test_separable_3D": (3, 1), "test_nonseparable_2D": (2, 2), - "test_nonseparable_3D": (3, 3), - } - dim_data = { - 1: self.data1d, - 2: self.data2d, - 3: self.data3d + "test_nonseparable_3D": (3, 3), } + dim_data = {1: self.data1d, 2: self.data2d, 3: self.data3d} dim_kernel = { 1: self.kernel1d, 2: self.kernel2d, @@ -159,24 +152,26 @@ class TestConvolution(unittest.TestCase): def get_reference_function(self, test_name): ref_func = { - "test_1D": - lambda x, y : scipy_convolve1d(x, y, mode=self.mode), - "test_separable_2D": - lambda x, y : scipy_convolve1d( - scipy_convolve1d(x, y, mode=self.mode, axis=1), - y, mode=self.mode, axis=0 - ), - "test_separable_3D": - lambda x, y: scipy_convolve1d( - scipy_convolve1d( - scipy_convolve1d(x, y, mode=self.mode, axis=2), - y, mode=self.mode, axis=1), - y, mode=self.mode, axis=0 + "test_1D": lambda x, y: scipy_convolve1d(x, y, mode=self.mode), + "test_separable_2D": lambda x, y: scipy_convolve1d( + scipy_convolve1d(x, y, mode=self.mode, axis=1), + y, + mode=self.mode, + axis=0, + ), + "test_separable_3D": lambda x, y: scipy_convolve1d( + scipy_convolve1d( + scipy_convolve1d(x, y, mode=self.mode, axis=2), + y, + mode=self.mode, + axis=1, ), - "test_nonseparable_2D": - lambda x, y: scipy_convolve(x, y, mode=self.mode), - "test_nonseparable_3D": - lambda x, y : scipy_convolve(x, y, mode=self.mode), + y, + mode=self.mode, + axis=0, + ), + "test_nonseparable_2D": lambda x, y: scipy_convolve(x, y, mode=self.mode), + "test_nonseparable_3D": lambda x, y: scipy_convolve(x, y, mode=self.mode), } return ref_func[test_name] @@ -226,8 +221,8 @@ class TestConvolution(unittest.TestCase): data = self.data3d kernel = self.kernel2d conv = self.instantiate_convol(data.shape, kernel, axes=(0,)) - res = conv(data) # 3D - ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D + res = conv(data) # 3D + ref = scipy_convolve(data[0], kernel, mode=self.mode) # 2D std = np.std(res, axis=0) std_max = np.max(np.abs(std)) @@ -244,12 +239,9 @@ def test_convolution(): output_on_device_ = [True, False] testSuite = unittest.TestSuite() - param_vals = list(product( - boundary_handling_, - use_textures_, - input_on_device_, - output_on_device_ - )) + param_vals = list( + product(boundary_handling_, use_textures_, input_on_device_, output_on_device_) + ) for boundary_handling, use_textures, input_dev, output_dev in param_vals: testcase = parameterize( TestConvolution, @@ -258,17 +250,16 @@ def test_convolution(): "input_on_device": input_dev, "output_on_device": output_dev, "use_textures": use_textures, - } + }, ) testSuite.addTest(testcase) return testSuite - def suite(): testSuite = test_convolution() return testSuite -if __name__ == '__main__': +if __name__ == "__main__": unittest.main(defaultTest="suite") |