Diffstat (limited to 'silx/opencl/common.py')
-rw-r--r--  silx/opencl/common.py | 691
1 file changed, 0 insertions(+), 691 deletions(-)
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
deleted file mode 100644
index da966f6..0000000
--- a/silx/opencl/common.py
+++ /dev/null
@@ -1,691 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Project: S I L X project
-# https://github.com/silx-kit/silx
-#
-# Copyright (C) 2012-2021 European Synchrotron Radiation Facility, Grenoble, France
-#
-# Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu)
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation
-# files (the "Software"), to deal in the Software without
-# restriction, including without limitation the rights to use,
-# copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following
-# conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-#
-
-__author__ = "Jerome Kieffer"
-__contact__ = "Jerome.Kieffer@ESRF.eu"
-__license__ = "MIT"
-__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "30/11/2020"
-__status__ = "stable"
-__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
- "measure_workgroup_size", "kernel_workgroup_size"]
-
-import os
-import logging
-
-import numpy
-
-from .utils import get_opencl_code
-
-logger = logging.getLogger(__name__)
-
-if os.environ.get("SILX_OPENCL") in ["0", "False"]:
- logger.info("Use of OpenCL has been disabled from environment variable: SILX_OPENCL=0")
- pyopencl = None
-else:
- try:
- import pyopencl
- except ImportError:
- logger.warning("Unable to import pyOpenCl. Please install it from: https://pypi.org/project/pyopencl")
- pyopencl = None
- else:
- try:
- pyopencl.get_platforms()
- except pyopencl.LogicError:
- logger.warning("The module pyOpenCL has been imported but can't be used here")
- pyopencl = None
- else:
- import pyopencl.array as array
-
-if pyopencl is None:
-
- # Define default mem flags
- class mf(object):
- WRITE_ONLY = 1
- READ_ONLY = 1
- READ_WRITE = 1
-else:
- mf = pyopencl.mem_flags
-
-FLOP_PER_CORE = {"GPU": 64, # GPU: Fermi, at least, performs 64 flops per cycle per compute unit; G80 was at 24 or 48 ...
- "CPU": 4, # CPU: Intel's have at least 4 operations per cycle
- "ACC": 8} # ACC: the Xeon Phi (MIC) appears to process 8 flops per hyperthreaded core
-
-# Sources: https://en.wikipedia.org/wiki/CUDA
-NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
- (1, 1): 24, # Measured on G98 [Quadro NVS 295]
- (1, 2): 24, # Guessed !
- (1, 3): 24, # measured on a GT285 (GT200)
- (2, 0): 64, # Measured on a 580 (GF110)
- (2, 1): 96, # Measured on Quadro2000 GF106GL
- (3, 0): 384, # Guessed!
- (3, 5): 384, # Measured on K20
- (3, 7): 384, # K80: Guessed!
- (5, 0): 256, # Maxwell 4 warps/SM 2 flops/ CU
- (5, 2): 256, # Titan-X
- (5, 3): 256, # TX1
- (6, 0): 128, # GP100
- (6, 1): 128, # GP104
- (6, 2): 128, # ?
- (7, 0): 128, # Volta, measured on Tesla V100
- (7, 1): 128, # Volta ?
- }
-
-AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
-
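-# Illustrative note (annotation, not part of the original file): these figures
-# feed the theoretical throughput estimate computed in Device.__init__ below:
-# flops = cores * frequency[MHz] * flop_core. For a hypothetical 16-CU GPU at
-# 1000 MHz with the generic GPU figure of 64 flops/cycle/CU:
-#
-#   >>> 16 * 1000 * 64  # MFLOPS; pretty_print() divides by 1000 to get GFLOPS
-#   1024000
-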
-
-class Device(object):
- """
- Simple class that contains the structure of an OpenCL device
- """
-
- def __init__(self, name="None", dtype=None, version=None, driver_version=None,
- extensions="", memory=None, available=None,
- cores=None, frequency=None, flop_core=None, idx=0, workgroup=1):
- """
- Simple container with some important data for the OpenCL device description.
-
- :param name: name of the device
- :param dtype: device type: CPU/GPU/ACC...
- :param version: driver version
- :param driver_version:
- :param extensions: List of opencl extensions
- :param memory: maximum memory available on the device
- :param available: is the device available (i.e. not deactivated)
- :param cores: number of SM/cores
- :param frequency: frequency of the device
- :param flop_core: floating point operations per core per cycle
- :param idx: index of the device within the platform
- :param workgroup: max workgroup size
- """
- self.name = name.strip()
- self.type = dtype
- self.version = version
- self.driver_version = driver_version
- self.extensions = extensions.split()
- self.memory = memory
- self.available = available
- self.cores = cores
- self.frequency = frequency
- self.id = idx
- self.max_work_group_size = workgroup
- if not flop_core:
- flop_core = FLOP_PER_CORE.get(dtype, 1)
- if cores and frequency:
- self.flops = cores * frequency * flop_core
- else:
- self.flops = flop_core
-
- def __repr__(self):
- return "%s" % self.name
-
- def pretty_print(self):
- """
- Complete device description
-
- :return: string
- """
- lst = ["Name\t\t:\t%s" % self.name,
- "Type\t\t:\t%s" % self.type,
- "Memory\t\t:\t%.3f MB" % (self.memory / 2.0 ** 20),
- "Cores\t\t:\t%s CU" % self.cores,
- "Frequency\t:\t%s MHz" % self.frequency,
- "Speed\t\t:\t%.3f GFLOPS" % (self.flops / 1000.),
- "Version\t\t:\t%s" % self.version,
- "Available\t:\t%s" % self.available]
- return os.linesep.join(lst)
-
- def set_unavailable(self):
- """Use this method to flag a faulty device
- """
- self.available = False
-
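-# Illustrative usage sketch (annotation, not part of the original module):
-# building a Device by hand with made-up figures.
-#
-#   >>> dev = Device(name="Dummy GPU", dtype="GPU", memory=2 ** 30,
-#   ...              available=True, cores=16, frequency=1000)
-#   >>> dev.flops  # 16 CU * 1000 MHz * 64 flops/cycle (generic GPU figure)
-#   1024000
-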
-
-class Platform(object):
- """
- Simple class that contains the structure of an OpenCL platform
- """
-
- def __init__(self, name="None", vendor="None", version=None, extensions=None, idx=0):
- """
- Class containing all descriptions of a platform and all devices description within that platform.
-
- :param name: platform name
- :param vendor: name of the brand/vendor
- :param version:
- :param extensions: list of the extensions provided by the platform to all of its devices
- :param idx: index of the platform
- """
- self.name = name.strip()
- self.vendor = vendor.strip()
- self.version = version
- self.extensions = (extensions or "").split()
- self.devices = []
- self.id = idx
-
- def __repr__(self):
- return "%s" % self.name
-
- def add_device(self, device):
- """
- Add new device to the platform
-
- :param device: Device instance
- """
- self.devices.append(device)
-
- def get_device(self, key):
- """
- Return a device according to key
-
- :param key: identifier for a device, either its id (int) or its name
- :type key: int or str
- """
- out = None
- try:
- devid = int(key)
- except ValueError:
- for a_dev in self.devices:
- if a_dev.name == key:
- out = a_dev
- else:
- if len(self.devices) > devid >= 0:
- out = self.devices[devid]
- return out
-
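-# Illustrative usage sketch (annotation, not part of the original module):
-# a Platform is a plain container of Device objects, addressable through
-# get_device() by index or by name.
-#
-#   >>> plat = Platform(name="Dummy platform", vendor="ACME", extensions="")
-#   >>> plat.add_device(Device(name="Dummy GPU", dtype="GPU"))
-#   >>> plat.get_device("Dummy GPU") is plat.devices[0]
-#   True
-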
-
-def _measure_workgroup_size(device_or_context, fast=False):
- """Mesure the maximal work group size of the given device
-
- DEPRECATED since not perfectly correct !
-
- :param device_or_context: instance of pyopencl.Device or pyopencl.Context
- or 2-tuple (platformid,deviceid)
- :param fast: ask the kernel the valid value, don't probe it
- :return: maximum size for the workgroup
- """
- if isinstance(device_or_context, pyopencl.Device):
- try:
- ctx = pyopencl.Context(devices=[device_or_context])
- except pyopencl._cl.LogicError as error:
- platform = device_or_context.platform
- platformid = pyopencl.get_platforms().index(platform)
- deviceid = platform.get_devices().index(device_or_context)
- ocl.platforms[platformid].devices[deviceid].set_unavailable()
- raise RuntimeError("Unable to create context on %s/%s: %s" % (platform, device_or_context, error))
- else:
- device = device_or_context
- elif isinstance(device_or_context, pyopencl.Context):
- ctx = device_or_context
- device = device_or_context.devices[0]
- elif isinstance(device_or_context, (tuple, list)) and len(device_or_context) == 2:
- ctx = ocl.create_context(platformid=device_or_context[0],
- deviceid=device_or_context[1])
- device = ctx.devices[0]
- else:
- raise RuntimeError("""given parameter device_or_context is not an
- instanciation of a device or a context""")
- shape = device.max_work_group_size
-
- assert ctx is not None
- queue = pyopencl.CommandQueue(ctx)
-
- max_valid_wg = 1
- data = numpy.random.random(shape).astype(numpy.float32)
- d_data = pyopencl.array.to_device(queue, data)
- d_data_1 = pyopencl.array.empty_like(d_data)
- d_data_1.fill(numpy.float32(1.0))
-
- program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
- if fast:
- max_valid_wg = program.addition.get_work_group_info(pyopencl.kernel_work_group_info.WORK_GROUP_SIZE, device)
- else:
- maxi = int(round(numpy.log2(shape)))
- for i in range(maxi + 1):
- d_res = pyopencl.array.empty_like(d_data)
- wg = 1 << i
- try:
- evt = program.addition(
- queue, (shape,), (wg,),
- d_data.data, d_data_1.data, d_res.data, numpy.int32(shape))
- evt.wait()
- except Exception as error:
- logger.info("%s on device %s for WG=%s/%s", error, device.name, wg, shape)
- program = queue = d_res = d_data_1 = d_data = None
- break
- else:
- res = d_res.get()
- good = numpy.allclose(res, data + 1)
- if good:
- if wg > max_valid_wg:
- max_valid_wg = wg
- else:
- logger.warning("ArithmeticError on %s for WG=%s/%s", wg, device.name, shape)
-
- return max_valid_wg
-
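-# Illustrative usage sketch (annotation; the result is hardware dependent,
-# the value below is hypothetical):
-#
-#   >>> _measure_workgroup_size((0, 0))  # platform 0, device 0  # doctest: +SKIP
-#   1024
-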
-
-def _is_nvidia_gpu(vendor, devtype):
- return (vendor == "NVIDIA Corporation") and (devtype == "GPU")
-
-
-class OpenCL(object):
- """
- Simple class that wraps the structure ocl_tools_extended.h
-
- This is a static class.
- ocl should be the only instance and shared among all python modules.
- """
-
- platforms = []
- nb_devices = 0
- context_cache = {} # key: 2-tuple of int, value: context
- if pyopencl:
- platform = device = pypl = devtype = extensions = pydev = None
- for idx, platform in enumerate(pyopencl.get_platforms()):
- pypl = Platform(platform.name, platform.vendor, platform.version, platform.extensions, idx)
- for idd, device in enumerate(platform.get_devices()):
- ####################################################
- # Nvidia does not report the int64 atomics extensions (which we use) ...
- # this is a workaround, as any Nvidia GPU with double-precision support also provides int64 atomics
- ####################################################
- extensions = device.extensions
- if (pypl.vendor == "NVIDIA Corporation") and ('cl_khr_fp64' in extensions):
- extensions += ' cl_khr_int64_base_atomics cl_khr_int64_extended_atomics'
- try:
- devtype = pyopencl.device_type.to_string(device.type).upper()
- except ValueError:
- # pocl does not describe itself as a CPU!
- devtype = "CPU"
- if len(devtype) > 3:
- if "GPU" in devtype:
- devtype = "GPU"
- elif "ACC" in devtype:
- devtype = "ACC"
- elif "CPU" in devtype:
- devtype = "CPU"
- else:
- devtype = devtype[:3]
- if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
- try:
- comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
- except pyopencl.LogicError:
- flop_core = FLOP_PER_CORE["GPU"]
- else:
- flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
- elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
- flop_core = AMD_FLOP_PER_CORE
- elif devtype == "CPU":
- flop_core = FLOP_PER_CORE.get(devtype, 1)
- else:
- flop_core = 1
- workgroup = device.max_work_group_size
- if (devtype == "CPU") and (pypl.vendor == "Apple"):
- logger.info("For Apple's OpenCL on CPU: Measuring actual valid max_work_goup_size.")
- workgroup = _measure_workgroup_size(device, fast=True)
- if (devtype == "GPU") and os.environ.get("GPU") == "False":
- # Environment variable to disable GPU devices
- continue
- pydev = Device(device.name, devtype, device.version, device.driver_version, extensions,
- device.global_mem_size, bool(device.available), device.max_compute_units,
- device.max_clock_frequency, flop_core, idd, workgroup)
- pypl.add_device(pydev)
- nb_devices += 1
- platforms.append(pypl)
- del platform, device, pypl, devtype, extensions, pydev
-
- def __repr__(self):
- out = ["OpenCL devices:"]
- for platformid, platform in enumerate(self.platforms):
- deviceids = ["(%s,%s) %s" % (platformid, deviceid, dev.name)
- for deviceid, dev in enumerate(platform.devices)]
- out.append("[%s] %s: " % (platformid, platform.name) + ", ".join(deviceids))
- return os.linesep.join(out)
-
- def get_platform(self, key):
- """
- Return a platform according to key
-
- :param key: identifier for a platform, either its id (int) or its name
- :type key: int or str
- """
- out = None
- try:
- platid = int(key)
- except ValueError:
- for a_plat in self.platforms:
- if a_plat.name == key:
- out = a_plat
- else:
- if len(self.platforms) > platid >= 0:
- out = self.platforms[platid]
- return out
-
- def select_device(self, dtype="ALL", memory=None, extensions=None, best=True, **kwargs):
- """
- Select a device based on a few parameters (at the end, keep the one with the most flops)
-
- :param dtype: "gpu" or "cpu" or "all" ....
- :param memory: minimum amount of memory (int)
- :param extensions: list of extensions to be present
- :param best: shall we look for the device with the most flops?
- :returns: a tuple of platform ID and device ID; None if nothing was found
- """
- if extensions is None:
- extensions = []
- if "type" in kwargs:
- dtype = kwargs["type"].upper()
- else:
- dtype = dtype.upper()
- if len(dtype) > 3:
- dtype = dtype[:3]
- best_found = None
- for platformid, platform in enumerate(self.platforms):
- for deviceid, device in enumerate(platform.devices):
- if not device.available:
- continue
- if (dtype in ["ALL", "DEF"]) or (device.type == dtype):
- if (memory is None) or (memory <= device.memory):
- found = True
- for ext in extensions:
- if ext not in device.extensions:
- found = False
- if found:
- if not best:
- return platformid, deviceid
- else:
- if not best_found:
- best_found = platformid, deviceid, device.flops
- elif best_found[2] < device.flops:
- best_found = platformid, deviceid, device.flops
- if best_found:
- return best_found[0], best_found[1]
-
- # Nothing found
- return None
-
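-# Illustrative usage sketch (annotation; the returned ids depend on the host):
-#
-#   >>> ocl.select_device(dtype="GPU", extensions=["cl_khr_fp64"])  # doctest: +SKIP
-#   (0, 1)
-#   >>> ocl.select_device(memory=2 ** 30)  # any device with at least 1 GB  # doctest: +SKIP
-#   (0, 0)
-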
- def create_context(self, devicetype="ALL", useFp64=False, platformid=None,
- deviceid=None, cached=True, memory=None, extensions=None):
- """
- Choose a device and initiate a context.
-
- Device types can be GPU, gpu, CPU, cpu, DEF, ACC, ALL; GPU and CPU are suggested.
- For each setting to work, such an OpenCL device must exist and be properly installed.
- E.g. if an Nvidia driver is installed, GPU will succeed but CPU will fail
- (the AMD SDK is required for CPU via OpenCL).
-
- :param devicetype: string in ["cpu", "gpu", "all", "acc"]
- :param useFp64: boolean specifying if double precision will be used; deprecated, use extensions=["cl_khr_fp64"] instead
- :param platformid: integer
- :param deviceid: integer
- :param cached: True if we want to cache the context
- :param memory: minimum amount of memory of the device
- :param extensions: list of extensions to be present
- :return: OpenCL context on the selected device
- """
- if extensions is None:
- extensions = []
- if useFp64:
- logger.warning("Deprecation: please select your device using the extension name!, i.e. extensions=['cl_khr_fp64']")
- extensions.append('cl_khr_fp64')
-
- if (platformid is not None) and (deviceid is not None):
- platformid = int(platformid)
- deviceid = int(deviceid)
- elif "PYOPENCL_CTX" in os.environ:
- pyopencl_ctx = [int(i) if i.isdigit() else 0 for i in os.environ["PYOPENCL_CTX"].split(":")]
- pyopencl_ctx += [0] * (2 - len(pyopencl_ctx)) # pad with 0
- platformid, deviceid = pyopencl_ctx
- else:
- ids = ocl.select_device(type=devicetype, extensions=extensions)
- if ids:
- platformid, deviceid = ids
- ctx = None
- if (platformid is not None) and (deviceid is not None):
- if (platformid, deviceid) in self.context_cache:
- ctx = self.context_cache[(platformid, deviceid)]
- else:
- try:
- ctx = pyopencl.Context(devices=[pyopencl.get_platforms()[platformid].get_devices()[deviceid]])
- except pyopencl._cl.LogicError as error:
- self.platforms[platformid].devices[deviceid].set_unavailable()
- logger.warning("Unable to create context on %s/%s: %s", platformid, deviceid, error)
- ctx = None
- else:
- if cached:
- self.context_cache[(platformid, deviceid)] = ctx
- if ctx is None:
- logger.warning("Last chance to get an OpenCL device ... probably not the one requested")
- ctx = pyopencl.create_some_context(interactive=False)
- return ctx
-
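-# Illustrative usage sketch (annotation, not part of the original module):
-# three ways of obtaining a context, assuming platform 0 / device 0 exists.
-#
-#   >>> ctx = ocl.create_context(platformid=0, deviceid=0)  # doctest: +SKIP
-#   >>> os.environ["PYOPENCL_CTX"] = "0:0"  # honoured when no ids are given
-#   >>> ctx = ocl.create_context()  # doctest: +SKIP
-#   >>> ctx = ocl.create_context(devicetype="GPU")  # best GPU available  # doctest: +SKIP
-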
- def device_from_context(self, context):
- """
- Retrieves the Device from the context
-
- :param context: OpenCL context
- :return: instance of Device
- """
- odevice = context.devices[0]
- oplat = odevice.platform
- device_id = oplat.get_devices().index(odevice)
- platform_id = pyopencl.get_platforms().index(oplat)
- return self.platforms[platform_id].devices[device_id]
-
-
-if pyopencl:
- ocl = OpenCL()
- if ocl.nb_devices == 0:
- ocl = None
-else:
- ocl = None
-
-
-def release_cl_buffers(cl_buffers):
- """
- :param cl_buffers: the buffer you want to release
- :type cl_buffers: dict(str, pyopencl.Buffer)
-
- This function releases the memory of the buffers stored in the dict
- """
- for key, buffer_ in cl_buffers.items():
- if buffer_ is not None:
- if isinstance(buffer_, pyopencl.array.Array):
- try:
- buffer_.data.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- else:
- try:
- buffer_.release()
- except pyopencl.LogicError:
- logger.error("Error while freeing buffer %s", key)
- cl_buffers[key] = None
- return cl_buffers
-
-
-def allocate_cl_buffers(buffers, device=None, context=None):
- """
- :param buffers: the buffer descriptions used to create the pyopencl.Buffer
- :type buffers: list(str, flag, numpy.dtype, int)
- :param device: one of the context devices
- :param context: OpenCL context
- :return: a dict containing the instantiated pyopencl.Buffer
- :rtype: dict(str, pyopencl.Buffer)
-
- This function instantiates the pyopencl.Buffer objects from the buffer
- descriptions.
- """
- mem = {}
- if device is None:
- device = ocl.device_from_context(context)
-
- # check if enough memory is available on the device
- ualloc = 0
- for _, _, dtype, size in buffers:
- ualloc += numpy.dtype(dtype).itemsize * size
- memory = device.memory
- logger.info("%.3fMB are needed on device which has %.3fMB",
- ualloc / 1.0e6, memory / 1.0e6)
- if ualloc >= memory:
- memError = "Fatal error in allocate_buffers."
- memError += "Not enough device memory for buffers"
- memError += "(%lu requested, %lu available)" % (ualloc, memory)
- raise MemoryError(memError) # noqa
-
- # do the allocation
- try:
- for name, flag, dtype, size in buffers:
- mem[name] = pyopencl.Buffer(context, flag,
- numpy.dtype(dtype).itemsize * size)
- except pyopencl.MemoryError as error:
- release_cl_buffers(mem)
- raise MemoryError(error)
-
- return mem
-
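-# Illustrative usage sketch (annotation; names and sizes are made up): each
-# buffer is described by a (name, flags, dtype, nb_of_elements) 4-tuple.
-#
-#   >>> buffers = [("input", mf.READ_ONLY, numpy.float32, 1024),
-#   ...            ("output", mf.WRITE_ONLY, numpy.float32, 1024)]
-#   >>> mem = allocate_cl_buffers(buffers, context=ctx)  # doctest: +SKIP
-#   >>> mem = release_cl_buffers(mem)  # doctest: +SKIP
-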
-
-def allocate_texture(ctx, shape, hostbuf=None, support_1D=False):
- """
- Allocate an OpenCL image ("texture").
-
- :param ctx: OpenCL context
- :param shape: Shape of the image. Note that pyopencl and OpenCL < 1.2
- do not support 1D images, so 1D images are handled as 2D with one row
- :param hostbuf: optional host-side numpy array used to initialise the image (zeros if not given)
- :param support_1D: force the image to be 1D if the shape has only one dim
- """
- if len(shape) == 1 and not support_1D:
- shape = (1,) + shape
- return pyopencl.Image(
- ctx,
- pyopencl.mem_flags.READ_ONLY | pyopencl.mem_flags.USE_HOST_PTR,
- pyopencl.ImageFormat(
- pyopencl.channel_order.INTENSITY,
- pyopencl.channel_type.FLOAT
- ),
- hostbuf=numpy.zeros(shape[::-1], dtype=numpy.float32) if hostbuf is None else hostbuf
- )
-
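-# Illustrative usage sketch (annotation): `shape` follows the numpy
-# (rows, columns) convention; a 1D shape is promoted to a single row
-# unless support_1D is set.
-#
-#   >>> img = allocate_texture(ctx, (512, 1024))  # doctest: +SKIP
-#   >>> img1d = allocate_texture(ctx, (4096,))  # handled as (1, 4096)  # doctest: +SKIP
-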
-
-def check_textures_availability(ctx):
- """
- Check whether textures are supported on the current OpenCL context.
-
- :param ctx: OpenCL context
- """
- try:
- dummy_texture = allocate_texture(ctx, (16, 16))
- # Need to further access some attributes (pocl)
- dummy_height = dummy_texture.height
- textures_available = True
- del dummy_texture, dummy_height
- except (pyopencl.RuntimeError, pyopencl.LogicError):
- textures_available = False
- # Nvidia Fermi GPUs (compute capability 2.X) do not support opencl read_imagef
- # There is no way to detect this until a kernel is compiled
- try:
- cc = ctx.devices[0].compute_capability_major_nv
- textures_available &= (cc >= 3)
- except (pyopencl.LogicError, AttributeError): # probably not a Nvidia GPU
- pass
- return textures_available
-
-
-def measure_workgroup_size(device):
- """Measure the actual size of the workgroup
-
- :param device: device or context or 2-tuple with indexes
- :return: the actual measured workgroup size
-
- if device is "all", returns a dict with all devices with their ids as keys.
- """
- if (ocl is None) or (device is None):
- return None
-
- if isinstance(device, tuple) and (len(device) == 2):
- # this is probably a tuple (platformid, deviceid)
- device = ocl.create_context(platformid=device[0], deviceid=device[1])
-
- if device == "all":
- res = {}
- for pid, platform in enumerate(ocl.platforms):
- for did, _devices in enumerate(platform.devices):
- tup = (pid, did)
- res[tup] = measure_workgroup_size(tup)
- else:
- res = _measure_workgroup_size(device)
- return res
-
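-# Illustrative usage sketch (annotation; values depend on the hardware):
-#
-#   >>> measure_workgroup_size("all")  # doctest: +SKIP
-#   {(0, 0): 1024, (0, 1): 256}
-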
-
-def query_kernel_info(program, kernel, what="WORK_GROUP_SIZE"):
- """Extract the compile time information from a kernel
-
- :param program: OpenCL program
- :param kernel: kernel or name of the kernel
- :param what: what is the query about?
- :return: int or 3-int for the workgroup size.
-
- Possible information available are:
- * 'COMPILE_WORK_GROUP_SIZE': Returns the work-group size specified inside the kernel (__attribute__((reqd_work_group_size(X, Y, Z))))
- * 'GLOBAL_WORK_SIZE': maximum global size that can be used to execute a kernel #OCL2.1!
- * 'LOCAL_MEM_SIZE': amount of local memory in bytes being used by the kernel
- * 'PREFERRED_WORK_GROUP_SIZE_MULTIPLE': preferred multiple of workgroup size for launch. This is a performance hint.
- * 'PRIVATE_MEM_SIZE' Returns the minimum amount of private memory, in bytes, used by each workitem in the kernel
- * 'WORK_GROUP_SIZE': maximum work-group size that can be used to execute a kernel on a specific device given by device
-
- Further information on:
- https://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/clGetKernelWorkGroupInfo.html
-
- """
- assert isinstance(program, pyopencl.Program)
- if not isinstance(kernel, pyopencl.Kernel):
- kernel_name = kernel
- assert kernel_name in (k.function_name for k in program.all_kernels()), "the kernel must exist in the program"
- kernel = getattr(program, kernel_name)
-
- device = program.devices[0]
- query_wg = getattr(pyopencl.kernel_work_group_info, what)
- return kernel.get_work_group_info(query_wg, device)
-
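-# Illustrative usage sketch (annotation), assuming `program` was built with a
-# kernel named "addition", as in _measure_workgroup_size above:
-#
-#   >>> query_kernel_info(program, "addition", what="PREFERRED_WORK_GROUP_SIZE_MULTIPLE")  # doctest: +SKIP
-#   32
-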
-
-def kernel_workgroup_size(program, kernel):
- """Extract the compile time maximum workgroup size
-
- :param program: OpenCL program
- :param kernel: kernel or name of the kernel
- :return: the maximum acceptable workgroup size for the given kernel
- """
- return query_kernel_info(program, kernel, what="WORK_GROUP_SIZE")