summary | refs | log | tree | commit | diff
path: root/silx/opencl/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/opencl/common.py')
-rw-r--r-- silx/opencl/common.py 38
1 files changed, 28 insertions, 10 deletions
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 8d31c8a..110d941 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "07/06/2019"
+__date__ = "28/11/2019"
__status__ = "stable"
__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
"measure_workgroup_size", "kernel_workgroup_size"]
@@ -60,8 +60,14 @@ else:
logger.warning("Unable to import pyOpenCl. Please install it from: http://pypi.python.org/pypi/pyopencl")
pyopencl = None
else:
- import pyopencl.array as array
- mf = pyopencl.mem_flags
+ try:
+ pyopencl.get_platforms()
+ except pyopencl.LogicError:
+ logger.warning("The module pyOpenCL has been imported but can't be used here")
+ pyopencl = None
+ else:
+ import pyopencl.array as array
+ mf = pyopencl.mem_flags
if pyopencl is None:
# Define default mem flags
@@ -91,8 +97,8 @@ NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
(6, 0): 128, # GP100
(6, 1): 128, # GP104
(6, 2): 128, # ?
- (7, 0): 256, # Volta ?
- (7, 1): 256, # Volta ?
+ (7, 0): 128, # Volta # measured on Tesla V100
+ (7, 1): 128, # Volta ?
}
AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
@@ -254,7 +260,8 @@ def _measure_workgroup_size(device_or_context, fast=False):
max_valid_wg = 1
data = numpy.random.random(shape).astype(numpy.float32)
d_data = pyopencl.array.to_device(queue, data)
- d_data_1 = pyopencl.array.zeros_like(d_data) + 1
+ d_data_1 = pyopencl.array.empty_like(d_data)
+ d_data_1.fill(numpy.float32(1.0))
program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
if fast:
@@ -318,10 +325,21 @@ class OpenCL(object):
# pocl does not describe itself as a CPU !
devtype = "CPU"
if len(devtype) > 3:
- devtype = devtype[:3]
- if _is_nvidia_gpu(pypl.vendor, devtype) and "compute_capability_major_nv" in dir(device):
- comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
- flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, min(NVIDIA_FLOP_PER_CORE.values()))
+ if "GPU" in devtype:
+ devtype = "GPU"
+ elif "ACC" in devtype:
+ devtype = "ACC"
+ elif "CPU" in devtype:
+ devtype = "CPU"
+ else:
+ devtype = devtype[:3]
+ if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
+ try:
+ comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
+ except pyopencl.LogicError:
+ flop_core = FLOP_PER_CORE["GPU"]
+ else:
+ flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
flop_core = AMD_FLOP_PER_CORE
elif devtype == "CPU":