summary refs log tree commit diff
path: root/silx/opencl/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'silx/opencl/common.py')
-rw-r--r-- silx/opencl/common.py 36
1 files changed, 21 insertions, 15 deletions
diff --git a/silx/opencl/common.py b/silx/opencl/common.py
index 73cf676..3525bf4 100644
--- a/silx/opencl/common.py
+++ b/silx/opencl/common.py
@@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer"
__contact__ = "Jerome.Kieffer@ESRF.eu"
__license__ = "MIT"
__copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France"
-__date__ = "07/06/2019"
+__date__ = "28/11/2019"
__status__ = "stable"
__all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers",
"measure_workgroup_size", "kernel_workgroup_size"]
@@ -70,6 +70,7 @@ else:
mf = pyopencl.mem_flags
if pyopencl is None:
+ # Define default mem flags
class mf(object):
WRITE_ONLY = 1
READ_ONLY = 1
@@ -77,13 +78,6 @@ if pyopencl is None:
else:
mf = pyopencl.mem_flags
-if pyopencl is None:
- # Define default mem flags
- class mf(object):
- WRITE_ONLY = 1
- READ_ONLY = 1
- READ_WRITE = 1
-
FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ...
"CPU": 4, # CPU, at least intel's have 4 operation per cycle
@@ -105,8 +99,8 @@ NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed !
(6, 0): 128, # GP100
(6, 1): 128, # GP104
(6, 2): 128, # ?
- (7, 0): 256, # Volta ?
- (7, 1): 256, # Volta ?
+ (7, 0): 128, # Volta # measured on Tesla V100
+ (7, 1): 128, # Volta ?
}
AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops
@@ -268,7 +262,8 @@ def _measure_workgroup_size(device_or_context, fast=False):
max_valid_wg = 1
data = numpy.random.random(shape).astype(numpy.float32)
d_data = pyopencl.array.to_device(queue, data)
- d_data_1 = pyopencl.array.zeros_like(d_data) + 1
+ d_data_1 = pyopencl.array.empty_like(d_data)
+ d_data_1.fill(numpy.float32(1.0))
program = pyopencl.Program(ctx, get_opencl_code("addition")).build()
if fast:
@@ -332,10 +327,21 @@ class OpenCL(object):
# pocl does not describe itself as a CPU !
devtype = "CPU"
if len(devtype) > 3:
- devtype = devtype[:3]
- if _is_nvidia_gpu(pypl.vendor, devtype) and "compute_capability_major_nv" in dir(device):
- comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
- flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, min(NVIDIA_FLOP_PER_CORE.values()))
+ if "GPU" in devtype:
+ devtype = "GPU"
+ elif "ACC" in devtype:
+ devtype = "ACC"
+ elif "CPU" in devtype:
+ devtype = "CPU"
+ else:
+ devtype = devtype[:3]
+ if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)):
+ try:
+ comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv
+ except pyopencl.LogicError:
+ flop_core = FLOP_PER_CORE["GPU"]
+ else:
+ flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"])
elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"):
flop_core = AMD_FLOP_PER_CORE
elif devtype == "CPU":