diff options
Diffstat (limited to 'silx/opencl/common.py')
-rw-r--r-- | silx/opencl/common.py | 36 |
1 files changed, 21 insertions, 15 deletions
diff --git a/silx/opencl/common.py b/silx/opencl/common.py index 73cf676..3525bf4 100644 --- a/silx/opencl/common.py +++ b/silx/opencl/common.py @@ -34,7 +34,7 @@ __author__ = "Jerome Kieffer" __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "2012-2017 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "07/06/2019" +__date__ = "28/11/2019" __status__ = "stable" __all__ = ["ocl", "pyopencl", "mf", "release_cl_buffers", "allocate_cl_buffers", "measure_workgroup_size", "kernel_workgroup_size"] @@ -70,6 +70,7 @@ else: mf = pyopencl.mem_flags if pyopencl is None: + # Define default mem flags class mf(object): WRITE_ONLY = 1 READ_ONLY = 1 @@ -77,13 +78,6 @@ if pyopencl is None: else: mf = pyopencl.mem_flags -if pyopencl is None: - # Define default mem flags - class mf(object): - WRITE_ONLY = 1 - READ_ONLY = 1 - READ_WRITE = 1 - FLOP_PER_CORE = {"GPU": 64, # GPU, Fermi at least perform 64 flops per cycle/multicore, G80 were at 24 or 48 ... "CPU": 4, # CPU, at least intel's have 4 operation per cycle @@ -105,8 +99,8 @@ NVIDIA_FLOP_PER_CORE = {(1, 0): 24, # Guessed ! (6, 0): 128, # GP100 (6, 1): 128, # GP104 (6, 2): 128, # ? - (7, 0): 256, # Volta ? - (7, 1): 256, # Volta ? + (7, 0): 128, # Volta # measured on Telsa V100 + (7, 1): 128, # Volta ? } AMD_FLOP_PER_CORE = 160 # Measured on a M7820 10 core, 700MHz 1120GFlops @@ -268,7 +262,8 @@ def _measure_workgroup_size(device_or_context, fast=False): max_valid_wg = 1 data = numpy.random.random(shape).astype(numpy.float32) d_data = pyopencl.array.to_device(queue, data) - d_data_1 = pyopencl.array.zeros_like(d_data) + 1 + d_data_1 = pyopencl.array.empty_like(d_data) + d_data_1.fill(numpy.float32(1.0)) program = pyopencl.Program(ctx, get_opencl_code("addition")).build() if fast: @@ -332,10 +327,21 @@ class OpenCL(object): # pocl does not describe itself as a CPU ! devtype = "CPU" if len(devtype) > 3: - devtype = devtype[:3] - if _is_nvidia_gpu(pypl.vendor, devtype) and "compute_capability_major_nv" in dir(device): - comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv - flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, min(NVIDIA_FLOP_PER_CORE.values())) + if "GPU" in devtype: + devtype = "GPU" + elif "ACC" in devtype: + devtype = "ACC" + elif "CPU" in devtype: + devtype = "CPU" + else: + devtype = devtype[:3] + if _is_nvidia_gpu(device.vendor, devtype) and ("compute_capability_major_nv" in dir(device)): + try: + comput_cap = device.compute_capability_major_nv, device.compute_capability_minor_nv + except pyopencl.LogicError: + flop_core = FLOP_PER_CORE["GPU"] + else: + flop_core = NVIDIA_FLOP_PER_CORE.get(comput_cap, FLOP_PER_CORE["GPU"]) elif (pypl.vendor == "Advanced Micro Devices, Inc.") and (devtype == "GPU"): flop_core = AMD_FLOP_PER_CORE elif devtype == "CPU": |