#!/usr/bin/env python # coding: utf-8 # /*########################################################################## # # Copyright (c) 2016 European Synchrotron Radiation Facility # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. # # ###########################################################################*/ """Module for basic linear algebra in OpenCL""" from __future__ import absolute_import, print_function, with_statement, division __authors__ = ["P. Paleo"] __license__ = "MIT" __date__ = "10/08/2017" import numpy as np from .common import pyopencl from .processing import EventDescription, OpenclProcessing import pyopencl.array as parray cl = pyopencl class LinAlg(OpenclProcessing): kernel_files = ["linalg.cl"] def __init__(self, shape, do_checks=False, ctx=None, devicetype="all", platformid=None, deviceid=None, profile=False): """ Create a "Linear Algebra" plan for a given image shape. :param shape: shape of the image (num_rows, num_columns) :param do_checks (optional): if True, memory and data type checks are performed when possible. :param ctx: actual working context, left to None for automatic initialization from device type or platformid/deviceid :param devicetype: type of device, can be "CPU", "GPU", "ACC" or "ALL" :param platformid: integer with the platform_identifier, as given by clinfo :param deviceid: Integer with the device identifier, as given by clinfo :param profile: switch on profiling to be able to profile at the kernel level, store profiling elements (makes code slightly slower) """ OpenclProcessing.__init__(self, ctx=ctx, devicetype=devicetype, platformid=platformid, deviceid=deviceid, profile=profile) self.d_gradient = parray.zeros(self.queue, shape, np.complex64) self.d_image = parray.zeros(self.queue, shape, np.float32) self.add_to_cl_mem({ "d_gradient": self.d_gradient, "d_image": self.d_image }) self.wg2D = None self.shape = shape self.ndrange2D = ( int(self.shape[1]), int(self.shape[0]) ) self.do_checks = bool(do_checks) OpenclProcessing.compile_kernels(self, self.kernel_files) @staticmethod def check_array(array, dtype, shape, arg_name): if array.shape != shape or array.dtype != dtype: raise ValueError("%s should be a %s array of type %s" %(arg_name, str(shape), str(dtype))) def get_data_references(self, src, dst, default_src_ref, default_dst_ref): """ From various types of src and dst arrays, returns the references to the underlying data (Buffer) that will be used by the OpenCL kernels. # TODO documentation This function will make a copy host->device if the input is on host (eg. numpy array) """ if dst is not None: if isinstance(dst, cl.array.Array): dst_ref = dst.data elif isinstance(dst, cl.Buffer): dst_ref = dst else: raise ValueError("dst should be either pyopencl.array.Array or pyopencl.Buffer") else: dst_ref = default_dst_ref if isinstance(src, cl.array.Array): src_ref = src.data elif isinstance(src, cl.Buffer): src_ref = src else: # assuming numpy.ndarray evt = cl.enqueue_copy(self.queue, default_src_ref, src) self.events.append(EventDescription("copy H->D", evt)) src_ref = default_src_ref return src_ref, dst_ref def gradient(self, image, dst=None, return_to_host=False): """ Compute the spatial gradient of an image. The gradient is computed with first-order difference (not central difference). :param image: image to compute the gradient from. It can be either a numpy.ndarray, a pyopencl Array or Buffer. :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type. :param return_to_host: optional, set to True if you want the result to be transferred back to host. if dst is provided, it should be of type numpy.complex64 ! """ n_y, n_x = np.int32(self.shape) if self.do_checks: self.check_array(image, np.float32, self.shape, "image") if dst is not None: self.check_array(dst, np.complex64, self.shape, "dst") img_ref, grad_ref = self.get_data_references(image, dst, self.d_image.data, self.d_gradient.data) # Prepare the kernel call kernel_args = [ img_ref, grad_ref, n_x, n_y ] # Call the gradient kernel evt = self.kernels.kern_gradient2D( self.queue, self.ndrange2D, self.wg2D, *kernel_args ) self.events.append(EventDescription("gradient2D", evt)) # TODO: should the wait be done in any case ? # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards if dst is None: evt.wait() if return_to_host: if dst is not None: res_tmp = self.d_gradient.get() else: res_tmp = np.zeros(self.shape, dtype=np.complex64) cl.enqueue_copy(self.queue, res_tmp, grad_ref) res = np.zeros((2,) + self.shape, dtype=np.float32) res[0] = np.copy(res_tmp.real) res[1] = np.copy(res_tmp.imag) return res else: return dst def divergence(self, gradient, dst=None, return_to_host=False): """ Compute the spatial divergence of an image. The divergence is designed to be the (negative) adjoint of the gradient. :param gradient: gradient-like array to compute the divergence from. It can be either a numpy.ndarray, a pyopencl Array or Buffer. :param dst: optional, reference to a destination pyopencl Array or Buffer. It must be of complex64 data type. :param return_to_host: optional, set to True if you want the result to be transferred back to host. if dst is provided, it should be of type numpy.complex64 ! """ n_y, n_x = np.int32(self.shape) # numpy.ndarray gradients are expected to be (2, n_y, n_x) if isinstance(gradient, np.ndarray): gradient2 = np.zeros(self.shape, dtype=np.complex64) gradient2.real = np.copy(gradient[0]) gradient2.imag = np.copy(gradient[1]) gradient = gradient2 elif self.do_checks: self.check_array(gradient, np.complex64, self.shape, "gradient") if dst is not None: self.check_array(dst, np.float32, self.shape, "dst") grad_ref, img_ref = self.get_data_references(gradient, dst, self.d_gradient.data, self.d_image.data) # Prepare the kernel call kernel_args = [ grad_ref, img_ref, n_x, n_y ] # Call the gradient kernel evt = self.kernels.kern_divergence2D( self.queue, self.ndrange2D, self.wg2D, *kernel_args ) self.events.append(EventDescription("divergence2D", evt)) # TODO: should the wait be done in any case ? # In the case where dst=None, the wait() is mandatory since a user will be doing arithmetic on dst afterwards if dst is None: evt.wait() if return_to_host: if dst is not None: res = self.d_image.get() else: res = np.zeros(self.shape, dtype=np.float32) cl.enqueue_copy(self.queue, res, img_ref) return res else: return dst