diff options
Diffstat (limited to 'silx/resources/opencl/array_utils.cl')
-rw-r--r-- | silx/resources/opencl/array_utils.cl | 66 |
1 files changed, 53 insertions, 13 deletions
diff --git a/silx/resources/opencl/array_utils.cl b/silx/resources/opencl/array_utils.cl
index 60677dc..6f78921 100644
--- a/silx/resources/opencl/array_utils.cl
+++ b/silx/resources/opencl/array_utils.cl
@@ -1,7 +1,6 @@
 /**
  * 2D Memcpy for float* arrays,
- * replacing pyopencl "enqueue_copy" which does not return the expected result
- * when dealing with rectangular buffers.
+ * replacing pyopencl "enqueue_copy" which does not work for rectangular copies.
  * ALL THE SIZES/OFFSETS ARE SPECIFIED IN PIXELS, NOT IN BYTES.
  * In the (x, y) convention, x is the fast index (as in CUDA).
  *
@@ -15,19 +14,60 @@
  *
  */
 kernel void cpy2d(
-        global float* dst,
-        global float* src,
-        int dst_width,
-        int src_width,
-        int2 dst_offset,
-        int2 src_offset,
-        int2 transfer_shape)
+    global float* dst,
+    global float* src,
+    int dst_width,
+    int src_width,
+    int2 dst_offset,
+    int2 src_offset,
+    int2 transfer_shape)
 {
-    int gidx = get_global_id(0),
-        gidy = get_global_id(1);
-    if (gidx < transfer_shape.x && gidy < transfer_shape.y)
-    {
+    int gidx = get_global_id(0), gidy = get_global_id(1);
+    if (gidx < transfer_shape.x && gidy < transfer_shape.y) {
         dst[(dst_offset.y + gidy)*dst_width + (dst_offset.x + gidx)]
         = src[(src_offset.y + gidy)*src_width + (src_offset.x + gidx)];
     }
 }
+
+// Looks like cfloat_t and cfloat_mul are not working, yet specified in
+// pyopencl documentation. Here we are using float2 as in all available examples
+// #include <pyopencl-complex.h>
+// typedef cfloat_t complex;
+
+static inline float2 complex_mul(float2 a, float2 b) {
+    float2 res = (float2) (0, 0);
+    res.x = a.x * b.x - a.y * b.y;
+    res.y = a.y * b.x + a.x * b.y;
+    return res;
+}
+
+// arr2D *= arr1D (line by line, i.e along fast dim)
+kernel void inplace_complex_mul_2Dby1D(
+    global float2* arr2D,
+    global float2* arr1D,
+    int width,
+    int height)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    if ((x >= width) || (y >= height)) return;
+    int i = y*width + x;
+    arr2D[i] = complex_mul(arr2D[i], arr1D[x]);
+}
+
+
+// arr3D *= arr1D (along fast dim)
+kernel void inplace_complex_mul_3Dby1D(
+    global float2* arr3D,
+    global float2* arr1D,
+    int width,
+    int height,
+    int depth)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    int z = get_global_id(2);
+    if ((x >= width) || (y >= height) || (z >= depth)) return;
+    int i = (z*height + y)*width + x;
+    arr3D[i] = complex_mul(arr3D[i], arr1D[x]);
+}