summaryrefslogtreecommitdiff
path: root/silx/resources/opencl/array_utils.cl
blob: 6f7892135ce09254197909d5eff29b67d0407822 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/**
 * Rectangular (2D) copy between two float buffers.
 *
 * Stands in for pyopencl "enqueue_copy", which does not work for rectangular
 * copies. Every size and offset is counted in ELEMENTS (pixels), never in
 * bytes. In the (x, y) convention used here, x is the fast index (as in CUDA).
 *
 * Each work-item copies at most one element: work-item (gx, gy) reads
 * src at (src_offset.x + gx, src_offset.y + gy) and writes it to
 * dst at (dst_offset.x + gx, dst_offset.y + gy). Work-items lying outside
 * transfer_shape do nothing. No other bounds check is performed.
 *
 * :param dst: destination array
 * :param src: source array
 * :param dst_width: width (row length, in elements) of the dst array
 * :param src_width: width (row length, in elements) of the src array
 * :param dst_offset: offset (x, y) of the copied region in the dst array
 * :param src_offset: offset (x, y) of the copied region in the src array
 * :param transfer_shape: shape (x, y) of the region to copy
 *
 */
kernel void cpy2d(
    global float* dst,
    global float* src,
    int dst_width,
    int src_width,
    int2 dst_offset,
    int2 src_offset,
    int2 transfer_shape)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the transfer rectangle have nothing to copy.
    if (col >= transfer_shape.x || row >= transfer_shape.y) {
        return;
    }

    // Flat (row-major) positions of the element in each buffer.
    const int src_index = (src_offset.y + row)*src_width + (src_offset.x + col);
    const int dst_index = (dst_offset.y + row)*dst_width + (dst_offset.x + col);

    dst[dst_index] = src[src_index];
}


// cfloat_t and cfloat_mul do not appear to work here, even though they are
// described in the pyopencl documentation. float2 is used instead, as in all
// available examples.
// #include <pyopencl-complex.h>
// typedef cfloat_t complex;

// Complex product a * b, with each complex number stored in a float2 as
// (x = real part, y = imaginary part).
static inline float2 complex_mul(float2 a, float2 b) {
    return (float2) (a.x * b.x - a.y * b.y,   // real part
                     a.y * b.x + a.x * b.y);  // imaginary part
}

// In-place complex multiplication: arr2D *= arr1D, line by line (i.e. along
// the fast dimension).
//
// Work-item (x, y) updates element y*width + x of arr2D, multiplying it by
// arr1D[x]. Work-items with x >= width or y >= height do nothing.
kernel void inplace_complex_mul_2Dby1D(
    global float2* arr2D,
    global float2* arr1D,
    int width,
    int height)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if ((col < width) && (row < height)) {
        const int idx = row*width + col;
        // arr1D is indexed by the fast-dimension coordinate only.
        arr2D[idx] = complex_mul(arr2D[idx], arr1D[col]);
    }
}


// In-place complex multiplication: arr3D *= arr1D (along the fast dimension).
//
// Work-item (x, y, z) updates element (z*height + y)*width + x of arr3D,
// multiplying it by arr1D[x]. Work-items with x >= width, y >= height or
// z >= depth do nothing.
kernel void inplace_complex_mul_3Dby1D(
    global float2* arr3D,
    global float2* arr1D,
    int width,
    int height,
    int depth)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);
    const int slice = get_global_id(2);

    if ((col < width) && (row < height) && (slice < depth)) {
        // Index of the current line within the volume, then of the element.
        const int line = slice*height + row;
        const int idx = line*width + col;
        // arr1D is indexed by the fast-dimension coordinate only.
        arr3D[idx] = complex_mul(arr3D[idx], arr1D[col]);
    }
}