summaryrefslogtreecommitdiff
path: root/silx/resources/opencl
diff options
context:
space:
mode:
authorAlexandre Marie <alexandre.marie@synchrotron-soleil.fr>2019-07-09 10:20:20 +0200
committerAlexandre Marie <alexandre.marie@synchrotron-soleil.fr>2019-07-09 10:20:20 +0200
commit654a6ac93513c3cc1ef97cacd782ff674c6f4559 (patch)
tree3b986e4972de7c57fa465820367602fc34bcb0d3 /silx/resources/opencl
parenta763e5d1b3921b3194f3d4e94ab9de3fbe08bbdd (diff)
New upstream version 0.11.0+dfsg
Diffstat (limited to 'silx/resources/opencl')
-rw-r--r--silx/resources/opencl/convolution.cl312
-rw-r--r--silx/resources/opencl/convolution_textures.cl374
-rw-r--r--silx/resources/opencl/sparse.cl84
3 files changed, 770 insertions, 0 deletions
diff --git a/silx/resources/opencl/convolution.cl b/silx/resources/opencl/convolution.cl
new file mode 100644
index 0000000..629b8fc
--- /dev/null
+++ b/silx/resources/opencl/convolution.cl
@@ -0,0 +1,312 @@
+#define MAX_CONST_SIZE 16384
+
+/******************************************************************************/
+/**************************** Macros ******************************************/
+/******************************************************************************/
+
+// Get the center index of the filter,
+// and the "half-Left" and "half-Right" lengths.
+// In the case of an even-sized filter, the center is shifted to the left.
+#define GET_CENTER_HL(hlen){\
+ if (hlen & 1) {\
+ c = hlen/2;\
+ hL = c;\
+ hR = c;\
+ }\
+ else {\
+ c = hlen/2 - 1;\
+ hL = c;\
+ hR = c+1;\
+ }\
+}\
+
+// Boundary handling modes
+#define CONV_MODE_REFLECT 0 // cba|abcd|dcb
+#define CONV_MODE_NEAREST 1 // aaa|abcd|ddd
+#define CONV_MODE_WRAP 2 // bcd|abcd|abc
+#define CONV_MODE_CONSTANT 3 // 000|abcd|000
+#ifndef USED_CONV_MODE
+ #define USED_CONV_MODE CONV_MODE_NEAREST
+#endif
+
+#define CONV_PERIODIC_IDX_X int idx_x = gidx - c + jx; if (idx_x < 0) idx_x += Nx; if (idx_x >= Nx) idx_x -= Nx;
+#define CONV_PERIODIC_IDX_Y int idx_y = gidy - c + jy; if (idx_y < 0) idx_y += Ny; if (idx_y >= Ny) idx_y -= Ny;
+#define CONV_PERIODIC_IDX_Z int idx_z = gidz - c + jz; if (idx_z < 0) idx_z += Nz; if (idx_z >= Nz) idx_z -= Nz;
+
+#define CONV_NEAREST_IDX_X int idx_x = clamp((int) (gidx - c + jx), 0, Nx-1);
+#define CONV_NEAREST_IDX_Y int idx_y = clamp((int) (gidy - c + jy), 0, Ny-1);
+#define CONV_NEAREST_IDX_Z int idx_z = clamp((int) (gidz - c + jz), 0, Nz-1);
+
+#define CONV_REFLECT_IDX_X int idx_x = gidx - c + jx; if (idx_x < 0) idx_x = -idx_x-1; if (idx_x >= Nx) idx_x = Nx-(idx_x-(Nx-1));
+#define CONV_REFLECT_IDX_Y int idx_y = gidy - c + jy; if (idx_y < 0) idx_y = -idx_y-1; if (idx_y >= Ny) idx_y = Ny-(idx_y-(Ny-1));
+#define CONV_REFLECT_IDX_Z int idx_z = gidz - c + jz; if (idx_z < 0) idx_z = -idx_z-1; if (idx_z >= Nz) idx_z = Nz-(idx_z-(Nz-1));
+
+
+#if USED_CONV_MODE == CONV_MODE_REFLECT
+ #define CONV_IDX_X CONV_REFLECT_IDX_X
+ #define CONV_IDX_Y CONV_REFLECT_IDX_Y
+ #define CONV_IDX_Z CONV_REFLECT_IDX_Z
+#elif USED_CONV_MODE == CONV_MODE_NEAREST
+ #define CONV_IDX_X CONV_NEAREST_IDX_X
+ #define CONV_IDX_Y CONV_NEAREST_IDX_Y
+ #define CONV_IDX_Z CONV_NEAREST_IDX_Z
+#elif USED_CONV_MODE == CONV_MODE_WRAP
+ #define CONV_IDX_X CONV_PERIODIC_IDX_X
+ #define CONV_IDX_Y CONV_PERIODIC_IDX_Y
+ #define CONV_IDX_Z CONV_PERIODIC_IDX_Z
+#elif USED_CONV_MODE == CONV_MODE_CONSTANT
+ #error "constant not implemented yet"
+#else
+ #error "Unknown convolution mode"
+#endif
+
+
+
+// Image access patterns
+#define READ_IMAGE_1D_X input[(gidz*Ny + gidy)*Nx + idx_x]
+#define READ_IMAGE_1D_Y input[(gidz*Ny + idx_y)*Nx + gidx]
+#define READ_IMAGE_1D_Z input[(idx_z*Ny + gidy)*Nx + gidx]
+
+#define READ_IMAGE_2D_XY input[(gidz*Ny + idx_y)*Nx + idx_x]
+#define READ_IMAGE_2D_XZ input[(idx_z*Ny + gidy)*Nx + idx_x]
+#define READ_IMAGE_2D_YZ input[(idx_z*Ny + idx_y)*Nx + gidx]
+
+#define READ_IMAGE_3D_XYZ input[(idx_z*Ny + idx_y)*Nx + idx_x]
+
+
+
+/******************************************************************************/
+/**************************** 1D Convolution **********************************/
+/******************************************************************************/
+
+
+// Convolution with 1D kernel along axis "X" (fast dimension)
+// Works for batched 1D on 2D and batched 2D on 3D, along axis "X".
+__kernel void convol_1D_X(
+ const __global float * input,
+ __global float * output,
+ __global float * filter,
+ int L, // filter size
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(L);
+ float sum = 0.0f;
+
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ CONV_IDX_X; // Get index "x"
+ sum += READ_IMAGE_1D_X * filter[L-1 - jx];
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+// Convolution with 1D kernel along axis "Y"
+// Works for batched 1D on 2D and batched 2D on 3D, along axis "Y".
+__kernel void convol_1D_Y(
+ const __global float * input,
+ __global float * output,
+ __global float * filter,
+ int L, // filter size
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(L);
+ float sum = 0.0f;
+
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ CONV_IDX_Y; // Get index "y"
+ sum += READ_IMAGE_1D_Y * filter[L-1 - jy];
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+// Convolution with 1D kernel along axis "Z"
+// Works for batched 1D on 2D and batched 2D on 3D, along axis "Z".
+__kernel void convol_1D_Z(
+ const __global float * input,
+ __global float * output,
+ __global float * filter,
+ int L, // filter size
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(L);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ CONV_IDX_Z; // Get index "z"
+ sum += READ_IMAGE_1D_Z * filter[L-1 - jz];
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+/******************************************************************************/
+/**************************** 2D Convolution **********************************/
+/******************************************************************************/
+
+// Convolution with 2D kernel
+// Works for batched 2D on 3D.
+__kernel void convol_2D_XY(
+ const __global float * input,
+ __global float * output,
+ __global float * filter,
+ int Lx, // filter number of columns,
+ int Ly, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ CONV_IDX_Y; // Get index "y"
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ CONV_IDX_X; // Get index "x"
+ sum += READ_IMAGE_2D_XY * filter[(Ly-1-jy)*Lx + (Lx-1 - jx)];
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+// Convolution with 2D kernel
+// Works for batched 2D on 3D.
+__kernel void convol_2D_XZ(
+ const __global float * input,
+ __global float * output,
+ __global float * filter,
+ int Lx, // filter number of columns,
+ int Lz, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ CONV_IDX_Z; // Get index "z"
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ CONV_IDX_X; // Get index "x"
+ sum += READ_IMAGE_2D_XZ * filter[(Lz-1-jz)*Lx + (Lx-1 - jx)];
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+// Convolution with 2D kernel
+// Works for batched 2D on 3D.
+__kernel void convol_2D_YZ(
+ const __global float * input,
+ __global float * output,
+ __global float * filter,
+ int Ly, // filter number of columns,
+ int Lz, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Ly);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ CONV_IDX_Z; // Get index "z"
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ CONV_IDX_Y; // Get index "y"
+ sum += READ_IMAGE_2D_YZ * filter[(Lz-1-jz)*Ly + (Ly-1 - jy)];
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+
+/******************************************************************************/
+/**************************** 3D Convolution **********************************/
+/******************************************************************************/
+
+// Convolution with 3D kernel
+__kernel void convol_3D_XYZ(
+ const __global float * input,
+ __global float * output,
+ __global float * filter,
+ int Lx, // filter number of columns,
+ int Ly, // filter number of rows,
+ int Lz, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ CONV_IDX_Z; // Get index "z"
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ CONV_IDX_Y; // Get index "y"
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ CONV_IDX_X; // Get index "x"
+ sum += READ_IMAGE_3D_XYZ * filter[((Lz-1-jz)*Ly + (Ly-1-jy))*Lx + (Lx-1 - jx)];
+ }
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
diff --git a/silx/resources/opencl/convolution_textures.cl b/silx/resources/opencl/convolution_textures.cl
new file mode 100644
index 0000000..517a67c
--- /dev/null
+++ b/silx/resources/opencl/convolution_textures.cl
@@ -0,0 +1,374 @@
+/******************************************************************************/
+/**************************** Macros ******************************************/
+/******************************************************************************/
+
+// Error handling
+#ifndef IMAGE_DIMS
+ #error "IMAGE_DIMS must be defined"
+#endif
+#ifndef FILTER_DIMS
+ #error "FILTER_DIMS must be defined"
+#endif
+#if FILTER_DIMS > IMAGE_DIMS
+ #error "Filter cannot have more dimensions than image"
+#endif
+
+// Boundary handling modes
+#define CONV_MODE_REFLECT 0 // CLK_ADDRESS_MIRRORED_REPEAT : cba|abcd|dcb
+#define CONV_MODE_NEAREST 1 // CLK_ADDRESS_CLAMP_TO_EDGE : aaa|abcd|ddd
+#define CONV_MODE_WRAP 2 // CLK_ADDRESS_REPEAT : bcd|abcd|abc
+#define CONV_MODE_CONSTANT 3 // CLK_ADDRESS_CLAMP : 000|abcd|000
+#ifndef USED_CONV_MODE
+ #define USED_CONV_MODE CONV_MODE_NEAREST
+#endif
+#if USED_CONV_MODE == CONV_MODE_REFLECT
+ #define CLK_BOUNDARY CLK_ADDRESS_MIRRORED_REPEAT
+ #define CLK_COORDS CLK_NORMALIZED_COORDS_TRUE
+ #define USE_NORM_COORDS
+#elif USED_CONV_MODE == CONV_MODE_NEAREST
+ #define CLK_BOUNDARY CLK_ADDRESS_CLAMP_TO_EDGE
+ #define CLK_COORDS CLK_NORMALIZED_COORDS_FALSE
+#elif USED_CONV_MODE == CONV_MODE_WRAP
+ #define CLK_BOUNDARY CLK_ADDRESS_REPEAT
+ #define CLK_COORDS CLK_NORMALIZED_COORDS_TRUE
+ #define USE_NORM_COORDS
+#elif USED_CONV_MODE == CONV_MODE_CONSTANT
+ #define CLK_BOUNDARY CLK_ADDRESS_CLAMP
+ #define CLK_COORDS CLK_NORMALIZED_COORDS_FALSE
+#else
+ #error "Unknown convolution mode"
+#endif
+
+
+
+// Convolution index for filter
+#define FILTER_INDEX(j) (Lx - 1 - j)
+
+// Filter access patterns
+#define READ_FILTER_1D(j) read_imagef(filter, (int2) (FILTER_INDEX(j), 0)).x;
+#define READ_FILTER_2D(jx, jy) read_imagef(filter, (int2) (FILTER_INDEX(jx), FILTER_INDEX(jy))).x;
+#define READ_FILTER_3D(jx, jy, jz) read_imagef(filter, (int4) (FILTER_INDEX(jx), FILTER_INDEX(jy), FILTER_INDEX(jz), 0)).x;
+
+
+// Convolution index for image
+#ifdef USE_NORM_COORDS
+ #define IMAGE_INDEX_X (gidx*1.0f +0.5f - c + jx)/Nx
+ #define IMAGE_INDEX_Y (gidy*1.0f +0.5f - c + jy)/Ny
+ #define IMAGE_INDEX_Z (gidz*1.0f +0.5f - c + jz)/Nz
+ #define RET_TYPE_1 float
+ #define RET_TYPE_2 float2
+ #define RET_TYPE_4 float4
+ #define C_ZERO 0.5f
+ #define GIDX (gidx*1.0f + 0.5f)/Nx
+ #define GIDY (gidy*1.0f + 0.5f)/Ny
+ #define GIDZ (gidz*1.0f + 0.5f)/Nz
+#else
+ #define IMAGE_INDEX_X (gidx - c + jx)
+ #define IMAGE_INDEX_Y (gidy - c + jy)
+ #define IMAGE_INDEX_Z (gidz - c + jz)
+ #define RET_TYPE_1 int
+ #define RET_TYPE_2 int2
+ #define RET_TYPE_4 int4
+ #define C_ZERO 0
+ #define GIDX gidx
+ #define GIDY gidy
+ #define GIDZ gidz
+#endif
+
+static const sampler_t sampler = CLK_COORDS | CLK_BOUNDARY | CLK_FILTER_NEAREST;
+
+// Image access patterns
+#define READ_IMAGE_1D read_imagef(input, sampler, (RET_TYPE_2) (IMAGE_INDEX_X, C_ZERO)).x
+
+#define READ_IMAGE_2D_X read_imagef(input, sampler, (RET_TYPE_2) (IMAGE_INDEX_X , GIDY)).x
+#define READ_IMAGE_2D_Y read_imagef(input, sampler, (RET_TYPE_2) (GIDX, IMAGE_INDEX_Y)).x
+#define READ_IMAGE_2D_XY read_imagef(input, sampler, (RET_TYPE_2) (IMAGE_INDEX_X, IMAGE_INDEX_Y)).x
+
+#define READ_IMAGE_3D_X read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, GIDY, GIDZ, C_ZERO)).x
+#define READ_IMAGE_3D_Y read_imagef(input, sampler, (RET_TYPE_4) (GIDX, IMAGE_INDEX_Y, GIDZ, C_ZERO)).x
+#define READ_IMAGE_3D_Z read_imagef(input, sampler, (RET_TYPE_4) (GIDX, GIDY, IMAGE_INDEX_Z, C_ZERO)).x
+#define READ_IMAGE_3D_XY read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, IMAGE_INDEX_Y, GIDZ, C_ZERO)).x
+#define READ_IMAGE_3D_XZ read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, GIDY, IMAGE_INDEX_Z, C_ZERO)).x
+#define READ_IMAGE_3D_YZ read_imagef(input, sampler, (RET_TYPE_4) (GIDX, IMAGE_INDEX_Y, IMAGE_INDEX_Z, C_ZERO)).x
+#define READ_IMAGE_3D_XYZ read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, IMAGE_INDEX_Y, IMAGE_INDEX_Z, C_ZERO)).x
+
+
+// NOTE: pyopencl and OpenCL < 1.2 do not support image1d_t
+#if FILTER_DIMS == 1
+ #define FILTER_TYPE image2d_t
+ #define READ_FILTER_VAL(j) READ_FILTER_1D(j)
+#elif FILTER_DIMS == 2
+ #define FILTER_TYPE image2d_t
+ #define READ_FILTER_VAL(jx, jy) READ_FILTER_2D(jx, jy)
+#elif FILTER_DIMS == 3
+ #define FILTER_TYPE image3d_t
+ #define READ_FILTER_VAL(jx, jy, jz) READ_FILTER_3D(jx, jy, jz)
+#endif
+
+#if IMAGE_DIMS == 1
+ #define IMAGE_TYPE image2d_t
+ #define READ_IMAGE_X READ_IMAGE_1D
+#elif IMAGE_DIMS == 2
+ #define IMAGE_TYPE image2d_t
+ #define READ_IMAGE_X READ_IMAGE_2D_X
+ #define READ_IMAGE_Y READ_IMAGE_2D_Y
+ #define READ_IMAGE_XY READ_IMAGE_2D_XY
+#elif IMAGE_DIMS == 3
+ #define IMAGE_TYPE image3d_t
+ #define READ_IMAGE_X READ_IMAGE_3D_X
+ #define READ_IMAGE_Y READ_IMAGE_3D_Y
+ #define READ_IMAGE_Z READ_IMAGE_3D_Z
+ #define READ_IMAGE_XY READ_IMAGE_3D_XY
+ #define READ_IMAGE_XZ READ_IMAGE_3D_XZ
+ #define READ_IMAGE_YZ READ_IMAGE_3D_YZ
+ #define READ_IMAGE_XYZ READ_IMAGE_3D_XYZ
+#endif
+
+
+// Get the center index of the filter,
+// and the "half-Left" and "half-Right" lengths.
+// In the case of an even-sized filter, the center is shifted to the left.
+#define GET_CENTER_HL(hlen){\
+ if (hlen & 1) {\
+ c = hlen/2;\
+ hL = c;\
+ hR = c;\
+ }\
+ else {\
+ c = hlen/2 - 1;\
+ hL = c;\
+ hR = c+1;\
+ }\
+}\
+
+
+
+/******************************************************************************/
+/**************************** 1D Convolution **********************************/
+/******************************************************************************/
+
+#if FILTER_DIMS == 1
+// Convolution with 1D kernel along axis "X" (fast dimension)
+// Works for batched 1D on 2D and batched 2D on 3D, along axis "X".
+__kernel void convol_1D_X_tex(
+ read_only IMAGE_TYPE input,
+ __global float * output,
+ read_only FILTER_TYPE filter,
+ int Lx, // filter size
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ sum += READ_IMAGE_X * READ_FILTER_VAL(jx);
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+#if IMAGE_DIMS >= 2
+// Convolution with 1D kernel along axis "Y"
+// Works for batched 1D on 2D and batched 2D on 3D, along axis "Y".
+__kernel void convol_1D_Y_tex(
+ read_only IMAGE_TYPE input,
+ __global float * output,
+ read_only FILTER_TYPE filter,
+ int Lx, // filter size
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ sum += READ_IMAGE_Y * READ_FILTER_VAL(jy);
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+#endif
+
+#if IMAGE_DIMS == 3
+// Convolution with 1D kernel along axis "Z"
+// Works for batched 1D on 2D and batched 2D on 3D, along axis "Z".
+__kernel void convol_1D_Z_tex(
+ read_only IMAGE_TYPE input,
+ __global float * output,
+ read_only FILTER_TYPE filter,
+ int Lx, // filter size
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ sum += READ_IMAGE_Z * READ_FILTER_VAL(jz);
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+#endif
+#endif
+
+/******************************************************************************/
+/**************************** 2D Convolution **********************************/
+/******************************************************************************/
+
+#if IMAGE_DIMS >= 2 && FILTER_DIMS == 2
+// Convolution with 2D kernel
+// Works for batched 2D on 3D.
+__kernel void convol_2D_XY_tex(
+ read_only IMAGE_TYPE input,
+ __global float * output,
+ read_only FILTER_TYPE filter,
+ int Lx, // filter number of columns,
+ int Ly, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ sum += READ_IMAGE_XY * READ_FILTER_VAL(jx, jy);
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+#endif
+
+#if IMAGE_DIMS == 3 && FILTER_DIMS == 2
+// Convolution with 2D kernel
+// Works for batched 2D on 3D.
+__kernel void convol_2D_XZ_tex(
+ read_only IMAGE_TYPE input,
+ __global float * output,
+ read_only FILTER_TYPE filter,
+ int Lx, // filter number of columns,
+ int Lz, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ sum += READ_IMAGE_XZ * READ_FILTER_VAL(jx, jz);
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+
+
+// Convolution with 2D kernel
+// Works for batched 2D on 3D.
+__kernel void convol_2D_YZ_tex(
+ read_only IMAGE_TYPE input,
+ __global float * output,
+ read_only FILTER_TYPE filter,
+ int Lx, // filter number of columns,
+ int Lz, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ sum += READ_IMAGE_YZ * READ_FILTER_VAL(jy, jz);
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+#endif
+
+
+/******************************************************************************/
+/**************************** 3D Convolution **********************************/
+/******************************************************************************/
+
+#if IMAGE_DIMS == 3 && FILTER_DIMS == 3
+// Convolution with 3D kernel
+__kernel void convol_3D_XYZ_tex(
+ read_only IMAGE_TYPE input,
+ __global float * output,
+ read_only FILTER_TYPE filter,
+ int Lx, // filter number of columns,
+ int Ly, // filter number of rows,
+ int Lz, // filter number of rows,
+ int Nx, // input/output number of columns
+ int Ny, // input/output number of rows
+ int Nz // input/output depth
+)
+{
+ uint gidx = get_global_id(0);
+ uint gidy = get_global_id(1);
+ uint gidz = get_global_id(2);
+ if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return;
+
+ int c, hL, hR;
+ GET_CENTER_HL(Lx);
+ float sum = 0.0f;
+
+ for (int jz = 0; jz <= hR+hL; jz++) {
+ for (int jy = 0; jy <= hR+hL; jy++) {
+ for (int jx = 0; jx <= hR+hL; jx++) {
+ sum += READ_IMAGE_XYZ * READ_FILTER_VAL(jx, jy, jz);
+ }
+ }
+ }
+ output[(gidz*Ny + gidy)*Nx + gidx] = sum;
+}
+#endif
diff --git a/silx/resources/opencl/sparse.cl b/silx/resources/opencl/sparse.cl
new file mode 100644
index 0000000..29e09ad
--- /dev/null
+++ b/silx/resources/opencl/sparse.cl
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2016-2019 European Synchrotron Radiation Facility
+ * Grenoble, France
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef IMAGE_WIDTH
+ #error "Please define IMAGE_WIDTH parameter"
+#endif
+
+/**
+ * Densify a matric from CSR format to "dense" 2D format.
+ * The input CSR data consists in 3 arrays: (data, ind, iptr).
+ * The output array is a 2D array of dimensions IMAGE_WIDTH * image_height.
+ *
+ * data: 1D array containing the nonzero data items.
+ * ind: 1D array containing the column indices of the nonzero data items.
+ * iptr: 1D array containing indirection indices, such that range
+ * [iptr[i], iptr[i+1]-1] of "data" and "ind" contain the relevant data
+ * of output row "i".
+ * output: 2D array containing the densified data.
+ * image_height: height (number of rows) of the output data.
+**/
+
+kernel void densify_csr(
+ const global float* data,
+ const global int* ind,
+ const global int* iptr,
+ global float* output,
+ int image_height
+)
+{
+ uint tid = get_local_id(0);
+ uint row_idx = get_global_id(1);
+ if ((tid >= IMAGE_WIDTH) || (row_idx >= image_height)) return;
+
+ local float line[IMAGE_WIDTH];
+
+ // Memset
+ //~ #pragma unroll
+ for (int k = 0; tid+k < IMAGE_WIDTH; k += get_local_size(0)) {
+ if (tid+k >= IMAGE_WIDTH) break;
+ line[tid+k] = 0.0f;
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+
+ uint start = iptr[row_idx], end = iptr[row_idx+1];
+ //~ #pragma unroll
+ for (int k = start; k < end; k += get_local_size(0)) {
+ // Current work group handles one line of the final array
+ // on the current line, write data[start+tid] at column index ind[start+tid]
+ if (k+tid < end)
+ line[ind[k+tid]] = data[k+tid];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ // write the current line (shared mem) into the output array (global mem)
+ //~ #pragma unroll
+ for (int k = 0; tid+k < IMAGE_WIDTH; k += get_local_size(0)) {
+ output[row_idx*IMAGE_WIDTH + tid+k] = line[tid+k];
+ if (k+tid >= IMAGE_WIDTH) return;
+ }
+}