diff options
author | Alexandre Marie <alexandre.marie@synchrotron-soleil.fr> | 2019-07-09 10:20:20 +0200 |
---|---|---|
committer | Alexandre Marie <alexandre.marie@synchrotron-soleil.fr> | 2019-07-09 10:20:20 +0200 |
commit | 654a6ac93513c3cc1ef97cacd782ff674c6f4559 (patch) | |
tree | 3b986e4972de7c57fa465820367602fc34bcb0d3 /silx/resources/opencl | |
parent | a763e5d1b3921b3194f3d4e94ab9de3fbe08bbdd (diff) |
New upstream version 0.11.0+dfsg
Diffstat (limited to 'silx/resources/opencl')
-rw-r--r-- | silx/resources/opencl/convolution.cl | 312 | ||||
-rw-r--r-- | silx/resources/opencl/convolution_textures.cl | 374 | ||||
-rw-r--r-- | silx/resources/opencl/sparse.cl | 84 |
3 files changed, 770 insertions, 0 deletions
diff --git a/silx/resources/opencl/convolution.cl b/silx/resources/opencl/convolution.cl new file mode 100644 index 0000000..629b8fc --- /dev/null +++ b/silx/resources/opencl/convolution.cl @@ -0,0 +1,312 @@ +#define MAX_CONST_SIZE 16384 + +/******************************************************************************/ +/**************************** Macros ******************************************/ +/******************************************************************************/ + +// Get the center index of the filter, +// and the "half-Left" and "half-Right" lengths. +// In the case of an even-sized filter, the center is shifted to the left. +#define GET_CENTER_HL(hlen){\ + if (hlen & 1) {\ + c = hlen/2;\ + hL = c;\ + hR = c;\ + }\ + else {\ + c = hlen/2 - 1;\ + hL = c;\ + hR = c+1;\ + }\ +}\ + +// Boundary handling modes +#define CONV_MODE_REFLECT 0 // cba|abcd|dcb +#define CONV_MODE_NEAREST 1 // aaa|abcd|ddd +#define CONV_MODE_WRAP 2 // bcd|abcd|abc +#define CONV_MODE_CONSTANT 3 // 000|abcd|000 +#ifndef USED_CONV_MODE + #define USED_CONV_MODE CONV_MODE_NEAREST +#endif + +#define CONV_PERIODIC_IDX_X int idx_x = gidx - c + jx; if (idx_x < 0) idx_x += Nx; if (idx_x >= Nx) idx_x -= Nx; +#define CONV_PERIODIC_IDX_Y int idx_y = gidy - c + jy; if (idx_y < 0) idx_y += Ny; if (idx_y >= Ny) idx_y -= Ny; +#define CONV_PERIODIC_IDX_Z int idx_z = gidz - c + jz; if (idx_z < 0) idx_z += Nz; if (idx_z >= Nz) idx_z -= Nz; + +#define CONV_NEAREST_IDX_X int idx_x = clamp((int) (gidx - c + jx), 0, Nx-1); +#define CONV_NEAREST_IDX_Y int idx_y = clamp((int) (gidy - c + jy), 0, Ny-1); +#define CONV_NEAREST_IDX_Z int idx_z = clamp((int) (gidz - c + jz), 0, Nz-1); + +#define CONV_REFLECT_IDX_X int idx_x = gidx - c + jx; if (idx_x < 0) idx_x = -idx_x-1; if (idx_x >= Nx) idx_x = Nx-(idx_x-(Nx-1)); +#define CONV_REFLECT_IDX_Y int idx_y = gidy - c + jy; if (idx_y < 0) idx_y = -idx_y-1; if (idx_y >= Ny) idx_y = Ny-(idx_y-(Ny-1)); +#define CONV_REFLECT_IDX_Z int idx_z = gidz - c + jz; if (idx_z < 0) idx_z = -idx_z-1; if (idx_z >= Nz) idx_z = Nz-(idx_z-(Nz-1)); + + +#if USED_CONV_MODE == CONV_MODE_REFLECT + #define CONV_IDX_X CONV_REFLECT_IDX_X + #define CONV_IDX_Y CONV_REFLECT_IDX_Y + #define CONV_IDX_Z CONV_REFLECT_IDX_Z +#elif USED_CONV_MODE == CONV_MODE_NEAREST + #define CONV_IDX_X CONV_NEAREST_IDX_X + #define CONV_IDX_Y CONV_NEAREST_IDX_Y + #define CONV_IDX_Z CONV_NEAREST_IDX_Z +#elif USED_CONV_MODE == CONV_MODE_WRAP + #define CONV_IDX_X CONV_PERIODIC_IDX_X + #define CONV_IDX_Y CONV_PERIODIC_IDX_Y + #define CONV_IDX_Z CONV_PERIODIC_IDX_Z +#elif USED_CONV_MODE == CONV_MODE_CONSTANT + #error "constant not implemented yet" +#else + #error "Unknown convolution mode" +#endif + + + +// Image access patterns +#define READ_IMAGE_1D_X input[(gidz*Ny + gidy)*Nx + idx_x] +#define READ_IMAGE_1D_Y input[(gidz*Ny + idx_y)*Nx + gidx] +#define READ_IMAGE_1D_Z input[(idx_z*Ny + gidy)*Nx + gidx] + +#define READ_IMAGE_2D_XY input[(gidz*Ny + idx_y)*Nx + idx_x] +#define READ_IMAGE_2D_XZ input[(idx_z*Ny + gidy)*Nx + idx_x] +#define READ_IMAGE_2D_YZ input[(idx_z*Ny + idx_y)*Nx + gidx] + +#define READ_IMAGE_3D_XYZ input[(idx_z*Ny + idx_y)*Nx + idx_x] + + + +/******************************************************************************/ +/**************************** 1D Convolution **********************************/ +/******************************************************************************/ + + +// Convolution with 1D kernel along axis "X" (fast dimension) +// Works for batched 1D on 2D and batched 2D on 3D, along axis "X". +__kernel void convol_1D_X( + const __global float * input, + __global float * output, + __global float * filter, + int L, // filter size + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(L); + float sum = 0.0f; + + for (int jx = 0; jx <= hR+hL; jx++) { + CONV_IDX_X; // Get index "x" + sum += READ_IMAGE_1D_X * filter[L-1 - jx]; + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + +// Convolution with 1D kernel along axis "Y" +// Works for batched 1D on 2D and batched 2D on 3D, along axis "Y". +__kernel void convol_1D_Y( + const __global float * input, + __global float * output, + __global float * filter, + int L, // filter size + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(L); + float sum = 0.0f; + + for (int jy = 0; jy <= hR+hL; jy++) { + CONV_IDX_Y; // Get index "y" + sum += READ_IMAGE_1D_Y * filter[L-1 - jy]; + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + +// Convolution with 1D kernel along axis "Z" +// Works for batched 1D on 2D and batched 2D on 3D, along axis "Z". +__kernel void convol_1D_Z( + const __global float * input, + __global float * output, + __global float * filter, + int L, // filter size + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(L); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + CONV_IDX_Z; // Get index "z" + sum += READ_IMAGE_1D_Z * filter[L-1 - jz]; + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + +/******************************************************************************/ +/**************************** 2D Convolution **********************************/ +/******************************************************************************/ + +// Convolution with 2D kernel +// Works for batched 2D on 3D. +__kernel void convol_2D_XY( + const __global float * input, + __global float * output, + __global float * filter, + int Lx, // filter number of columns, + int Ly, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jy = 0; jy <= hR+hL; jy++) { + CONV_IDX_Y; // Get index "y" + for (int jx = 0; jx <= hR+hL; jx++) { + CONV_IDX_X; // Get index "x" + sum += READ_IMAGE_2D_XY * filter[(Ly-1-jy)*Lx + (Lx-1 - jx)]; + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + +// Convolution with 2D kernel +// Works for batched 2D on 3D. +__kernel void convol_2D_XZ( + const __global float * input, + __global float * output, + __global float * filter, + int Lx, // filter number of columns, + int Lz, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + CONV_IDX_Z; // Get index "z" + for (int jx = 0; jx <= hR+hL; jx++) { + CONV_IDX_X; // Get index "x" + sum += READ_IMAGE_2D_XZ * filter[(Lz-1-jz)*Lx + (Lx-1 - jx)]; + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + +// Convolution with 2D kernel +// Works for batched 2D on 3D. +__kernel void convol_2D_YZ( + const __global float * input, + __global float * output, + __global float * filter, + int Ly, // filter number of columns, + int Lz, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Ly); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + CONV_IDX_Z; // Get index "z" + for (int jy = 0; jy <= hR+hL; jy++) { + CONV_IDX_Y; // Get index "y" + sum += READ_IMAGE_2D_YZ * filter[(Lz-1-jz)*Ly + (Ly-1 - jy)]; + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + + +/******************************************************************************/ +/**************************** 3D Convolution **********************************/ +/******************************************************************************/ + +// Convolution with 3D kernel +__kernel void convol_3D_XYZ( + const __global float * input, + __global float * output, + __global float * filter, + int Lx, // filter number of columns, + int Ly, // filter number of rows, + int Lz, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + CONV_IDX_Z; // Get index "z" + for (int jy = 0; jy <= hR+hL; jy++) { + CONV_IDX_Y; // Get index "y" + for (int jx = 0; jx <= hR+hL; jx++) { + CONV_IDX_X; // Get index "x" + sum += READ_IMAGE_3D_XYZ * filter[((Lz-1-jz)*Ly + (Ly-1-jy))*Lx + (Lx-1 - jx)]; + } + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + diff --git a/silx/resources/opencl/convolution_textures.cl b/silx/resources/opencl/convolution_textures.cl new file mode 100644 index 0000000..517a67c --- /dev/null +++ b/silx/resources/opencl/convolution_textures.cl @@ -0,0 +1,374 @@ +/******************************************************************************/ +/**************************** Macros ******************************************/ +/******************************************************************************/ + +// Error handling +#ifndef IMAGE_DIMS + #error "IMAGE_DIMS must be defined" +#endif +#ifndef FILTER_DIMS + #error "FILTER_DIMS must be defined" +#endif +#if FILTER_DIMS > IMAGE_DIMS + #error "Filter cannot have more dimensions than image" +#endif + +// Boundary handling modes +#define CONV_MODE_REFLECT 0 // CLK_ADDRESS_MIRRORED_REPEAT : cba|abcd|dcb +#define CONV_MODE_NEAREST 1 // CLK_ADDRESS_CLAMP_TO_EDGE : aaa|abcd|ddd +#define CONV_MODE_WRAP 2 // CLK_ADDRESS_REPEAT : bcd|abcd|abc +#define CONV_MODE_CONSTANT 3 // CLK_ADDRESS_CLAMP : 000|abcd|000 +#ifndef USED_CONV_MODE + #define USED_CONV_MODE CONV_MODE_NEAREST +#endif +#if USED_CONV_MODE == CONV_MODE_REFLECT + #define CLK_BOUNDARY CLK_ADDRESS_MIRRORED_REPEAT + #define CLK_COORDS CLK_NORMALIZED_COORDS_TRUE + #define USE_NORM_COORDS +#elif USED_CONV_MODE == CONV_MODE_NEAREST + #define CLK_BOUNDARY CLK_ADDRESS_CLAMP_TO_EDGE + #define CLK_COORDS CLK_NORMALIZED_COORDS_FALSE +#elif USED_CONV_MODE == CONV_MODE_WRAP + #define CLK_BOUNDARY CLK_ADDRESS_REPEAT + #define CLK_COORDS CLK_NORMALIZED_COORDS_TRUE + #define USE_NORM_COORDS +#elif USED_CONV_MODE == CONV_MODE_CONSTANT + #define CLK_BOUNDARY CLK_ADDRESS_CLAMP + #define CLK_COORDS CLK_NORMALIZED_COORDS_FALSE +#else + #error "Unknown convolution mode" +#endif + + + +// Convolution index for filter +#define FILTER_INDEX(j) (Lx - 1 - j) + +// Filter access patterns +#define READ_FILTER_1D(j) read_imagef(filter, (int2) (FILTER_INDEX(j), 0)).x; +#define READ_FILTER_2D(jx, jy) read_imagef(filter, (int2) (FILTER_INDEX(jx), FILTER_INDEX(jy))).x; +#define READ_FILTER_3D(jx, jy, jz) read_imagef(filter, (int4) (FILTER_INDEX(jx), FILTER_INDEX(jy), FILTER_INDEX(jz), 0)).x; + + +// Convolution index for image +#ifdef USE_NORM_COORDS + #define IMAGE_INDEX_X (gidx*1.0f +0.5f - c + jx)/Nx + #define IMAGE_INDEX_Y (gidy*1.0f +0.5f - c + jy)/Ny + #define IMAGE_INDEX_Z (gidz*1.0f +0.5f - c + jz)/Nz + #define RET_TYPE_1 float + #define RET_TYPE_2 float2 + #define RET_TYPE_4 float4 + #define C_ZERO 0.5f + #define GIDX (gidx*1.0f + 0.5f)/Nx + #define GIDY (gidy*1.0f + 0.5f)/Ny + #define GIDZ (gidz*1.0f + 0.5f)/Nz +#else + #define IMAGE_INDEX_X (gidx - c + jx) + #define IMAGE_INDEX_Y (gidy - c + jy) + #define IMAGE_INDEX_Z (gidz - c + jz) + #define RET_TYPE_1 int + #define RET_TYPE_2 int2 + #define RET_TYPE_4 int4 + #define C_ZERO 0 + #define GIDX gidx + #define GIDY gidy + #define GIDZ gidz +#endif + +static const sampler_t sampler = CLK_COORDS | CLK_BOUNDARY | CLK_FILTER_NEAREST; + +// Image access patterns +#define READ_IMAGE_1D read_imagef(input, sampler, (RET_TYPE_2) (IMAGE_INDEX_X, C_ZERO)).x + +#define READ_IMAGE_2D_X read_imagef(input, sampler, (RET_TYPE_2) (IMAGE_INDEX_X , GIDY)).x +#define READ_IMAGE_2D_Y read_imagef(input, sampler, (RET_TYPE_2) (GIDX, IMAGE_INDEX_Y)).x +#define READ_IMAGE_2D_XY read_imagef(input, sampler, (RET_TYPE_2) (IMAGE_INDEX_X, IMAGE_INDEX_Y)).x + +#define READ_IMAGE_3D_X read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, GIDY, GIDZ, C_ZERO)).x +#define READ_IMAGE_3D_Y read_imagef(input, sampler, (RET_TYPE_4) (GIDX, IMAGE_INDEX_Y, GIDZ, C_ZERO)).x +#define READ_IMAGE_3D_Z read_imagef(input, sampler, (RET_TYPE_4) (GIDX, GIDY, IMAGE_INDEX_Z, C_ZERO)).x +#define READ_IMAGE_3D_XY read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, IMAGE_INDEX_Y, GIDZ, C_ZERO)).x +#define READ_IMAGE_3D_XZ read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, GIDY, IMAGE_INDEX_Z, C_ZERO)).x +#define READ_IMAGE_3D_YZ read_imagef(input, sampler, (RET_TYPE_4) (GIDX, IMAGE_INDEX_Y, IMAGE_INDEX_Z, C_ZERO)).x +#define READ_IMAGE_3D_XYZ read_imagef(input, sampler, (RET_TYPE_4) (IMAGE_INDEX_X, IMAGE_INDEX_Y, IMAGE_INDEX_Z, C_ZERO)).x + + +// NOTE: pyopencl and OpenCL < 1.2 do not support image1d_t +#if FILTER_DIMS == 1 + #define FILTER_TYPE image2d_t + #define READ_FILTER_VAL(j) READ_FILTER_1D(j) +#elif FILTER_DIMS == 2 + #define FILTER_TYPE image2d_t + #define READ_FILTER_VAL(jx, jy) READ_FILTER_2D(jx, jy) +#elif FILTER_DIMS == 3 + #define FILTER_TYPE image3d_t + #define READ_FILTER_VAL(jx, jy, jz) READ_FILTER_3D(jx, jy, jz) +#endif + +#if IMAGE_DIMS == 1 + #define IMAGE_TYPE image2d_t + #define READ_IMAGE_X READ_IMAGE_1D +#elif IMAGE_DIMS == 2 + #define IMAGE_TYPE image2d_t + #define READ_IMAGE_X READ_IMAGE_2D_X + #define READ_IMAGE_Y READ_IMAGE_2D_Y + #define READ_IMAGE_XY READ_IMAGE_2D_XY +#elif IMAGE_DIMS == 3 + #define IMAGE_TYPE image3d_t + #define READ_IMAGE_X READ_IMAGE_3D_X + #define READ_IMAGE_Y READ_IMAGE_3D_Y + #define READ_IMAGE_Z READ_IMAGE_3D_Z + #define READ_IMAGE_XY READ_IMAGE_3D_XY + #define READ_IMAGE_XZ READ_IMAGE_3D_XZ + #define READ_IMAGE_YZ READ_IMAGE_3D_YZ + #define READ_IMAGE_XYZ READ_IMAGE_3D_XYZ +#endif + + +// Get the center index of the filter, +// and the "half-Left" and "half-Right" lengths. +// In the case of an even-sized filter, the center is shifted to the left. +#define GET_CENTER_HL(hlen){\ + if (hlen & 1) {\ + c = hlen/2;\ + hL = c;\ + hR = c;\ + }\ + else {\ + c = hlen/2 - 1;\ + hL = c;\ + hR = c+1;\ + }\ +}\ + + + +/******************************************************************************/ +/**************************** 1D Convolution **********************************/ +/******************************************************************************/ + +#if FILTER_DIMS == 1 +// Convolution with 1D kernel along axis "X" (fast dimension) +// Works for batched 1D on 2D and batched 2D on 3D, along axis "X". +__kernel void convol_1D_X_tex( + read_only IMAGE_TYPE input, + __global float * output, + read_only FILTER_TYPE filter, + int Lx, // filter size + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jx = 0; jx <= hR+hL; jx++) { + sum += READ_IMAGE_X * READ_FILTER_VAL(jx); + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + +#if IMAGE_DIMS >= 2 +// Convolution with 1D kernel along axis "Y" +// Works for batched 1D on 2D and batched 2D on 3D, along axis "Y". +__kernel void convol_1D_Y_tex( + read_only IMAGE_TYPE input, + __global float * output, + read_only FILTER_TYPE filter, + int Lx, // filter size + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jy = 0; jy <= hR+hL; jy++) { + sum += READ_IMAGE_Y * READ_FILTER_VAL(jy); + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} +#endif + +#if IMAGE_DIMS == 3 +// Convolution with 1D kernel along axis "Z" +// Works for batched 1D on 2D and batched 2D on 3D, along axis "Z". +__kernel void convol_1D_Z_tex( + read_only IMAGE_TYPE input, + __global float * output, + read_only FILTER_TYPE filter, + int Lx, // filter size + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + sum += READ_IMAGE_Z * READ_FILTER_VAL(jz); + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} +#endif +#endif + +/******************************************************************************/ +/**************************** 2D Convolution **********************************/ +/******************************************************************************/ + +#if IMAGE_DIMS >= 2 && FILTER_DIMS == 2 +// Convolution with 2D kernel +// Works for batched 2D on 3D. +__kernel void convol_2D_XY_tex( + read_only IMAGE_TYPE input, + __global float * output, + read_only FILTER_TYPE filter, + int Lx, // filter number of columns, + int Ly, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jy = 0; jy <= hR+hL; jy++) { + for (int jx = 0; jx <= hR+hL; jx++) { + sum += READ_IMAGE_XY * READ_FILTER_VAL(jx, jy); + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} +#endif + +#if IMAGE_DIMS == 3 && FILTER_DIMS == 2 +// Convolution with 2D kernel +// Works for batched 2D on 3D. +__kernel void convol_2D_XZ_tex( + read_only IMAGE_TYPE input, + __global float * output, + read_only FILTER_TYPE filter, + int Lx, // filter number of columns, + int Lz, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + for (int jx = 0; jx <= hR+hL; jx++) { + sum += READ_IMAGE_XZ * READ_FILTER_VAL(jx, jz); + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} + + +// Convolution with 2D kernel +// Works for batched 2D on 3D. +__kernel void convol_2D_YZ_tex( + read_only IMAGE_TYPE input, + __global float * output, + read_only FILTER_TYPE filter, + int Lx, // filter number of columns, + int Lz, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + for (int jy = 0; jy <= hR+hL; jy++) { + sum += READ_IMAGE_YZ * READ_FILTER_VAL(jy, jz); + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} +#endif + + +/******************************************************************************/ +/**************************** 3D Convolution **********************************/ +/******************************************************************************/ + +#if IMAGE_DIMS == 3 && FILTER_DIMS == 3 +// Convolution with 3D kernel +__kernel void convol_3D_XYZ_tex( + read_only IMAGE_TYPE input, + __global float * output, + read_only FILTER_TYPE filter, + int Lx, // filter number of columns, + int Ly, // filter number of rows, + int Lz, // filter number of rows, + int Nx, // input/output number of columns + int Ny, // input/output number of rows + int Nz // input/output depth +) +{ + uint gidx = get_global_id(0); + uint gidy = get_global_id(1); + uint gidz = get_global_id(2); + if ((gidx >= Nx) || (gidy >= Ny) || (gidz >= Nz)) return; + + int c, hL, hR; + GET_CENTER_HL(Lx); + float sum = 0.0f; + + for (int jz = 0; jz <= hR+hL; jz++) { + for (int jy = 0; jy <= hR+hL; jy++) { + for (int jx = 0; jx <= hR+hL; jx++) { + sum += READ_IMAGE_XYZ * READ_FILTER_VAL(jx, jy, jz); + } + } + } + output[(gidz*Ny + gidy)*Nx + gidx] = sum; +} +#endif diff --git a/silx/resources/opencl/sparse.cl b/silx/resources/opencl/sparse.cl new file mode 100644 index 0000000..29e09ad --- /dev/null +++ b/silx/resources/opencl/sparse.cl @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2016-2019 European Synchrotron Radiation Facility + * Grenoble, France + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef IMAGE_WIDTH + #error "Please define IMAGE_WIDTH parameter" +#endif + +/** + * Densify a matric from CSR format to "dense" 2D format. + * The input CSR data consists in 3 arrays: (data, ind, iptr). + * The output array is a 2D array of dimensions IMAGE_WIDTH * image_height. + * + * data: 1D array containing the nonzero data items. + * ind: 1D array containing the column indices of the nonzero data items. + * iptr: 1D array containing indirection indices, such that range + * [iptr[i], iptr[i+1]-1] of "data" and "ind" contain the relevant data + * of output row "i". + * output: 2D array containing the densified data. + * image_height: height (number of rows) of the output data. +**/ + +kernel void densify_csr( + const global float* data, + const global int* ind, + const global int* iptr, + global float* output, + int image_height +) +{ + uint tid = get_local_id(0); + uint row_idx = get_global_id(1); + if ((tid >= IMAGE_WIDTH) || (row_idx >= image_height)) return; + + local float line[IMAGE_WIDTH]; + + // Memset + //~ #pragma unroll + for (int k = 0; tid+k < IMAGE_WIDTH; k += get_local_size(0)) { + if (tid+k >= IMAGE_WIDTH) break; + line[tid+k] = 0.0f; + } + barrier(CLK_LOCAL_MEM_FENCE); + + + uint start = iptr[row_idx], end = iptr[row_idx+1]; + //~ #pragma unroll + for (int k = start; k < end; k += get_local_size(0)) { + // Current work group handles one line of the final array + // on the current line, write data[start+tid] at column index ind[start+tid] + if (k+tid < end) + line[ind[k+tid]] = data[k+tid]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + // write the current line (shared mem) into the output array (global mem) + //~ #pragma unroll + for (int k = 0; tid+k < IMAGE_WIDTH; k += get_local_size(0)) { + output[row_idx*IMAGE_WIDTH + tid+k] = line[tid+k]; + if (k+tid >= IMAGE_WIDTH) return; + } +} |