summary | refs | log | tree | commit | diff
path: root/silx/resources/opencl/sparse.cl
diff options
context:
space:
mode:
Diffstat (limited to 'silx/resources/opencl/sparse.cl')
-rw-r--r-- silx/resources/opencl/sparse.cl 84
1 files changed, 84 insertions, 0 deletions
diff --git a/silx/resources/opencl/sparse.cl b/silx/resources/opencl/sparse.cl
new file mode 100644
index 0000000..29e09ad
--- /dev/null
+++ b/silx/resources/opencl/sparse.cl
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2016-2019 European Synchrotron Radiation Facility
+ * Grenoble, France
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef IMAGE_WIDTH  /* build-time constant: sizes the local "line" buffer and the output row stride in densify_csr */
+ #error "Please define IMAGE_WIDTH parameter"
+#endif  /* IMAGE_WIDTH */
+
+/**
+ * Densify a matrix from CSR format to "dense" 2D format.
+ * The input CSR data consists of 3 arrays: (data, ind, iptr).
+ * The output array is a 2D array of dimensions IMAGE_WIDTH * image_height.
+ *
+ * data: 1D array containing the nonzero data items.
+ * ind: 1D array containing the column indices of the nonzero data items.
+ * iptr: 1D array containing indirection indices, such that range
+ *   [iptr[i], iptr[i+1]-1] of "data" and "ind" contain the relevant data
+ *   of output row "i".
+ * output: 2D array containing the densified data.
+ * image_height: height (number of rows) of the output data.
+ *
+ * One work-group builds one output row in local memory, so the launch is
+ * expected to use a local size of 1 along dimension 1 (TODO confirm with host).
+ * No work-item returns before the barriers: barrier() must be reached by the
+ * whole work-group, whatever the local size is compared to IMAGE_WIDTH.
+**/
+
+kernel void densify_csr(
+    const global float* data,
+    const global int* ind,
+    const global int* iptr,
+    global float* output,
+    int image_height
+)
+{
+    local float line[IMAGE_WIDTH];
+    const uint tid = get_local_id(0);
+    const uint wg_size = get_local_size(0);
+    const uint row_idx = get_global_id(1);
+    // Rows past the end do no global memory access but still reach both barriers.
+    const bool active = (image_height > 0) && (row_idx < (uint) image_height);
+
+    // Memset
+    for (uint col = tid; col < IMAGE_WIDTH; col += wg_size)
+        line[col] = 0.0f;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Scatter the nonzero items of the current row at their column index
+    const uint start = active ? (uint) iptr[row_idx] : 0;
+    const uint end = active ? (uint) iptr[row_idx+1] : 0;
+    for (uint k = start + tid; k < end; k += wg_size) {
+        const int col = ind[k];
+        // skip invalid column indices instead of writing outside of "line"
+        if (col >= 0 && col < IMAGE_WIDTH)
+            line[col] = data[k];
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // write the current line (shared mem) into the output array (global mem)
+    for (uint col = tid; active && col < IMAGE_WIDTH; col += wg_size)
+        output[row_idx*IMAGE_WIDTH + col] = line[col];
+}