diff options
Diffstat (limited to 'silx/resources/opencl/image')
-rw-r--r-- | silx/resources/opencl/image/cast.cl | 181 | ||||
-rw-r--r-- | silx/resources/opencl/image/histogram.cl | 178 | ||||
-rw-r--r-- | silx/resources/opencl/image/map.cl | 85 | ||||
-rw-r--r-- | silx/resources/opencl/image/max_min.cl | 207 |
4 files changed, 651 insertions, 0 deletions
diff --git a/silx/resources/opencl/image/cast.cl b/silx/resources/opencl/image/cast.cl new file mode 100644 index 0000000..9e23a82 --- /dev/null +++ b/silx/resources/opencl/image/cast.cl @@ -0,0 +1,181 @@ +/* + * Project: SILX: Alogorithms for image processing + * + * Copyright (C) 2013-2017 European Synchrotron Radiation Facility + * Grenoble, France + * + * Principal authors: J. Kieffer (kieffer@esrf.fr) + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef NB_COLOR + #define NB_COLOR 1 +#endif + + +/** + * \brief Cast values of an array of uint8 into a float output array. + * + * :param array_input: Pointer to global memory with the input data as unsigned8 array + * :param array_float: Pointer to global memory with the output data as float array + * :param width: Width of the image + * :param height: Height of the image + */ +kernel void u8_to_float( global unsigned char *array_input, + global float *array_float, + const int width, + const int height) +{ + //Global memory guard for padding + if ((get_global_id(0) < width) && (get_global_id(1)<height)) + { + int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1)); + for (int c=0; c<NB_COLOR; c++) + { + array_float[i + c] = (float) array_input[i + c]; + } //end loop over colors + } //end test in image +} //end kernel + +/** + * \brief Cast values of an array of uint8 into a float output array. + * + * :param array_input: Pointer to global memory with the input data as signed8 array + * :param array_float: Pointer to global memory with the output data as float array + * :param width: Width of the image + * :param height: Height of the image + */ +kernel void s8_to_float( global char *array_input, + global float *array_float, + const int width, + const int height) +{ + //Global memory guard for padding + if ((get_global_id(0) < width) && (get_global_id(1)<height)) + { + int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1)); + for (int c=0; c<NB_COLOR; c++) + { + array_float[i + c] = (float) array_input[i + c]; + } //end loop over colors + } //end test in image +} //end kernel + + +/** + * \brief cast values of an array of uint16 into a float output array. + * + * :param array_input: Pointer to global memory with the input data as unsigned16 array + * :param array_float: Pointer to global memory with the output data as float array + * :param width: Width of the image + * :param height: Height of the image + */ +kernel void u16_to_float(global unsigned short *array_input, + global float *array_float, + const int width, + const int height) +{ + //Global memory guard for padding + if ((get_global_id(0) < width) && (get_global_id(1)<height)) + { + int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1)); + for (int c=0; c<NB_COLOR; c++) + { + array_float[i + c] = (float) array_input[i + c]; + } //end loop over colors + } //end test in image +} //end kernel + +/** + * \brief cast values of an array of int16 into a float output array. + * + * :param array_input: Pointer to global memory with the input data as signed16 array + * :param array_float: Pointer to global memory with the output data as float array + * :param width: Width of the image + * :param height: Height of the image + */ +kernel void s16_to_float(global short *array_input, + global float *array_float, + const int width, + const int height) +{ + //Global memory guard for padding + if ((get_global_id(0) < width) && (get_global_id(1)<height)) + { + int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1)); + for (int c=0; c<NB_COLOR; c++) + { + array_float[i + c] = (float) array_input[i + c]; + } //end loop over colors + } //end test in image +}//end kernel + +/** + * \brief cast values of an array of uint32 into a float output array. + * + * :param array_input: Pointer to global memory with the input data as unsigned32 array + * :param array_float: Pointer to global memory with the output data as float array + * :param width: Width of the image + * :param height: Height of the image + */ +kernel void u32_to_float(global unsigned int *array_input, + global float *array_float, + const int width, + const int height) +{ + //Global memory guard for padding + if ((get_global_id(0) < width) && (get_global_id(1)<height)) + { + int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1)); + for (int c=0; c<NB_COLOR; c++) + { + array_float[i + c] = (float) array_input[i + c]; + } //end loop over colors + } //end test in image +}//end kernel + + +/** + * \brief convert values of an array of int32 into a float output array. + * + * :param array_input: Pointer to global memory with the data in int + * :param array_float: Pointer to global memory with the data in float + * :param width: Width of the image + * :param height: Height of the image + */ +kernel void s32_to_float(global int *array_input, + global float *array_float, + const int width, + const int height) +{ + //Global memory guard for padding + if ((get_global_id(0) < width) && (get_global_id(1)<height)) + { + int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1)); + for (int c=0; c<NB_COLOR; c++) + { + array_float[i + c] = (float) array_input[i + c]; + } //end loop over colors + } //end test in image +}//end kernel + diff --git a/silx/resources/opencl/image/histogram.cl b/silx/resources/opencl/image/histogram.cl new file mode 100644 index 0000000..7fb1485 --- /dev/null +++ b/silx/resources/opencl/image/histogram.cl @@ -0,0 +1,178 @@ +//CL// + +/* + * Project: SILX: Alogorithms for image processing + * + * Copyright (C) 2013-2018 European Synchrotron Radiation Facility + * Grenoble, France + * + * Principal authors: J. Kieffer (kieffer@esrf.fr) + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Single kernel histogram for float array. + * + * Can perform histograming in log-scale (using the arcsinh) + +Parameters: + - data: buffer with the image content in float (input) + - data_size: input + - mini: Lower bound of the first bin + - maxi: upper bouns of the last bin + - map_operation: Type of pre-processor to use. if if set to !=0, use log scale + - hist: resulting histogram (ouptut) + - hist_size: number of bins + - tmp_hist: temporary storage of size hist_size*num_groups + - processed: temporary storage of size 1 initialially set to 0 + - local_hist: local storage of size hist_size + + +Grid information: + * use the largest WG size. If it is larger than the number of bins (hist_size), + take the power of 2 immediately above + *Schedule as many WG as the device has compute engines. No need to schuedule more, + it is just a waist of memory + * The histogram should fit in shared (local) memory and tmp_hist can be large! + +Assumes: + hist and local_hist have size hist_size + edges has size hist_size+2 + tmp_hist has size hist_size*num_groups + processed is of size one and initially set to 0 + +*/ + + +static float preprocess(float value, int code) +{ + //This function can be modified to have more scales + if (code!=0) + { + return asinh(value); + } + else + { + return value; + } +} + +kernel void histogram(global float *data, + int data_size, + float mini, + float maxi, + int map_operation, + global int *hist, + global float *edges, + int hist_size, + global int *tmp_hist, + global int *processed, + local int *local_hist) +{ + // each thread + int lid = get_local_id(0); + int ws = get_local_size(0); + int nb_engine = get_num_groups(0); + int engine_id = get_group_id(0); + + // memset the local_hist array + for (int i=0; i<hist_size; i+=ws) + { + int j = i + lid; + if (j<hist_size) + { + local_hist[j] = 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + // Process the local data and accumulate in shared memory + int bloc_size = (int) ceil((float)data_size/(float)nb_engine); + int start = bloc_size * engine_id; + int stop = min(start + bloc_size, data_size); + float vmini = preprocess(mini, map_operation); + float vscale = (float)hist_size/(preprocess(maxi, map_operation) -vmini); + for (int i = start; i<stop; i+=ws) + { + int address = i + lid; + if (address < stop) + { + float value = data[address]; + if ((!isnan(value)) && (value>=mini) && (value<=maxi)) + { + float vvalue = (preprocess(value, map_operation)-vmini)*vscale; + int idx = clamp((int) vvalue, 0, hist_size - 1); + atomic_inc(&local_hist[idx]); + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + //Now copy result into the right place and reset the first value of the shared array + for (int i=0; i<hist_size; i+=ws) + { + int j = i + lid; + if (j<hist_size) + { + tmp_hist[hist_size * engine_id + j] = local_hist[j]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + local_hist[0] = 0; + + barrier(CLK_LOCAL_MEM_FENCE); + + //Increment the system wide shared variable processed and share the result with the workgroup + if (lid == 0) + { + local_hist[0] = atomic_inc(processed); + } + barrier(CLK_LOCAL_MEM_FENCE); + + // If we are the engine last to work, perform the concatenation of all results + + if ((local_hist[0] + 1) == nb_engine) + { + for (int i=0; i<hist_size; i+=ws) + { + int j = i + lid; + int lsum = 0; + if (j<hist_size) + { + for (int k=0; k<nb_engine; k++) + { + lsum += tmp_hist[hist_size * k + j]; + } + hist[j] = lsum; + edges[j] = vmini + j/vscale; + } + } + // Finally reset the counter + if (lid == 0) + { + processed[0] = 0; + edges[hist_size] = vmini + hist_size/vscale;; + } + + } +} diff --git a/silx/resources/opencl/image/map.cl b/silx/resources/opencl/image/map.cl new file mode 100644 index 0000000..804a5a1 --- /dev/null +++ b/silx/resources/opencl/image/map.cl @@ -0,0 +1,85 @@ +/* + * Project: SILX: Data analysis library + * kernel for maximum and minimum calculation + * + * + * Copyright (C) 2013-2017 European Synchrotron Radiation Facility + * Grenoble, France + * + * Principal authors: J. Kieffer (kieffer@esrf.fr) + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ + +#ifndef NB_COLOR + #define NB_COLOR 1 +#endif + + +/** + * \brief Linear scale of signal in an image (maybe multi-color) + * + * :param image: contains the input image + * :param output: contains the output image after scaling + * :param max_min_input: 2-floats containing the maximum and the minimum value of image + * :param minimum: float scalar with the minimum of the output + * :param maximum: float scalar with the maximum of the output + * :param width: integer, number of columns the matrices + * :param height: integer, number of lines of the matrices + * + * + */ + + +kernel void normalize_image(global float* image, + global float* output, + int width, + int height, + global float* max_min_input, + float minimum, + float maximum + ) +{ + //Global memory guard for padding + if((get_global_id(0) < width) && (get_global_id(1)<height)) + { + int idx = NB_COLOR* (get_global_id(0) + width * get_global_id(1)); + float mini_in, maxi_in, scale; + maxi_in = max_min_input[0]; + mini_in = max_min_input[1]; + if (maxi_in == mini_in) + { + scale = NAN; + } + else + { + scale = (maximum - minimum) / (maxi_in - mini_in); + } + + for (int c=0; c<NB_COLOR; c++) + { + output[idx+c] = fma(scale, image[idx+c]-mini_in, minimum); + } // end color loop + }//end if in IMAGE +}//end kernel diff --git a/silx/resources/opencl/image/max_min.cl b/silx/resources/opencl/image/max_min.cl new file mode 100644 index 0000000..246cd48 --- /dev/null +++ b/silx/resources/opencl/image/max_min.cl @@ -0,0 +1,207 @@ +/* + * Project: SILX: Data analysis library + * kernel for maximum and minimum calculation + * + * + * Copyright (C) 2013-2017 European Synchrotron Radiation Facility + * Grenoble, France + * + * Principal authors: J. Kieffer (kieffer@esrf.fr) + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ + +#ifndef NB_COLOR + #define NB_COLOR 1 +#endif + + + +#define REDUCE(a, b) ((float2) (fmax(a.x, b.x), fmin(a.y, b.y))) + +static float2 read_and_map(int idx, + global float* data) +{ + idx *= NB_COLOR; + float tmp = data[idx]; + float2 res = (float2) (tmp, tmp); + if (NB_COLOR > 1) + { + for (int c=1; c<NB_COLOR; c++) + { + tmp = data[idx + c]; + res = (float2) (fmax(res.x, tmp), fmin(res.y, tmp)); + } + } + return res; +} + + +/** + * \brief max_min_global_stage1: Look for the maximum an the minimum of an array. stage1 + * + * optimal workgroup size: 2^n greater than sqrt(size), limited to 512 + * optimal total item size: (workgroup size)^2 + * if size >total item size: adjust seq_count. + * + * :param data: Float pointer to global memory storing the vector of data. + * :param out: Float2 pointer to global memory storing the temporary results (workgroup size) + * :param seq_count: how many blocksize each thread should read + * :param size: size of the problem (number of element in the image + * :param l_data Shared memory: 2 float per thread in workgroup + * +**/ + + +kernel void max_min_reduction_stage1( global const float *data, + global float2 *out, + int size, + local float2 *l_data)// local storage 2 float per thread +{ + int group_size = get_local_size(0); + int lid = get_local_id(0); + float2 acc; + int big_block = group_size * get_num_groups(0); + int i = lid + group_size * get_group_id(0); + + if (lid<size) + acc = read_and_map(lid, data); + else + acc = read_and_map(0, data); + + // Linear pre-reduction stage 0 + + while (i<size){ + acc = REDUCE(acc, read_and_map(i, data)); + i += big_block; + } + + // parallel reduction stage 1 + + l_data[lid] = acc; + barrier(CLK_LOCAL_MEM_FENCE); + for (int block=group_size/2; block>1; block/=2) + { + if ((lid < block) && ((lid + block)<group_size)){ + l_data[lid] = REDUCE(l_data[lid], l_data[lid + block]); + } + barrier(CLK_LOCAL_MEM_FENCE); + } + if (lid == 0) + { + if (group_size > 1) + { + acc = REDUCE(l_data[0], l_data[1]); + } + else + { + acc = l_data[0]; + } + out[get_group_id(0)] = acc; + } +} + + +/** + * \brief global_max_min: Look for the maximum an the minimum of an array. + * + * + * + * :param data2: Float2 pointer to global memory storing the vector of pre-reduced data (workgroup size). + * :param maximum: Float pointer to global memory storing the maximum value + * :param minumum: Float pointer to global memory storing the minimum value + * :param l_data Shared memory: 2 float per thread in workgroup + * +**/ + +kernel void max_min_reduction_stage2( + global const float2 *data2, + global float2 *maxmin, + local float2 *l_data)// local storage 2 float per thread +{ + int lid = get_local_id(0); + int group_size = get_local_size(0); + float2 acc = (float2)(-1.0f, -1.0f); + if (lid<=group_size) + { + l_data[lid] = data2[lid]; + } + else + { + l_data[lid] = acc; + } + + // parallel reduction stage 2 + + + barrier(CLK_LOCAL_MEM_FENCE); + for (int block=group_size/2; block>1; block/=2) + { + if ((lid < block) && ((lid + block)<group_size)) + { + l_data[lid] = REDUCE(l_data[lid], l_data[lid + block]); + } + barrier(CLK_LOCAL_MEM_FENCE); + + } + + if (lid == 0 ) + { + if ( group_size > 1) + { + acc = REDUCE(l_data[0], l_data[1]); + } + else + { + acc = l_data[0]; + } + maxmin[0] = acc; + } +} + +/*This is the serial version of the min_max kernel. + * + * It has to be launched with WG=1 and only 1 WG has to be launched ! + * + * :param data: Float pointer to global memory storing the vector of data. + * :param size: size of the + * :param maximum: Float pointer to global memory storing the maximum value + * :param minumum: Float pointer to global memory storing the minimum value + * + * + */ +kernel void max_min_serial( + global const float *data, + unsigned int size, + global float2 *maxmin + ) +{ + float2 acc = read_and_map(0, data); + for (int i=1; i<size; i++) + { + acc = REDUCE(acc, read_and_map(i, data)); + } + + maxmin[0] = acc; +} |