4 files changed, 651 insertions, 0 deletions
diff --git a/silx/resources/opencl/image/cast.cl b/silx/resources/opencl/image/cast.cl
new file mode 100644
index 0000000..9e23a82
--- /dev/null
+++ b/silx/resources/opencl/image/cast.cl
@@ -0,0 +1,181 @@
+/*
+ *   Project: SILX: Alogorithms for image processing
+ *
+ *   Copyright (C) 2013-2017 European Synchrotron Radiation Facility
+ *                           Grenoble, France
+ *
+ *   Principal authors: J. Kieffer (kieffer@esrf.fr)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NB_COLOR
+    #define NB_COLOR 1
+#endif
+
+
+/**
+ * \brief Cast values of an array of uint8 into a float output array.
+ *
+ * :param array_input:   Pointer to global memory with the input data as unsigned8 array
+ * :param array_float:   Pointer to global memory with the output data as float array
+ * :param width:         Width of the image
+ * :param height:        Height of the image
+ */
+kernel void u8_to_float( global unsigned char  *array_input,
+                         global float *array_float,
+                         const int width,
+                         const int height)
+{
+    //Global memory guard for padding
+    if ((get_global_id(0) < width) && (get_global_id(1)<height))
+    {
+        int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1));
+        for (int c=0; c<NB_COLOR; c++)
+        {
+            array_float[i + c] = (float) array_input[i + c];
+        } //end loop over colors
+    } //end test in image
+} //end kernel
+
+/**
+ * \brief Cast values of an array of uint8 into a float output array.
+ *
+ * :param array_input:   Pointer to global memory with the input data as signed8 array
+ * :param array_float:   Pointer to global memory with the output data as float array
+ * :param width:         Width of the image
+ * :param height:        Height of the image
+ */
+kernel void s8_to_float( global char  *array_input,
+                         global float *array_float,
+                         const int width,
+                         const int height)
+{
+    //Global memory guard for padding
+    if ((get_global_id(0) < width) && (get_global_id(1)<height))
+    {
+        int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1));
+        for (int c=0; c<NB_COLOR; c++)
+        {
+            array_float[i + c] = (float) array_input[i + c];
+        } //end loop over colors
+    } //end test in image
+} //end kernel
+
+
+/**
+ * \brief cast values of an array of uint16 into a float output array.
+ *
+ * :param array_input:    Pointer to global memory with the input data as unsigned16 array
+ * :param array_float:    Pointer to global memory with the output data as float array
+ * :param width:          Width of the image
+ * :param height:         Height of the image
+ */
+kernel void u16_to_float(global unsigned short  *array_input,
+                         global float *array_float,
+                         const int width,
+                         const int height)
+{
+    //Global memory guard for padding
+    if ((get_global_id(0) < width) && (get_global_id(1)<height))
+    {
+        int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1));
+        for (int c=0; c<NB_COLOR; c++)
+        {
+            array_float[i + c] = (float) array_input[i + c];
+        }           //end loop over colors
+    } //end test in image
+} //end kernel
+
+/**
+ * \brief cast values of an array of int16 into a float output array.
+ *
+ * :param array_input:    Pointer to global memory with the input data as signed16 array
+ * :param array_float:    Pointer to global memory with the output data as float array
+ * :param width:          Width of the image
+ * :param height:         Height of the image
+ */
+kernel void s16_to_float(global short *array_input,
+                         global float *array_float,
+                         const int width,
+                         const int height)
+{
+    //Global memory guard for padding
+    if ((get_global_id(0) < width) && (get_global_id(1)<height))
+    {
+        int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1));
+        for (int c=0; c<NB_COLOR; c++)
+        {
+            array_float[i + c] = (float) array_input[i + c];
+        } //end loop over colors
+    } //end test in image
+}//end kernel
+
+/**
+ * \brief cast values of an array of uint32 into a float output array.
+ *
+ * :param array_input:    Pointer to global memory with the input data as unsigned32 array
+ * :param array_float:    Pointer to global memory with the output data as float array
+ * :param width:          Width of the image
+ * :param height:         Height of the image
+ */
+kernel void u32_to_float(global unsigned int  *array_input,
+                         global float *array_float,
+                         const int width,
+                         const int height)
+{
+    //Global memory guard for padding
+    if ((get_global_id(0) < width) && (get_global_id(1)<height))
+    {
+        int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1));
+        for (int c=0; c<NB_COLOR; c++)
+        {
+            array_float[i + c] = (float) array_input[i + c];
+        } //end loop over colors
+    } //end test in image
+}//end kernel
+
+
+/**
+ * \brief convert values of an array of int32 into a float output array.
+ *
+ * :param array_input:    Pointer to global memory with the data in int
+ * :param array_float:    Pointer to global memory with the data in float
+ * :param width:          Width of the image
+ * :param height:         Height of the image
+ */
+kernel void s32_to_float(global int  *array_input,
+                         global float  *array_float,
+                         const int width,
+                         const int height)
+{
+    //Global memory guard for padding
+    if ((get_global_id(0) < width) && (get_global_id(1)<height))
+    {
+        int i = NB_COLOR * (get_global_id(0) + width * get_global_id(1));
+        for (int c=0; c<NB_COLOR; c++)
+        {
+            array_float[i + c] = (float) array_input[i + c];
+        } //end loop over colors
+    } //end test in image
+}//end kernel
+
diff --git a/silx/resources/opencl/image/histogram.cl b/silx/resources/opencl/image/histogram.cl
new file mode 100644
index 0000000..7fb1485
--- /dev/null
+++ b/silx/resources/opencl/image/histogram.cl
@@ -0,0 +1,178 @@
+//CL//
+
+/*
+ *   Project: SILX: Alogorithms for image processing
+ *
+ *   Copyright (C) 2013-2018 European Synchrotron Radiation Facility
+ *                           Grenoble, France
+ *
+ *   Principal authors: J. Kieffer (kieffer@esrf.fr)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Single kernel histogram for float array.
+ *
+ * Can perform histograming in log-scale (using the arcsinh)
+
+Parameters:
+  - data: buffer with the image content in float (input)
+  - data_size: input
+  - mini: Lower bound of the first bin
+  - maxi: upper bouns of the last bin
+  - map_operation: Type of pre-processor to use. if if set to !=0, use log scale
+  - hist: resulting histogram (ouptut)
+  - hist_size: number of bins
+  - tmp_hist: temporary storage of size  hist_size*num_groups
+  - processed: temporary storage of size 1 initialially set to 0
+  - local_hist: local storage of size hist_size
+
+
+Grid information:
+    * use the largest WG size. If it is larger than the number of bins (hist_size),
+    take the power of 2 immediately above
+    *Schedule as many WG as the device has compute engines. No need to schuedule more,
+    it is just a waist of memory
+    * The histogram should fit in shared (local) memory and tmp_hist can be large!
+
+Assumes:
+    hist and local_hist have size hist_size
+    edges has size hist_size+2
+    tmp_hist has size hist_size*num_groups
+    processed is of size one and initially set to 0
+
+*/
+
+
+static float preprocess(float value, int code)
+{
+    //This function can be modified to have more scales
+    if (code!=0)
+    {
+        return asinh(value);
+    }
+    else
+    {
+        return value;
+    }
+}
+
+kernel void histogram(global float *data,
+                      int data_size,
+                      float mini,
+                      float maxi,
+                      int map_operation,
+                      global int *hist,
+                      global float *edges,
+                      int hist_size,
+                      global int *tmp_hist,
+                      global int *processed,
+                      local int *local_hist)
+{
+    // each thread
+    int lid = get_local_id(0);
+    int ws = get_local_size(0);
+    int nb_engine = get_num_groups(0);
+    int engine_id = get_group_id(0);
+
+    // memset the local_hist array
+    for (int i=0; i<hist_size; i+=ws)
+    {
+        int j = i + lid;
+        if (j<hist_size)
+        {
+            local_hist[j] = 0;
+        }
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Process the local data and accumulate in shared memory
+    int bloc_size = (int) ceil((float)data_size/(float)nb_engine);
+    int start = bloc_size * engine_id;
+    int stop = min(start + bloc_size, data_size);
+    float vmini = preprocess(mini, map_operation);
+    float vscale = (float)hist_size/(preprocess(maxi, map_operation) -vmini);
+    for (int i = start; i<stop; i+=ws)
+    {
+        int address = i + lid;
+        if (address < stop)
+        {
+            float value = data[address];
+            if ((!isnan(value)) && (value>=mini) && (value<=maxi))
+            {
+                float vvalue = (preprocess(value, map_operation)-vmini)*vscale;
+                int idx = clamp((int) vvalue, 0, hist_size - 1);
+                atomic_inc(&local_hist[idx]);
+            }
+        }
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    //Now copy result into the right place and reset the first value of the shared array
+    for (int i=0; i<hist_size; i+=ws)
+    {
+        int j = i + lid;
+        if (j<hist_size)
+        {
+            tmp_hist[hist_size * engine_id + j] = local_hist[j];
+        }
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    local_hist[0] = 0;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    //Increment the system wide shared variable processed and share the result with the workgroup
+    if (lid == 0)
+    {
+        local_hist[0] = atomic_inc(processed);
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // If we are the engine last to work, perform the concatenation of all results
+
+    if ((local_hist[0] + 1) == nb_engine)
+    {
+        for (int i=0; i<hist_size; i+=ws)
+        {
+            int j = i + lid;
+            int lsum = 0;
+            if (j<hist_size)
+            {
+                for (int k=0; k<nb_engine; k++)
+                {
+                    lsum += tmp_hist[hist_size * k + j];
+                }
+                hist[j] = lsum;
+                edges[j] = vmini + j/vscale;
+            }
+        }
+        // Finally reset the counter
+        if (lid == 0)
+        {
+            processed[0] = 0;
+            edges[hist_size] = vmini + hist_size/vscale;;
+        }
+
+    }
+}
diff --git a/silx/resources/opencl/image/map.cl b/silx/resources/opencl/image/map.cl
new file mode 100644
index 0000000..804a5a1
--- /dev/null
+++ b/silx/resources/opencl/image/map.cl
@@ -0,0 +1,85 @@
+/*
+ *   Project: SILX: Data analysis library
+ *            kernel for maximum and minimum calculation
+ *
+ *
+ *   Copyright (C) 2013-2017 European Synchrotron Radiation Facility
+ *                           Grenoble, France
+ *
+ *   Principal authors: J. Kieffer (kieffer@esrf.fr)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+
+#ifndef NB_COLOR
+    #define NB_COLOR 1
+#endif
+
+
+/**
+ * \brief Linear scale of signal in an image (maybe multi-color)
+ *
+ * :param image: contains the input image
+ * :param output: contains the output image after scaling
+ * :param max_min_input: 2-floats containing the maximum and the minimum value of image
+ * :param minimum: float scalar with the minimum of the output
+ * :param maximum: float scalar with the maximum of the output
+ * :param width: integer, number of columns the matrices
+ * :param height: integer, number of lines of the matrices
+ *
+ *
+ */
+
+
+kernel void normalize_image(global float* image,
+                            global float* output,
+                            int width,
+                            int height,
+                            global float* max_min_input,
+                            float minimum,
+                            float maximum
+                            )
+{
+    //Global memory guard for padding
+    if((get_global_id(0) < width) && (get_global_id(1)<height))
+    {
+        int idx = NB_COLOR* (get_global_id(0) + width * get_global_id(1));
+        float mini_in, maxi_in, scale;
+        maxi_in = max_min_input[0];
+        mini_in = max_min_input[1];
+        if (maxi_in == mini_in)
+        {
+            scale = NAN;
+        }
+        else
+        {
+            scale = (maximum - minimum) / (maxi_in - mini_in);
+        }
+
+        for (int c=0; c<NB_COLOR; c++)
+        {
+            output[idx+c] = fma(scale, image[idx+c]-mini_in, minimum);
+        } // end color loop
+    }//end if in IMAGE
+}//end kernel
diff --git a/silx/resources/opencl/image/max_min.cl b/silx/resources/opencl/image/max_min.cl
new file mode 100644
index 0000000..246cd48
--- /dev/null
+++ b/silx/resources/opencl/image/max_min.cl
@@ -0,0 +1,207 @@
+/*
+ *   Project: SILX: Data analysis library
+ *            kernel for maximum and minimum calculation
+ *
+ *
+ *   Copyright (C) 2013-2017 European Synchrotron Radiation Facility
+ *                           Grenoble, France
+ *
+ *   Principal authors: J. Kieffer (kieffer@esrf.fr)
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+
+#ifndef NB_COLOR
+    #define NB_COLOR 1
+#endif
+
+
+
+#define REDUCE(a, b) ((float2) (fmax(a.x, b.x), fmin(a.y, b.y)))
+
+static float2 read_and_map(int idx,
+                           global float* data)
+{
+    idx *= NB_COLOR;
+    float tmp = data[idx];
+    float2 res = (float2) (tmp, tmp);
+    if (NB_COLOR > 1)
+    {
+        for (int c=1; c<NB_COLOR; c++)
+        {
+            tmp = data[idx + c];
+            res = (float2) (fmax(res.x, tmp), fmin(res.y, tmp));
+        }
+    }
+    return res;
+}
+
+
+/**
+ * \brief max_min_global_stage1: Look for the maximum an the minimum of an array. stage1
+ *
+ * optimal workgroup size: 2^n greater than sqrt(size), limited to 512
+ * optimal total item size:  (workgroup size)^2
+ * if size >total item size: adjust seq_count.
+ *
+ * :param data:       Float pointer to global memory storing the vector of data.
+ * :param out:        Float2 pointer to global memory storing the temporary results (workgroup size)
+ * :param seq_count:  how many blocksize each thread should read
+ * :param size:       size of the problem (number of element in the image
+ * :param l_data      Shared memory: 2 float per thread in workgroup
+ *
+**/
+
+
+kernel void max_min_reduction_stage1( global const float *data,
+                                      global float2 *out,
+                                      int size,
+                                      local  float2 *l_data)// local storage 2 float per thread
+{
+    int group_size =  get_local_size(0);
+    int lid = get_local_id(0);
+    float2 acc;
+    int big_block = group_size * get_num_groups(0);
+    int i =  lid + group_size * get_group_id(0);
+
+    if (lid<size)
+        acc = read_and_map(lid, data);
+    else
+        acc = read_and_map(0, data);
+
+    // Linear pre-reduction stage 0
+
+    while (i<size){
+      acc = REDUCE(acc, read_and_map(i, data));
+      i += big_block;
+    }
+
+    // parallel reduction stage 1
+
+    l_data[lid] = acc;
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for (int block=group_size/2; block>1; block/=2)
+        {
+            if ((lid < block) && ((lid + block)<group_size)){
+                l_data[lid] = REDUCE(l_data[lid], l_data[lid + block]);
+            }
+            barrier(CLK_LOCAL_MEM_FENCE);
+        }
+    if (lid == 0)
+    {
+        if (group_size > 1)
+        {
+            acc = REDUCE(l_data[0], l_data[1]);
+        }
+        else
+        {
+            acc = l_data[0];
+        }
+        out[get_group_id(0)] = acc;
+    }
+}
+
+
+/**
+ * \brief global_max_min: Look for the maximum an the minimum of an array.
+ *
+ *
+ *
+ * :param data2:      Float2 pointer to global memory storing the vector of pre-reduced data (workgroup size).
+ * :param maximum:    Float pointer to global memory storing the maximum value
+ * :param minumum:    Float pointer to global memory storing the minimum value
+ * :param l_data      Shared memory: 2 float per thread in workgroup
+ *
+**/
+
+kernel void max_min_reduction_stage2(
+        global const float2 *data2,
+        global float2 *maxmin,
+        local  float2 *l_data)// local storage 2 float per thread
+{
+    int lid = get_local_id(0);
+    int group_size =  get_local_size(0);
+    float2 acc = (float2)(-1.0f, -1.0f);
+    if (lid<=group_size)
+    {
+        l_data[lid] = data2[lid];
+    }
+    else
+    {
+        l_data[lid] = acc;
+    }
+
+    // parallel reduction stage 2
+
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+    for (int block=group_size/2; block>1; block/=2)
+    {
+        if ((lid < block) && ((lid + block)<group_size))
+        {
+            l_data[lid] = REDUCE(l_data[lid], l_data[lid + block]);
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+    }
+
+    if (lid == 0  )
+    {
+        if ( group_size > 1)
+        {
+            acc = REDUCE(l_data[0], l_data[1]);
+        }
+        else
+        {
+            acc = l_data[0];
+        }
+        maxmin[0] = acc;
+    }
+}
+
+/*This is the serial version of the min_max kernel.
+ *
+ * It has to be launched with WG=1 and only 1 WG has to be launched !
+ *
+ * :param data:       Float pointer to global memory storing the vector of data.
+ * :param size:       size of the
+ * :param maximum:    Float pointer to global memory storing the maximum value
+ * :param minumum:    Float pointer to global memory storing the minimum value
+ *
+ *
+ */
+kernel void max_min_serial(
+                            global const float *data,
+                            unsigned int size,
+                            global float2 *maxmin
+                            )
+{
+    float2 acc = read_and_map(0, data);
+    for (int i=1; i<size; i++)
+    {
+        acc = REDUCE(acc, read_and_map(i, data));
+    }
+
+    maxmin[0] = acc;
+}