/* * Project: SILX: A data analysis tool-kit * * Copyright (C) 2017 European Synchrotron Radiation Facility * Grenoble, France * * Principal authors: J. Kieffer (kieffer@esrf.fr) * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ /* To decompress CBF byte-offset compressed in parallel on GPU one needs to: * - Set all values in mask and exception counter to zero. * - Mark regions with exceptions and set values without exception. * This generates the values (zeros for exceptions), the exception mask, * counts the number of exception region and provides a start position for * each exception. * - Treat exceptions. For this, one thread in a workgoup treats a complete * masked region in a serial fashion. All regions are treated in parallel. * Values written at this stage are marked in the mask with -1. * - Double scan: inclusive cum sum for values, exclusive cum sum to generate * indices in output array. Values with mask = 1 are considered as 0. * - Compact and copy output by removing duplicated values in exceptions. */ kernel void mark_exceptions(global char* raw, int size, int full_size, global int* mask, global int* values, global int* cnt, global int* exc) { int gid; gid = get_global_id(0); if (gid= (in_size - 1)) ? in_size + 1 : indexes[gid + 1]; //we keep always the last element if ((current <= out_size) && (current < next)) { output[current] = values[gid]; } } } // copy the values of the elements to definitive position kernel void copy_result_float(global int* values, global int* indexes, int in_size, int out_size, global float* output ) { int gid = get_global_id(0); if (gid= (in_size - 1)) ? in_size + 1 : indexes[gid + 1]; if ((current < out_size) && (current < next)) { output[current] = (float) values[gid]; } } } // combined memset for all arrays used for Byte Offset decompression kernel void byte_offset_memset(global char* raw, global int* mask, global int* index, global int* result, int full_size, int actual_size ) { int gid = get_global_id(0); if (gid < full_size) { raw[gid] = 0; index[gid] = 0; result[gid] = 0; if (gid= start_at) && (gid < size)) { ary[gid] = pattern; } } //Simple memset kernel for int arrays kernel void fill_int_mem(global int* ary, int size, int pattern, int start_at) { int gid = get_global_id(0); if ((gid >= start_at) && (gid < size)) { ary[gid] = pattern; } }