diff options
Diffstat (limited to 'silx/resources/opencl/proj.cl')
-rw-r--r-- | silx/resources/opencl/proj.cl | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/silx/resources/opencl/proj.cl b/silx/resources/opencl/proj.cl new file mode 100644 index 0000000..afc58ff --- /dev/null +++ b/silx/resources/opencl/proj.cl @@ -0,0 +1,345 @@ +/* + * Copyright (C) 2017-2017 European Synchrotron Radiation Facility + * Grenoble, France + * + * Based on the projector of PyHST2 - https://forge.epn-campus.eu/projects/pyhst2 + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ + +/*******************************************************************************/ +/************************ GPU VERSION (with textures) **************************/ +/*******************************************************************************/ + + +kernel void forward_kernel( + global float *d_Sino, + read_only image2d_t d_slice, + int dimslice, + int num_bins, + global float* angles_per_project , + float axis_position, + global float *d_axis_corrections, + global int* d_beginPos , + global int* d_strideJoseph, + global int* d_strideLine , + int num_projections, + int dimrecx, + int dimrecy, + float cpu_offset_x, + float cpu_offset_y, + int josephnoclip, + int normalize) +{ + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; + const int tidx = get_local_id(0); + const int bidx = get_group_id(0); + const int tidy = get_local_id(1); + const int bidy = get_group_id(1); + float angle; + float cos_angle,sin_angle ; + + local float corrections[16]; + local int beginPos[16*2]; + local int strideJoseph[16*2]; + local int strideLine[16*2]; + + // thread will use corrections[tidy] + // All are read by first warp + int offset, OFFSET; + switch(tidy) { + case 0: + corrections[ tidx ]= d_axis_corrections[ bidy*16+tidx]; + break; + case 1: + case 2: + offset = 16*(tidy-1); + OFFSET = dimrecy*(tidy-1); + beginPos [offset + tidx ]= d_beginPos[ OFFSET+ bidy*16+tidx] ; + break; + case 3: + case 4: + offset = 16*(tidy-3); + OFFSET = dimrecy*(tidy-3); + strideJoseph[offset + tidx ]= d_strideJoseph[OFFSET + bidy*16+tidx] ; + break; + case 5: + case 6: + offset = 16*(tidy-5); + OFFSET = dimrecy*(tidy-5); + strideLine[offset + tidx ]= d_strideLine[OFFSET + bidy*16+tidx] ; + break; + } + barrier(CLK_LOCAL_MEM_FENCE); + + angle = angles_per_project[ bidy*16+tidy ] ; + cos_angle = cos(angle); + sin_angle = sin(angle); + + if(fabs(cos_angle) > 0.70710678f ) { + if( cos_angle>0) { + cos_angle = cos(angle); + sin_angle = sin(angle); + } + else { + cos_angle = -cos(angle); + sin_angle = -sin(angle); + } + } + else { + if( sin_angle>0) { + cos_angle = sin(angle); + sin_angle = -cos(angle); + } + else { + cos_angle = -sin(angle); + sin_angle = cos(angle); + } + } + float res=0.0f; + float axis_corr = axis_position + corrections[ tidy ]; + float axis = axis_position ; + float xpix = ( bidx*16+tidx )-cpu_offset_x; + float posx = axis*(1.0f-sin_angle/cos_angle ) +(xpix-(axis_corr) )/cos_angle ; + + float shiftJ = sin_angle/cos_angle; + float x1 = min(-sin_angle/cos_angle ,0.f); + float x2 = max(-sin_angle/cos_angle ,0.f); + + float Area; + Area=1.0f/cos_angle; + int stlA, stlB , stlAJ, stlBJ ; + stlA=strideLine[16+tidy]; + stlB=strideLine[tidy]; + stlAJ=strideJoseph[16+tidy]; + stlBJ=strideJoseph[tidy]; + + int beginA = beginPos[16+tidy ]; + int beginB = beginPos[tidy ]; + float add; + int l; + + if(josephnoclip) { + for(int j=0; j<dimslice; j++) { // j: Joseph + x1 = beginA +(posx)*stlA + (j)*stlAJ+1.5f; + x2 = beginB +(posx)*stlB + (j)*stlBJ+1.5f; + add = read_imagef(d_slice, sampler, (float2) (x1, x2)).x; // add = tex2D(texSlice, x1,x2); + res += add; + posx += shiftJ; + } + } + else { + for(int j=0; j<dimslice; j++) { // j: Joseph + x1 = beginA +(posx)*stlA + (j)*stlAJ+1.5f; + x2 = beginB +(posx)*stlB + (j)*stlBJ+1.5f; + l = (x1>=0.0f )*(x1<(dimslice+2))*( x2>=0.0f)*( x2<(dimslice+2) ) ; + add = read_imagef(d_slice, sampler, (float2) (x1, x2)).x; // add = tex2D(texSlice, x1,x2); + res += add*l; + posx += shiftJ; + } + } + + if((bidy*16 + tidy) < num_projections && (bidx*16 + tidx) < num_bins) { + res *= Area; + if (normalize) + res *= M_PI_F * 0.5f / num_projections; + d_Sino[dimrecx*(bidy*16 + tidy) + (bidx*16 + tidx)] = res; + } +} + + + +/*******************************************************************************/ +/********************* CPU VERSION (without textures) **************************/ +/*******************************************************************************/ + + +kernel void forward_kernel_cpu( + global float *d_Sino, + global float* d_slice, + int dimslice, + int num_bins, + global float* angles_per_project , + float axis_position, + global float *d_axis_corrections, + global int* d_beginPos , + global int* d_strideJoseph, + global int* d_strideLine , + int num_projections, + int dimrecx, + int dimrecy, + float cpu_offset_x, + float cpu_offset_y, + int josephnoclip, + int normalize) +{ + + const int tidx = get_local_id(0); + const int bidx = get_group_id(0); + const int tidy = get_local_id(1); + const int bidy = get_group_id(1); + float angle; + float cos_angle,sin_angle ; + + local float corrections[16]; + local int beginPos[16*2]; + local int strideJoseph[16*2]; + local int strideLine[16*2]; + + // thread will use corrections[tidy] + // All are read by first warp + int offset, OFFSET; + switch(tidy) { + case 0: + corrections[ tidx ]= d_axis_corrections[ bidy*16+tidx]; + break; + case 1: + case 2: + offset = 16*(tidy-1); + OFFSET = dimrecy*(tidy-1); + beginPos [offset + tidx ]= d_beginPos[ OFFSET+ bidy*16+tidx] ; + break; + case 3: + case 4: + offset = 16*(tidy-3); + OFFSET = dimrecy*(tidy-3); + strideJoseph[offset + tidx ]= d_strideJoseph[OFFSET + bidy*16+tidx] ; + break; + case 5: + case 6: + offset = 16*(tidy-5); + OFFSET = dimrecy*(tidy-5); + strideLine[offset + tidx ]= d_strideLine[OFFSET + bidy*16+tidx] ; + break; + } + barrier(CLK_LOCAL_MEM_FENCE); + + angle = angles_per_project[ bidy*16+tidy ] ; + cos_angle = cos(angle); + sin_angle = sin(angle); + + if(fabs(cos_angle) > 0.70710678f ) { + if( cos_angle>0) { + cos_angle = cos(angle); + sin_angle = sin(angle); + } + else { + cos_angle = -cos(angle); + sin_angle = -sin(angle); + } + } + else { + if( sin_angle>0) { + cos_angle = sin(angle); + sin_angle = -cos(angle); + } + else { + cos_angle = -sin(angle); + sin_angle = cos(angle); + } + } + float res=0.0f; + float axis_corr = axis_position + corrections[ tidy ]; + float axis = axis_position ; + float xpix = ( bidx*16+tidx )-cpu_offset_x; + float posx = axis*(1.0f-sin_angle/cos_angle ) +(xpix-(axis_corr) )/cos_angle ; + + float shiftJ = sin_angle/cos_angle; + float x1 = min(-sin_angle/cos_angle ,0.f); + float x2 = max(-sin_angle/cos_angle ,0.f); + + float Area; + Area=1.0f/cos_angle; + int stlA, stlB , stlAJ, stlBJ ; + stlA=strideLine[16+tidy]; + stlB=strideLine[tidy]; + stlAJ=strideJoseph[16+tidy]; + stlBJ=strideJoseph[tidy]; + + int beginA = beginPos[16+tidy ]; + int beginB = beginPos[tidy ]; + int l; + + int ym, yp, xm, xp; + float yc, xc; + float val; + if(josephnoclip) { + for(int j=0; j<dimslice; j++) { // j: Joseph + x1 = beginA +(posx)*stlA + (j)*stlAJ+1.0f; + x2 = beginB +(posx)*stlB + (j)*stlBJ+1.0f; + /* + Bilinear interpolation + */ + yc = fmin(fmax(x2, 0.0f), ((dimslice+2) - 1.0f)); // y_clipped + ym = (int) floor(yc); // y_minus + yp = (int) ceil(yc); // y_plus + + xc = fmin(fmax(x1, 0.0f), ((dimslice+2) - 1.0f)); // x_clipped + xm = (int) floor(xc); // x_minus + xp = (int) ceil(xc); // x_plus + + if ((ym == yp) && (xm == xp)) val = d_slice[ym*(dimslice+2) + xm]; + else if (ym == yp) val = (d_slice[ym*(dimslice+2) + xm] * (xp - xc)) + (d_slice[ym*(dimslice+2) + xp] * (xc - xm)); + else if (xm == xp) val = (d_slice[ym*(dimslice+2) + xm] * (yp - yc)) + (d_slice[yp*(dimslice+2) + xm] * (yc - ym)); + else val = (d_slice[ym*(dimslice+2) + xm] * (yp - yc) * (xp - xc)) + + (d_slice[yp*(dimslice+2) + xm] * (yc - ym) * (xp - xc)) + + (d_slice[ym*(dimslice+2) + xp] * (yp - yc) * (xc - xm)) + + (d_slice[yp*(dimslice+2) + xp] * (yc - ym) * (xc - xm)); + // ---------- + res += val; + posx += shiftJ; + } + } + else { + for(int j=0; j<dimslice; j++) { // j: Joseph + x1 = beginA +(posx)*stlA + (j)*stlAJ+1.5f; + x2 = beginB +(posx)*stlB + (j)*stlBJ+1.5f; + l = (x1>=0.0f )*(x1<(dimslice+2))*( x2>=0.0f)*( x2<(dimslice+2) ) ; + /* + Bilinear interpolation + */ + yc = fmin(fmax(x2, 0.0f), ((dimslice+2) - 1.0f)); // y_clipped + ym = (int) floor(yc); // y_minus + yp = (int) ceil(yc); // y_plus + + xc = fmin(fmax(x1, 0.0f), ((dimslice+2) - 1.0f)); // x_clipped + xm = (int) floor(xc); // x_minus + xp = (int) ceil(xc); // x_plus + + if ((ym == yp) && (xm == xp)) val = d_slice[ym*(dimslice+2) + xm]; + else if (ym == yp) val = (d_slice[ym*(dimslice+2) + xm] * (xp - xc)) + (d_slice[ym*(dimslice+2) + xp] * (xc - xm)); + else if (xm == xp) val = (d_slice[ym*(dimslice+2) + xm] * (yp - yc)) + (d_slice[yp*(dimslice+2) + xm] * (yc - ym)); + else val = (d_slice[ym*(dimslice+2) + xm] * (yp - yc) * (xp - xc)) + + (d_slice[yp*(dimslice+2) + xm] * (yc - ym) * (xp - xc)) + + (d_slice[ym*(dimslice+2) + xp] * (yp - yc) * (xc - xm)) + + (d_slice[yp*(dimslice+2) + xp] * (yc - ym) * (xc - xm)); + // ---------- + res += val*l; + posx += shiftJ; + } + } + + if((bidy*16 + tidy) < num_projections && (bidx*16 + tidx) < num_bins) { + res *= Area; + if (normalize) + res *= M_PI_F * 0.5f / num_projections; + d_Sino[dimrecx*(bidy*16 + tidy) + (bidx*16 + tidx)] = res; + } +} |