/* * Copyright (C) 2017-2017 European Synchrotron Radiation Facility * Grenoble, France * * Based on the projector of PyHST2 - https://forge.epn-campus.eu/projects/pyhst2 * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * */ /*******************************************************************************/ /************************ GPU VERSION (with textures) **************************/ /*******************************************************************************/ kernel void forward_kernel( global float *d_Sino, read_only image2d_t d_slice, int dimslice, int num_bins, global float* angles_per_project , float axis_position, global float *d_axis_corrections, global int* d_beginPos , global int* d_strideJoseph, global int* d_strideLine , int num_projections, int dimrecx, int dimrecy, float cpu_offset_x, float cpu_offset_y, int josephnoclip, int normalize) { const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR; const int tidx = get_local_id(0); const int bidx = get_group_id(0); const int tidy = get_local_id(1); const int bidy = get_group_id(1); float angle; float cos_angle,sin_angle ; local float corrections[16]; local int beginPos[16*2]; local int strideJoseph[16*2]; local int strideLine[16*2]; // thread will use corrections[tidy] // All are read by first warp int offset, OFFSET; switch(tidy) { case 0: corrections[ tidx ]= d_axis_corrections[ bidy*16+tidx]; break; case 1: case 2: offset = 16*(tidy-1); OFFSET = dimrecy*(tidy-1); beginPos [offset + tidx ]= d_beginPos[ OFFSET+ bidy*16+tidx] ; break; case 3: case 4: offset = 16*(tidy-3); OFFSET = dimrecy*(tidy-3); strideJoseph[offset + tidx ]= d_strideJoseph[OFFSET + bidy*16+tidx] ; break; case 5: case 6: offset = 16*(tidy-5); OFFSET = dimrecy*(tidy-5); strideLine[offset + tidx ]= d_strideLine[OFFSET + bidy*16+tidx] ; break; } barrier(CLK_LOCAL_MEM_FENCE); angle = angles_per_project[ bidy*16+tidy ] ; cos_angle = cos(angle); sin_angle = sin(angle); if(fabs(cos_angle) > 0.70710678f ) { if( cos_angle>0) { cos_angle = cos(angle); sin_angle = sin(angle); } else { cos_angle = -cos(angle); sin_angle = -sin(angle); } } else { if( sin_angle>0) { cos_angle = sin(angle); sin_angle = -cos(angle); } else { cos_angle = -sin(angle); sin_angle = cos(angle); } } float res=0.0f; float axis_corr = axis_position + corrections[ tidy ]; float axis = axis_position ; float xpix = ( bidx*16+tidx )-cpu_offset_x; float posx = axis*(1.0f-sin_angle/cos_angle ) +(xpix-(axis_corr) )/cos_angle ; float shiftJ = sin_angle/cos_angle; float x1 = min(-sin_angle/cos_angle ,0.f); float x2 = max(-sin_angle/cos_angle ,0.f); float Area; Area=1.0f/cos_angle; int stlA, stlB , stlAJ, stlBJ ; stlA=strideLine[16+tidy]; stlB=strideLine[tidy]; stlAJ=strideJoseph[16+tidy]; stlBJ=strideJoseph[tidy]; int beginA = beginPos[16+tidy ]; int beginB = beginPos[tidy ]; float add; int l; if(josephnoclip) { for(int j=0; j=0.0f )*(x1<(dimslice+2))*( x2>=0.0f)*( x2<(dimslice+2) ) ; add = read_imagef(d_slice, sampler, (float2) (x1, x2)).x; // add = tex2D(texSlice, x1,x2); res += add*l; posx += shiftJ; } } if((bidy*16 + tidy) < num_projections && (bidx*16 + tidx) < num_bins) { res *= Area; if (normalize) res *= M_PI_F * 0.5f / num_projections; d_Sino[dimrecx*(bidy*16 + tidy) + (bidx*16 + tidx)] = res; } } /*******************************************************************************/ /********************* CPU VERSION (without textures) **************************/ /*******************************************************************************/ kernel void forward_kernel_cpu( global float *d_Sino, global float* d_slice, int dimslice, int num_bins, global float* angles_per_project , float axis_position, global float *d_axis_corrections, global int* d_beginPos , global int* d_strideJoseph, global int* d_strideLine , int num_projections, int dimrecx, int dimrecy, float cpu_offset_x, float cpu_offset_y, int josephnoclip, int normalize) { const int tidx = get_local_id(0); const int bidx = get_group_id(0); const int tidy = get_local_id(1); const int bidy = get_group_id(1); float angle; float cos_angle,sin_angle ; local float corrections[16]; local int beginPos[16*2]; local int strideJoseph[16*2]; local int strideLine[16*2]; // thread will use corrections[tidy] // All are read by first warp int offset, OFFSET; switch(tidy) { case 0: corrections[ tidx ]= d_axis_corrections[ bidy*16+tidx]; break; case 1: case 2: offset = 16*(tidy-1); OFFSET = dimrecy*(tidy-1); beginPos [offset + tidx ]= d_beginPos[ OFFSET+ bidy*16+tidx] ; break; case 3: case 4: offset = 16*(tidy-3); OFFSET = dimrecy*(tidy-3); strideJoseph[offset + tidx ]= d_strideJoseph[OFFSET + bidy*16+tidx] ; break; case 5: case 6: offset = 16*(tidy-5); OFFSET = dimrecy*(tidy-5); strideLine[offset + tidx ]= d_strideLine[OFFSET + bidy*16+tidx] ; break; } barrier(CLK_LOCAL_MEM_FENCE); angle = angles_per_project[ bidy*16+tidy ] ; cos_angle = cos(angle); sin_angle = sin(angle); if(fabs(cos_angle) > 0.70710678f ) { if( cos_angle>0) { cos_angle = cos(angle); sin_angle = sin(angle); } else { cos_angle = -cos(angle); sin_angle = -sin(angle); } } else { if( sin_angle>0) { cos_angle = sin(angle); sin_angle = -cos(angle); } else { cos_angle = -sin(angle); sin_angle = cos(angle); } } float res=0.0f; float axis_corr = axis_position + corrections[ tidy ]; float axis = axis_position ; float xpix = ( bidx*16+tidx )-cpu_offset_x; float posx = axis*(1.0f-sin_angle/cos_angle ) +(xpix-(axis_corr) )/cos_angle ; float shiftJ = sin_angle/cos_angle; float x1 = min(-sin_angle/cos_angle ,0.f); float x2 = max(-sin_angle/cos_angle ,0.f); float Area; Area=1.0f/cos_angle; int stlA, stlB , stlAJ, stlBJ ; stlA=strideLine[16+tidy]; stlB=strideLine[tidy]; stlAJ=strideJoseph[16+tidy]; stlBJ=strideJoseph[tidy]; int beginA = beginPos[16+tidy ]; int beginB = beginPos[tidy ]; int l; int ym, yp, xm, xp; float yc, xc; float val; if(josephnoclip) { for(int j=0; j=0.0f )*(x1<(dimslice+2))*( x2>=0.0f)*( x2<(dimslice+2) ) ; /* Bilinear interpolation */ yc = fmin(fmax(x2, 0.0f), ((dimslice+2) - 1.0f)); // y_clipped ym = (int) floor(yc); // y_minus yp = (int) ceil(yc); // y_plus xc = fmin(fmax(x1, 0.0f), ((dimslice+2) - 1.0f)); // x_clipped xm = (int) floor(xc); // x_minus xp = (int) ceil(xc); // x_plus if ((ym == yp) && (xm == xp)) val = d_slice[ym*(dimslice+2) + xm]; else if (ym == yp) val = (d_slice[ym*(dimslice+2) + xm] * (xp - xc)) + (d_slice[ym*(dimslice+2) + xp] * (xc - xm)); else if (xm == xp) val = (d_slice[ym*(dimslice+2) + xm] * (yp - yc)) + (d_slice[yp*(dimslice+2) + xm] * (yc - ym)); else val = (d_slice[ym*(dimslice+2) + xm] * (yp - yc) * (xp - xc)) + (d_slice[yp*(dimslice+2) + xm] * (yc - ym) * (xp - xc)) + (d_slice[ym*(dimslice+2) + xp] * (yp - yc) * (xc - xm)) + (d_slice[yp*(dimslice+2) + xp] * (yc - ym) * (xc - xm)); // ---------- res += val*l; posx += shiftJ; } } if((bidy*16 + tidy) < num_projections && (bidx*16 + tidx) < num_bins) { res *= Area; if (normalize) res *= M_PI_F * 0.5f / num_projections; d_Sino[dimrecx*(bidy*16 + tidy) + (bidx*16 + tidx)] = res; } }