diff options
Diffstat (limited to 'ffts/src/fft2d.c')
-rw-r--r-- | ffts/src/fft2d.c | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/ffts/src/fft2d.c b/ffts/src/fft2d.c new file mode 100644 index 0000000..0d8c3d8 --- /dev/null +++ b/ffts/src/fft2d.c @@ -0,0 +1,402 @@ +/******************************************************************* + This file extends the fftlib with 2d and 3d complex fft's and + 2d real fft's. All fft's return results in-place. Temporary buffers + for transposing columns are maintained privately via calls to + fft2dInit, fft2dFree, fft3dInit, and fft3dFree. + Note that you can call fft2dInit and fft3dInit repeatedly + with the same sizes, the extra calls will be ignored. + So, you could make a macro to call fft2dInit every time you call fft2d. + *** Warning *** fft2dFree and fft3dFree also call fftFree + so you must re-init all 1d fft sizes you are going to continue using +*******************************************************************/ +#include <stdlib.h> +#include <fp.h> +#include "fftlib.h" +#include "fftext.h" +#include "matlib.h" +#include "dxpose.h" +#include "fft2d.h" + // use trick of using a real double transpose in place of a complex transpose if it fits +#define cxpose(a,b,c,d,e,f) (2*sizeof(float)==sizeof(xdouble)) ? dxpose((xdouble *)(a), b, (xdouble *)(c), d, e, f) : cxpose(a,b,c,d,e,f); + // for this trick to work you must NOT replace the xdouble declarations in + // dxpose with float declarations. + + + // pointers for temporary storage for four columns +static float *Array2d[8*sizeof(long)] = {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0}; +int fft2dInit(long M2, long M){ + // init for fft2d, ifft2d, rfft2d, and rifft2d + // malloc storage for 4 columns of 2d ffts then call fftinit for both row and column ffts sizes +/* INPUTS */ +/* M = log2 of number of columns */ +/* M2 = log2 of number of rows */ +/* of 2d matrix to be fourier transformed */ +/* OUTPUTS */ +/* private storage for columns of 2d ffts */ +/* calls fftInit for cosine and bit reversed tables */ +int theError = 1; +if ((M2 >= 0) && (M2 < 8*sizeof(long))){ + theError = 0; + if (Array2d[M2] == 0){ + Array2d[M2] = (float *) malloc( 4*2*POW2(M2)*sizeof(float) ); + if (Array2d[M2] == 0) + theError = 2; + else{ + theError = fftInit(M2); + } + } + if (theError == 0) + theError = fftInit(M); +} +return theError; +} + +void fft2dFree(){ +// free storage for columns of 2d ffts and call fftFree to free all BRLow and Utbl storage +long i1; +for (i1=8*sizeof(long)-1; i1>=0; i1--){ + if (Array2d[i1] != 0){ + free(Array2d[i1]); + Array2d[i1] = 0; + }; +}; +fftFree(); +} + +void fft2d(float *data, long M2, long M){ +/* Compute 2D complex fft and return results in-place */ +/* INPUTS */ +/* *data = input data array */ +/* M2 = log2 of fft size number of rows */ +/* M = log2 of fft size number of columns */ +/* OUTPUTS */ +/* *data = output data array */ +long i1; +if((M2>0)&&(M>0)){ + ffts(data, M, POW2(M2)); + if (M>2) + for (i1=0; i1<POW2(M); i1+=4){ + cxpose(data + i1*2, POW2(M), Array2d[M2], POW2(M2), POW2(M2), 4); + ffts(Array2d[M2], M2, 4); + cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M), 4, POW2(M2)); + } + else{ + cxpose(data, POW2(M), Array2d[M2], POW2(M2), POW2(M2), POW2(M)); + ffts(Array2d[M2], M2, POW2(M)); + cxpose(Array2d[M2], POW2(M2), data, POW2(M), POW2(M), POW2(M2)); + } +} +else + ffts(data, M2+M, 1); +} + +void ifft2d(float *data, long M2, long M){ +/* Compute 2D complex ifft and return results in-place */ +/* INPUTS */ +/* *data = input data array */ +/* M2 = log2 of fft size number of rows */ +/* M = log2 of fft size number of columns */ +/* OUTPUTS */ +/* *data = output data array */ +long i1; +if((M2>0)&&(M>0)){ + iffts(data, M, POW2(M2)); + if (M>2) + for (i1=0; i1<POW2(M); i1+=4){ + cxpose(data + i1*2, POW2(M), Array2d[M2], POW2(M2), POW2(M2), 4); + iffts(Array2d[M2], M2, 4); + cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M), 4, POW2(M2)); + } + else{ + cxpose(data, POW2(M), Array2d[M2], POW2(M2), POW2(M2), POW2(M)); + iffts(Array2d[M2], M2, POW2(M)); + cxpose(Array2d[M2], POW2(M2), data, POW2(M), POW2(M), POW2(M2)); + } +} +else + iffts(data, M2+M, 1); +} + +int fft3dInit(long L, long M2, long M){ + // init for fft3d, ifft3d + // malloc storage for 4 columns and 4 pages of 3d ffts + // then call fftinit for page, row and column ffts sizes +//* M = log2 of number of columns */ +/* M2 = log2 of number of rows */ +/* L = log2 of number of pages */ +/* of 3d matrix to be fourier transformed */ +/* OUTPUTS */ +/* private storage for columns and pages of 3d ffts */ +/* calls fftInit for cosine and bit reversed tables */ +int theError = 1; +if ((L >= 0) && (L < 8*sizeof(long))){ + theError = 0; + if (Array2d[L] == 0){ + Array2d[L] = (float *) malloc( 4*2*POW2(L)*sizeof(float) ); + if (Array2d[L] == 0) + theError = 2; + else{ + theError = fftInit(L); + } + } + if (theError == 0){ + if (Array2d[M2] == 0){ + Array2d[M2] = (float *) malloc( 4*2*POW2(M2)*sizeof(float) ); + if (Array2d[M2] == 0) + theError = 2; + else{ + theError = fftInit(M2); + } + } + } + if (theError == 0) + theError = fftInit(M); +} +return theError; +} + +void fft3dFree(){ +// free storage for columns of all 2d&3d ffts and call fftFree to free all BRLow and Utbl storage +fft2dFree(); +} + +void fft3d(float *data, long M3, long M2, long M){ +/* Compute 2D complex fft and return results in-place */ +/* INPUTS */ +/* *data = input data array */ +/* M3 = log2 of fft size number of pages */ +/* M2 = log2 of fft size number of rows */ +/* M = log2 of fft size number of columns */ +/* OUTPUTS */ +/* *data = output data array */ +long i1; +long i2; +const long N = POW2(M); +const long N2 = POW2(M2); +const long N3 = POW2(M3); +if((M3>0)&&(M2>0)&&(M>0)){ + ffts(data, M, N3*N2); + if (M>2) + for (i2=0; i2<N3; i2++){ + for (i1=0; i1<N; i1+=4){ + cxpose(data + i2*2*POW2(M2+M) + i1*2, N, Array2d[M2], N2, N2, 4); + ffts(Array2d[M2], M2, 4); + cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M) + i1*2, N, 4, N2); + } + } + else{ + for (i2=0; i2<N3; i2++){ + cxpose(data + i2*2*POW2(M2+M), N, Array2d[M2], N2, N2, N); + ffts(Array2d[M2], M2, N); + cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M), N, N, N2); + } + } + if ((M2+M)>2) + for (i1=0; i1<POW2(M2+M); i1+=4){ + cxpose(data + i1*2, POW2(M2+M), Array2d[M3], N3, N3, 4); + ffts(Array2d[M3], M3, 4); + cxpose(Array2d[M3], N3, data + i1*2, POW2(M2+M), 4, N3); + } + else{ + cxpose(data, POW2(M2+M), Array2d[M3], N3, N3, POW2(M2+M)); + ffts(Array2d[M3], M3, POW2(M2+M)); + cxpose(Array2d[M3], N3, data, POW2(M2+M), POW2(M2+M), N3); + } +} +else + if(M3==0) fft2d(data, M2, M); + else + if(M2==0) fft2d(data, M3, M); + else + if(M==0) fft2d(data, M3, M2); +} + +void ifft3d(float *data, long M3, long M2, long M){ +/* Compute 2D complex ifft and return results in-place */ +/* INPUTS */ +/* *data = input data array */ +/* M3 = log2 of fft size number of pages */ +/* M2 = log2 of fft size number of rows */ +/* M = log2 of fft size number of columns */ +/* OUTPUTS */ +/* *data = output data array */ +long i1; +long i2; +const long N = POW2(M); +const long N2 = POW2(M2); +const long N3 = POW2(M3); +if((M3>0)&&(M2>0)&&(M>0)){ + iffts(data, M, N3*N2); + if (M>2) + for (i2=0; i2<N3; i2++){ + for (i1=0; i1<N; i1+=4){ + cxpose(data + i2*2*POW2(M2+M) + i1*2, N, Array2d[M2], N2, N2, 4); + iffts(Array2d[M2], M2, 4); + cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M) + i1*2, N, 4, N2); + } + } + else{ + for (i2=0; i2<N3; i2++){ + cxpose(data + i2*2*POW2(M2+M), N, Array2d[M2], N2, N2, N); + iffts(Array2d[M2], M2, N); + cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M), N, N, N2); + } + } + if ((M2+M)>2) + for (i1=0; i1<POW2(M2+M); i1+=4){ + cxpose(data + i1*2, POW2(M2+M), Array2d[M3], N3, N3, 4); + iffts(Array2d[M3], M3, 4); + cxpose(Array2d[M3], N3, data + i1*2, POW2(M2+M), 4, N3); + } + else{ + cxpose(data, POW2(M2+M), Array2d[M3], N3, N3, POW2(M2+M)); + iffts(Array2d[M3], M3, POW2(M2+M)); + cxpose(Array2d[M3], N3, data, POW2(M2+M), POW2(M2+M), N3); + } +} +else + if(M3==0) ifft2d(data, M2, M); + else + if(M2==0) ifft2d(data, M3, M); + else + if(M==0) ifft2d(data, M3, M2); +} + +void rfft2d(float *data, long M2, long M){ +/* Compute 2D real fft and return results in-place */ +/* First performs real fft on rows using size from M to compute positive frequencies */ +/* then performs transform on columns using size from M2 to compute wavenumbers */ +/* If you think of the result as a complex pow(2,M2) by pow(2,M-1) matrix */ +/* then the first column contains the positive wavenumber spectra of DC frequency */ +/* followed by the positive wavenumber spectra of the nyquest frequency */ +/* since these are two positive wavenumber spectra the first complex value */ +/* of each is really the real values for the zero and nyquest wavenumber packed together */ +/* All other columns contain the positive and negative wavenumber spectra of a positive frequency */ +/* See rspect2dprod for multiplying two of these spectra together- ex. for fast convolution */ +/* INPUTS */ +/* *data = input data array */ +/* M2 = log2 of fft size number of rows in */ +/* M = log2 of fft size number of columns in */ +/* OUTPUTS */ +/* *data = output data array */ +long i1; +if((M2>0)&&(M>0)){ + rffts(data, M, POW2(M2)); + if (M==1){ + cxpose(data, POW2(M)/2, Array2d[M2]+POW2(M2)*2, POW2(M2), POW2(M2), 1); + xpose(Array2d[M2]+POW2(M2)*2, 2, Array2d[M2], POW2(M2), POW2(M2), 2); + rffts(Array2d[M2], M2, 2); + cxpose(Array2d[M2], POW2(M2), data, POW2(M)/2, 1, POW2(M2)); + } + else if (M==2){ + cxpose(data, POW2(M)/2, Array2d[M2]+POW2(M2)*2, POW2(M2), POW2(M2), 1); + xpose(Array2d[M2]+POW2(M2)*2, 2, Array2d[M2], POW2(M2), POW2(M2), 2); + rffts(Array2d[M2], M2, 2); + cxpose(Array2d[M2], POW2(M2), data, POW2(M)/2, 1, POW2(M2)); + + cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1); + ffts(Array2d[M2], M2, 1); + cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 1, POW2(M2)); + } + else{ + cxpose(data, POW2(M)/2, Array2d[M2]+POW2(M2)*2, POW2(M2), POW2(M2), 1); + xpose(Array2d[M2]+POW2(M2)*2, 2, Array2d[M2], POW2(M2), POW2(M2), 2); + rffts(Array2d[M2], M2, 2); + cxpose(Array2d[M2], POW2(M2), data, POW2(M)/2, 1, POW2(M2)); + + cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 3); + ffts(Array2d[M2], M2, 3); + cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 3, POW2(M2)); + for (i1=4; i1<POW2(M)/2; i1+=4){ + cxpose(data + i1*2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 4); + ffts(Array2d[M2], M2, 4); + cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M)/2, 4, POW2(M2)); + } + } +} +else + rffts(data, M2+M, 1); +} + +void rifft2d(float *data, long M2, long M){ +/* Compute 2D real ifft and return results in-place */ +/* The input must be in the order as outout from rfft2d */ +/* INPUTS */ +/* *data = input data array */ +/* M2 = log2 of fft size number of rows out */ +/* M = log2 of fft size number of columns out */ +/* OUTPUTS */ +/* *data = output data array */ +long i1; +if((M2>0)&&(M>0)){ + if (M==1){ + cxpose(data, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1); + riffts(Array2d[M2], M2, 2); + xpose(Array2d[M2], POW2(M2), Array2d[M2]+POW2(M2)*2, 2, 2, POW2(M2)); + cxpose(Array2d[M2]+POW2(M2)*2, POW2(M2), data, POW2(M)/2, 1, POW2(M2)); + } + else if (M==2){ + cxpose(data, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1); + riffts(Array2d[M2], M2, 2); + xpose(Array2d[M2], POW2(M2), Array2d[M2]+POW2(M2)*2, 2, 2, POW2(M2)); + cxpose(Array2d[M2]+POW2(M2)*2, POW2(M2), data, POW2(M)/2, 1, POW2(M2)); + + cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1); + iffts(Array2d[M2], M2, 1); + cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 1, POW2(M2)); + } + else{ + cxpose(data, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1); + riffts(Array2d[M2], M2, 2); + xpose(Array2d[M2], POW2(M2), Array2d[M2]+POW2(M2)*2, 2, 2, POW2(M2)); + cxpose(Array2d[M2]+POW2(M2)*2, POW2(M2), data, POW2(M)/2, 1, POW2(M2)); + + cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 3); + iffts(Array2d[M2], M2, 3); + cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 3, POW2(M2)); + for (i1=4; i1<POW2(M)/2; i1+=4){ + cxpose(data + i1*2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 4); + iffts(Array2d[M2], M2, 4); + cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M)/2, 4, POW2(M2)); + } + } + riffts(data, M, POW2(M2)); +} +else + riffts(data, M2+M, 1); +} + +void rspect2dprod(float *data1, float *data2, float *outdata, long N2, long N1){ +// When multiplying a pair of 2d spectra from rfft2d care must be taken to multiply the +// four real values seperately from the complex ones. This routine does it correctly. +// the result can be stored in-place over one of the inputs +/* *data1 = input data array first spectra */ +/* *data2 = input data array second spectra */ +/* N2 = fft size number of rows into rfft2d for both data1 and data2 */ +/* N1 = fft size number of columns into rfft2d for both data1 and data2 */ +long N = N2 * N1/2; // number of "complex" numbers in spectra + +if( (N2 > 1) && (N1>1)){ + outdata[0] = data1[0] * data2[0]; // multiply the zero freq, zero wavenumber values + outdata[1] = data1[1] * data2[1]; // multiply the zero freq, nyquest wavenumber values + + cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1); + + outdata[N] = data1[N] * data2[N]; // multiply the nyquest freq, zero wavenumber values + outdata[N+1] = data1[N+1] * data2[N+1]; // multiply the nyquest freq, nyquest wavenumber values + + cvprod(data1 + N+2, data2 + N+2, outdata + N+2, N/2-1); +} +else{ // really 1D rfft spectra + N = N2 * N1; // one of these is a 1 + if(N>1){ + outdata[0] = data1[0] * data2[0]; // multiply the zero freq values + outdata[1] = data1[1] * data2[1]; // multiply the nyquest freq values + cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1); // multiply the other positive freq values + } + else{ + outdata[0] = data1[0] * data2[0]; + } +} +}
\ No newline at end of file |