summaryrefslogtreecommitdiff
path: root/ffts/src/matlib.c
diff options
context:
space:
mode:
authorSteve M. Robbins <smr@debian.org>2011-10-22 04:54:51 +0200
committerSteve M. Robbins <smr@debian.org>2011-10-22 04:54:51 +0200
commitdd657ad3f1428b026486db3ec36691df17ddf515 (patch)
tree6ffb465595479fb5a76c1a6ea3ec992abaa8c1c1 /ffts/src/matlib.c
Import nyquist_3.05.orig.tar.gz
[dgit import orig nyquist_3.05.orig.tar.gz]
Diffstat (limited to 'ffts/src/matlib.c')
-rw-r--r--ffts/src/matlib.c297
1 files changed, 297 insertions, 0 deletions
diff --git a/ffts/src/matlib.c b/ffts/src/matlib.c
new file mode 100644
index 0000000..4e8b682
--- /dev/null
+++ b/ffts/src/matlib.c
@@ -0,0 +1,297 @@
+/* a few routines from a vector/matrix library */
+#include "matlib.h"
+
+void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
+/* not in-place matrix transpose */
+/* INPUTS */
+/* *indata = input data array */
+/* iRsiz = offset to between rows of input data array */
+/* oRsiz = offset to between rows of output data array */
+/* Nrows = number of rows in input data array */
+/* Ncols = number of columns in input data array */
+/* OUTPUTS */
+/* *outdata = output data array */
+
+float *irow; /* pointer to input row start */
+float *ocol; /* pointer to output col start */
+float *idata; /* pointer to input data */
+float *odata; /* pointer to output data */
+long RowCnt; /* row counter */
+long ColCnt; /* col counter */
+float T0; /* data storage */
+float T1; /* data storage */
+float T2; /* data storage */
+float T3; /* data storage */
+float T4; /* data storage */
+float T5; /* data storage */
+float T6; /* data storage */
+float T7; /* data storage */
+const long inRsizd1 = iRsiz;
+const long inRsizd2 = 2*iRsiz;
+const long inRsizd3 = inRsizd2+iRsiz;
+const long inRsizd4 = 4*iRsiz;
+const long inRsizd5 = inRsizd3+inRsizd2;
+const long inRsizd6 = inRsizd4+inRsizd2;
+const long inRsizd7 = inRsizd4+inRsizd3;
+const long inRsizd8 = 8*iRsiz;
+
+ocol = outdata;
+irow = indata;
+for (RowCnt=Nrows/8; RowCnt>0; RowCnt--){
+ idata = irow;
+ odata = ocol;
+ for (ColCnt=Ncols; ColCnt>0; ColCnt--){
+ T0 = *idata;
+ T1 = *(idata+inRsizd1);
+ T2 = *(idata+inRsizd2);
+ T3 = *(idata+inRsizd3);
+ T4 = *(idata+inRsizd4);
+ T5 = *(idata+inRsizd5);
+ T6 = *(idata+inRsizd6);
+ T7 = *(idata+inRsizd7);
+ *odata = T0;
+ *(odata+1) = T1;
+ *(odata+2) = T2;
+ *(odata+3) = T3;
+ *(odata+4) = T4;
+ *(odata+5) = T5;
+ *(odata+6) = T6;
+ *(odata+7) = T7;
+ idata++;
+ odata += oRsiz;
+ }
+ irow += inRsizd8;
+ ocol += 8;
+}
+if (Nrows%8 != 0){
+ for (ColCnt=Ncols; ColCnt>0; ColCnt--){
+ idata = irow++;
+ odata = ocol;
+ ocol += oRsiz;
+ for (RowCnt=Nrows%8; RowCnt>0; RowCnt--){
+ T0 = *idata;
+ *odata++ = T0;
+ idata += iRsiz;
+ }
+ }
+}
+}
+
+void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){
+/* not in-place complex float matrix transpose */
+/* INPUTS */
+/* *indata = input data array */
+/* iRsiz = offset to between rows of input data array */
+/* oRsiz = offset to between rows of output data array */
+/* Nrows = number of rows in input data array */
+/* Ncols = number of columns in input data array */
+/* OUTPUTS */
+/* *outdata = output data array */
+
+float *irow; /* pointer to input row start */
+float *ocol; /* pointer to output col start */
+float *idata; /* pointer to input data */
+float *odata; /* pointer to output data */
+long RowCnt; /* row counter */
+long ColCnt; /* col counter */
+float T0r; /* data storage */
+float T0i; /* data storage */
+float T1r; /* data storage */
+float T1i; /* data storage */
+float T2r; /* data storage */
+float T2i; /* data storage */
+float T3r; /* data storage */
+float T3i; /* data storage */
+const long inRsizd1 = 2*iRsiz;
+const long inRsizd1i = 2*iRsiz + 1;
+const long inRsizd2 = 4*iRsiz;
+const long inRsizd2i = 4*iRsiz + 1;
+const long inRsizd3 = inRsizd2+inRsizd1;
+const long inRsizd3i = inRsizd2+inRsizd1 + 1;
+const long inRsizd4 = 8*iRsiz;
+
+ocol = outdata;
+irow = indata;
+for (RowCnt=Nrows/4; RowCnt>0; RowCnt--){
+ idata = irow;
+ odata = ocol;
+ for (ColCnt=Ncols; ColCnt>0; ColCnt--){
+ T0r = *idata;
+ T0i = *(idata +1);
+ T1r = *(idata+inRsizd1);
+ T1i = *(idata+inRsizd1i);
+ T2r = *(idata+inRsizd2);
+ T2i = *(idata+inRsizd2i);
+ T3r = *(idata+inRsizd3);
+ T3i = *(idata+inRsizd3i);
+ *odata = T0r;
+ *(odata+1) = T0i;
+ *(odata+2) = T1r;
+ *(odata+3) = T1i;
+ *(odata+4) = T2r;
+ *(odata+5) = T2i;
+ *(odata+6) = T3r;
+ *(odata+7) = T3i;
+ idata+=2;
+ odata += 2*oRsiz;
+ }
+ irow += inRsizd4;
+ ocol += 8;
+}
+if (Nrows%4 != 0){
+ for (ColCnt=Ncols; ColCnt>0; ColCnt--){
+ idata = irow;
+ odata = ocol;
+ for (RowCnt=Nrows%4; RowCnt>0; RowCnt--){
+ T0r = *idata;
+ T0i = *(idata+1);
+ *odata = T0r;
+ *(odata+1) = T0i;
+ odata+=2;
+ idata += 2*iRsiz;
+ }
+ irow+=2;
+ ocol += 2*oRsiz;
+ }
+}
+}
+
+void cvprod(float *a, float *b, float *out, long N){
+/* complex vector product, can be in-place */
+/* product of complex vector *a times complex vector *b */
+/* INPUTS */
+/* N vector length */
+/* *a complex vector length N complex numbers */
+/* *b complex vector length N complex numbers */
+/* OUTPUTS */
+/* *out complex vector length N */
+
+long OutCnt; /* output counter */
+float A0R; /* A storage */
+float A0I; /* A storage */
+float A1R; /* A storage */
+float A1I; /* A storage */
+float A2R; /* A storage */
+float A2I; /* A storage */
+float A3R; /* A storage */
+float A3I; /* A storage */
+float B0R; /* B storage */
+float B0I; /* B storage */
+float B1R; /* B storage */
+float B1I; /* B storage */
+float B2R; /* B storage */
+float B2I; /* B storage */
+float B3R; /* B storage */
+float B3I; /* B storage */
+float T0R; /* TMP storage */
+float T0I; /* TMP storage */
+float T1R; /* TMP storage */
+float T1I; /* TMP storage */
+float T2R; /* TMP storage */
+float T2I; /* TMP storage */
+float T3R; /* TMP storage */
+float T3I; /* TMP storage */
+
+if (N>=4){
+ A0R = *a;
+ B0R = *b;
+ A0I = *(a +1);
+ B0I = *(b +1);
+ A1R = *(a +2);
+ B1R = *(b +2);
+ A1I = *(a +3);
+ B1I = *(b +3);
+ A2R = *(a +4);
+ B2R = *(b +4);
+ A2I = *(a +5);
+ B2I = *(b +5);
+ A3R = *(a +6);
+ B3R = *(b +6);
+ A3I = *(a +7);
+ B3I = *(b +7);
+ T0R = A0R * B0R;
+ T0I = (A0R * B0I);
+ T1R = A1R * B1R;
+ T1I = (A1R * B1I);
+ T2R = A2R * B2R;
+ T2I = (A2R * B2I);
+ T3R = A3R * B3R;
+ T3I = (A3R * B3I);
+ T0R -= (A0I * B0I);
+ T0I = A0I * B0R + T0I;
+ T1R -= (A1I * B1I);
+ T1I = A1I * B1R + T1I;
+ T2R -= (A2I * B2I);
+ T2I = A2I * B2R + T2I;
+ T3R -= (A3I * B3I);
+ T3I = A3I * B3R + T3I;
+ for (OutCnt=N/4-1; OutCnt > 0; OutCnt--){
+ a += 8;
+ b += 8;
+ A0R = *a;
+ B0R = *b;
+ A0I = *(a +1);
+ B0I = *(b +1);
+ A1R = *(a +2);
+ B1R = *(b +2);
+ A1I = *(a +3);
+ B1I = *(b +3);
+ A2R = *(a +4);
+ B2R = *(b +4);
+ A2I = *(a +5);
+ B2I = *(b +5);
+ A3R = *(a +6);
+ B3R = *(b +6);
+ A3I = *(a +7);
+ B3I = *(b +7);
+ *out = T0R;
+ *(out +1) = T0I;
+ *(out +2) = T1R;
+ *(out +3) = T1I;
+ *(out +4) = T2R;
+ *(out +5) = T2I;
+ *(out +6) = T3R;
+ *(out +7) = T3I;
+ T0R = A0R * B0R;
+ T0I = (A0R * B0I);
+ T1R = A1R * B1R;
+ T1I = (A1R * B1I);
+ T2R = A2R * B2R;
+ T2I = (A2R * B2I);
+ T3R = A3R * B3R;
+ T3I = (A3R * B3I);
+ T0R -= (A0I * B0I);
+ T0I = A0I * B0R + T0I;
+ T1R -= (A1I * B1I);
+ T1I = A1I * B1R + T1I;
+ T2R -= (A2I * B2I);
+ T2I = A2I * B2R + T2I;
+ T3R -= (A3I * B3I);
+ T3I = A3I * B3R + T3I;
+ out += 8;
+ }
+ a += 8;
+ b += 8;
+ *out = T0R;
+ *(out +1) = T0I;
+ *(out +2) = T1R;
+ *(out +3) = T1I;
+ *(out +4) = T2R;
+ *(out +5) = T2I;
+ *(out +6) = T3R;
+ *(out +7) = T3I;
+ out += 8;
+}
+for (OutCnt=N%4; OutCnt > 0; OutCnt--){
+ A0R = *a++;
+ B0R = *b++;
+ A0I = *a++;
+ B0I = *b++;
+ T0R = A0R * B0R;
+ T0I = (A0R * B0I);
+ T0R -= (A0I * B0I);
+ T0I = A0I * B0R + T0I;
+ *out++ = T0R;
+ *out++ = T0I;
+}
+}