summaryrefslogtreecommitdiff
path: root/ffts/src/dxpose.c
blob: 9ee88369715dddc81cae6d0a69b6b8042e50b288 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/*********************
This matrix transpose is in a seperate file because it should always be double precision.
*********************/
#include "dxpose.h"
void dxpose(xdouble *indata, long iRsiz, xdouble *outdata, long oRsiz, long Nrows, long Ncols){
/* not in-place double precision matrix transpose	*/
/* INPUTS */
/* *indata = input data array	*/
/* iRsiz = offset to between rows of input data array	*/
/* oRsiz = offset to between rows of output data array	*/
/* Nrows = number of rows in input data array	*/
/* Ncols = number of columns in input data array	*/
/* OUTPUTS */
/* *outdata = output data array	*/

xdouble	*irow; 		/* pointer to input row start */
xdouble	*ocol; 		/* pointer to output col start */
xdouble	*idata; 	/* pointer to input data */
xdouble	*odata; 	/* pointer to output data */
long 	RowCnt;		/* row counter */
long 	ColCnt;		/* col counter */
xdouble	T0; 		/* data storage */
xdouble	T1; 		/* data storage */
xdouble	T2; 		/* data storage */
xdouble	T3; 		/* data storage */
xdouble	T4; 		/* data storage */
xdouble	T5; 		/* data storage */
xdouble	T6; 		/* data storage */
xdouble	T7; 		/* data storage */
const long inRsizd1 = iRsiz;
const long inRsizd2 = 2*iRsiz;
const long inRsizd3 = inRsizd2+iRsiz;
const long inRsizd4 = 4*iRsiz;
const long inRsizd5 = inRsizd3+inRsizd2;
const long inRsizd6 = inRsizd4+inRsizd2;
const long inRsizd7 = inRsizd4+inRsizd3;
const long inRsizd8 = 8*iRsiz;

ocol = outdata;
irow = indata;
for (RowCnt=Nrows/8; RowCnt>0; RowCnt--){
	idata = irow;
	odata = ocol;
	for (ColCnt=Ncols; ColCnt>0; ColCnt--){
		T0 = *idata;
		T1 = *(idata+inRsizd1);
		T2 = *(idata+inRsizd2);
		T3 = *(idata+inRsizd3);
		T4 = *(idata+inRsizd4);
		T5 = *(idata+inRsizd5);
		T6 = *(idata+inRsizd6);
		T7 = *(idata+inRsizd7);
		*odata = T0;
		*(odata+1) = T1;
		*(odata+2) = T2;
		*(odata+3) = T3;
		*(odata+4) = T4;
		*(odata+5) = T5;
		*(odata+6) = T6;
		*(odata+7) = T7;
		idata++;
		odata += oRsiz;
	}
	irow += inRsizd8;
	ocol += 8;
}
if (Nrows%8 != 0){
	for (ColCnt=Ncols; ColCnt>0; ColCnt--){
		idata = irow++;
		odata = ocol;
		ocol += oRsiz;
		for (RowCnt=Nrows%8; RowCnt>0; RowCnt--){
			T0 = *idata;
			*odata++ = T0;
			idata += iRsiz;
		}
	}
}
}