1 /* Copyright Takuya OOURA, 1996-2001.
2
3 You may use, copy, modify and distribute this code for any
4 purpose (include commercial use) and without fee. Please
5 refer to this package when you modify this code.
6
7 Package home: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
8
9 Fast Fourier/Cosine/Sine Transform
10 dimension :one
11 data length :power of 2
12 decimation :frequency
13 radix :4, 2
14 data :inplace
15 table :use
16 functions
17 cdft: Complex Discrete Fourier Transform
18 rdft: Real Discrete Fourier Transform
19 ddct: Discrete Cosine Transform
20 ddst: Discrete Sine Transform
21 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
22 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
23 function prototypes
24 void cdft(int, int, double *, int *, double *);
25 void rdft(int, int, double *, int *, double *);
26 void ddct(int, int, double *, int *, double *);
27 void ddst(int, int, double *, int *, double *);
28 void dfct(int, double *, double *, int *, double *);
29 void dfst(int, double *, double *, int *, double *);
30
31
32 -------- Complex DFT (Discrete Fourier Transform) --------
33 [definition]
34 <case1>
35 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
36 <case2>
37 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
38 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
39 [usage]
40 <case1>
41 ip[0] = 0; // first time only
42 cdft(2*n, 1, a, ip, w);
43 <case2>
44 ip[0] = 0; // first time only
45 cdft(2*n, -1, a, ip, w);
46 [parameters]
47 2*n :data length (int)
48 n >= 1, n = power of 2
49 a[0...2*n-1] :input/output data (double *)
50 input data
51 a[2*j] = Re(x[j]),
52 a[2*j+1] = Im(x[j]), 0<=j<n
53 output data
54 a[2*k] = Re(X[k]),
55 a[2*k+1] = Im(X[k]), 0<=k<n
56 ip[0...*] :work area for bit reversal (int *)
57 length of ip >= 2+sqrt(n)
58 strictly,
59 length of ip >=
60 2+(1<<(int)(log(n+0.5)/log(2))/2).
61 ip[0],ip[1] are pointers of the cos/sin table.
62 w[0...n/2-1] :cos/sin table (double *)
63 w[],ip[] are initialized if ip[0] == 0.
64 [remark]
65 Inverse of
66 cdft(2*n, -1, a, ip, w);
67 is
68 cdft(2*n, 1, a, ip, w);
69 for (j = 0; j <= 2 * n - 1; j++) {
70 a[j] *= 1.0 / n;
71 }
72 .
73
74
75 -------- Real DFT / Inverse of Real DFT --------
76 [definition]
77 <case1> RDFT
78 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
79 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
80 <case2> IRDFT (excluding scale)
81 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
82 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
83 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
84 [usage]
85 <case1>
86 ip[0] = 0; // first time only
87 rdft(n, 1, a, ip, w);
88 <case2>
89 ip[0] = 0; // first time only
90 rdft(n, -1, a, ip, w);
91 [parameters]
92 n :data length (int)
93 n >= 2, n = power of 2
94 a[0...n-1] :input/output data (double *)
95 <case1>
96 output data
97 a[2*k] = R[k], 0<=k<n/2
98 a[2*k+1] = I[k], 0<k<n/2
99 a[1] = R[n/2]
100 <case2>
101 input data
102 a[2*j] = R[j], 0<=j<n/2
103 a[2*j+1] = I[j], 0<j<n/2
104 a[1] = R[n/2]
105 ip[0...*] :work area for bit reversal (int *)
106 length of ip >= 2+sqrt(n/2)
107 strictly,
108 length of ip >=
109 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
110 ip[0],ip[1] are pointers of the cos/sin table.
111 w[0...n/2-1] :cos/sin table (double *)
112 w[],ip[] are initialized if ip[0] == 0.
113 [remark]
114 Inverse of
115 rdft(n, 1, a, ip, w);
116 is
117 rdft(n, -1, a, ip, w);
118 for (j = 0; j <= n - 1; j++) {
119 a[j] *= 2.0 / n;
120 }
121 .
122
123
124 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
125 [definition]
126 <case1> IDCT (excluding scale)
127 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
128 <case2> DCT
129 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
130 [usage]
131 <case1>
132 ip[0] = 0; // first time only
133 ddct(n, 1, a, ip, w);
134 <case2>
135 ip[0] = 0; // first time only
136 ddct(n, -1, a, ip, w);
137 [parameters]
138 n :data length (int)
139 n >= 2, n = power of 2
140 a[0...n-1] :input/output data (double *)
141 output data
142 a[k] = C[k], 0<=k<n
143 ip[0...*] :work area for bit reversal (int *)
144 length of ip >= 2+sqrt(n/2)
145 strictly,
146 length of ip >=
147 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
148 ip[0],ip[1] are pointers of the cos/sin table.
149 w[0...n*5/4-1] :cos/sin table (double *)
150 w[],ip[] are initialized if ip[0] == 0.
151 [remark]
152 Inverse of
153 ddct(n, -1, a, ip, w);
154 is
155 a[0] *= 0.5;
156 ddct(n, 1, a, ip, w);
157 for (j = 0; j <= n - 1; j++) {
158 a[j] *= 2.0 / n;
159 }
160 .
161
162
163 -------- DST (Discrete Sine Transform) / Inverse of DST --------
164 [definition]
165 <case1> IDST (excluding scale)
166 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
167 <case2> DST
168 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
169 [usage]
170 <case1>
171 ip[0] = 0; // first time only
172 ddst(n, 1, a, ip, w);
173 <case2>
174 ip[0] = 0; // first time only
175 ddst(n, -1, a, ip, w);
176 [parameters]
177 n :data length (int)
178 n >= 2, n = power of 2
179 a[0...n-1] :input/output data (double *)
180 <case1>
181 input data
182 a[j] = A[j], 0<j<n
183 a[0] = A[n]
184 output data
185 a[k] = S[k], 0<=k<n
186 <case2>
187 output data
188 a[k] = S[k], 0<k<n
189 a[0] = S[n]
190 ip[0...*] :work area for bit reversal (int *)
191 length of ip >= 2+sqrt(n/2)
192 strictly,
193 length of ip >=
194 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
195 ip[0],ip[1] are pointers of the cos/sin table.
196 w[0...n*5/4-1] :cos/sin table (double *)
197 w[],ip[] are initialized if ip[0] == 0.
198 [remark]
199 Inverse of
200 ddst(n, -1, a, ip, w);
201 is
202 a[0] *= 0.5;
203 ddst(n, 1, a, ip, w);
204 for (j = 0; j <= n - 1; j++) {
205 a[j] *= 2.0 / n;
206 }
207 .
208
209
210 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
211 [definition]
212 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
213 [usage]
214 ip[0] = 0; // first time only
215 dfct(n, a, t, ip, w);
216 [parameters]
217 n :data length - 1 (int)
218 n >= 2, n = power of 2
219 a[0...n] :input/output data (double *)
220 output data
221 a[k] = C[k], 0<=k<=n
222 t[0...n/2] :work area (double *)
223 ip[0...*] :work area for bit reversal (int *)
224 length of ip >= 2+sqrt(n/4)
225 strictly,
226 length of ip >=
227 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
228 ip[0],ip[1] are pointers of the cos/sin table.
229 w[0...n*5/8-1] :cos/sin table (double *)
230 w[],ip[] are initialized if ip[0] == 0.
231 [remark]
232 Inverse of
233 a[0] *= 0.5;
234 a[n] *= 0.5;
235 dfct(n, a, t, ip, w);
236 is
237 a[0] *= 0.5;
238 a[n] *= 0.5;
239 dfct(n, a, t, ip, w);
240 for (j = 0; j <= n; j++) {
241 a[j] *= 2.0 / n;
242 }
243 .
244
245
246 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
247 [definition]
248 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
249 [usage]
250 ip[0] = 0; // first time only
251 dfst(n, a, t, ip, w);
252 [parameters]
253 n :data length + 1 (int)
254 n >= 2, n = power of 2
255 a[0...n-1] :input/output data (double *)
256 output data
257 a[k] = S[k], 0<k<n
258 (a[0] is used for work area)
259 t[0...n/2-1] :work area (double *)
260 ip[0...*] :work area for bit reversal (int *)
261 length of ip >= 2+sqrt(n/4)
262 strictly,
263 length of ip >=
264 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
265 ip[0],ip[1] are pointers of the cos/sin table.
266 w[0...n*5/8-1] :cos/sin table (double *)
267 w[],ip[] are initialized if ip[0] == 0.
268 [remark]
269 Inverse of
270 dfst(n, a, t, ip, w);
271 is
272 dfst(n, a, t, ip, w);
273 for (j = 1; j <= n - 1; j++) {
274 a[j] *= 2.0 / n;
275 }
276 .
277
278
279 Appendix :
280 The cos/sin table is recalculated when the larger table required.
281 w[] and ip[] are compatible with all routines.
282 */
283
284
285 #include <math.h>
286 #include "fft4g.h"
287
288 #ifdef FFT4G_FLOAT
289 #define double float
290 #define sin sinf
291 #define cos cosf
292 #define atan atanf
293
294 #define cdft lsx_cdft_f
295 #define rdft lsx_rdft_f
296 #define ddct lsx_ddct_f
297 #define ddst lsx_ddst_f
298 #define dfct lsx_dfct_f
299 #define dfst lsx_dfst_f
300 #else
301 #define cdft lsx_cdft
302 #define rdft lsx_rdft
303 #define ddct lsx_ddct
304 #define ddst lsx_ddst
305 #define dfct lsx_dfct
306 #define dfst lsx_dfst
307 #endif
308
309 static void bitrv2conj(int n, int *ip, double *a);
310 static void bitrv2(int n, int *ip, double *a);
311 static void cft1st(int n, double *a, double const *w);
312 static void cftbsub(int n, double *a, double const *w);
313 static void cftfsub(int n, double *a, double const *w);
314 static void cftmdl(int n, int l, double *a, double const *w);
315 static void dctsub(int n, double *a, int nc, double const *c);
316 static void dstsub(int n, double *a, int nc, double const *c);
317 static void makect(int nc, int *ip, double *c);
318 static void makewt(int nw, int *ip, double *w);
319 static void rftbsub(int n, double *a, int nc, double const *c);
320 static void rftfsub(int n, double *a, int nc, double const *c);
321
322
cdft(int n,int isgn,double * a,int * ip,double * w)323 void cdft(int n, int isgn, double *a, int *ip, double *w)
324 {
325 if (n > FFT4G_MAX_SIZE)
326 return;
327
328 if (n > (ip[0] << 2)) {
329 makewt(n >> 2, ip, w);
330 }
331 if (n > 4) {
332 if (isgn >= 0) {
333 bitrv2(n, ip + 2, a);
334 cftfsub(n, a, w);
335 } else {
336 bitrv2conj(n, ip + 2, a);
337 cftbsub(n, a, w);
338 }
339 } else if (n == 4) {
340 cftfsub(n, a, w);
341 }
342 }
343
344
rdft(int n,int isgn,double * a,int * ip,double * w)345 void rdft(int n, int isgn, double *a, int *ip, double *w)
346 {
347 int nw, nc;
348 double xi;
349
350 if (n > FFT4G_MAX_SIZE)
351 return;
352
353 nw = ip[0];
354 if (n > (nw << 2)) {
355 nw = n >> 2;
356 makewt(nw, ip, w);
357 }
358 nc = ip[1];
359 if (n > (nc << 2)) {
360 nc = n >> 2;
361 makect(nc, ip, w + nw);
362 }
363 if (isgn >= 0) {
364 if (n > 4) {
365 bitrv2(n, ip + 2, a);
366 cftfsub(n, a, w);
367 rftfsub(n, a, nc, w + nw);
368 } else if (n == 4) {
369 cftfsub(n, a, w);
370 }
371 xi = a[0] - a[1];
372 a[0] += a[1];
373 a[1] = xi;
374 } else {
375 a[1] = 0.5 * (a[0] - a[1]);
376 a[0] -= a[1];
377 if (n > 4) {
378 rftbsub(n, a, nc, w + nw);
379 bitrv2(n, ip + 2, a);
380 cftbsub(n, a, w);
381 } else if (n == 4) {
382 cftfsub(n, a, w);
383 }
384 }
385 }
386
387
ddct(int n,int isgn,double * a,int * ip,double * w)388 void ddct(int n, int isgn, double *a, int *ip, double *w)
389 {
390 int j, nw, nc;
391 double xr;
392
393 if (n > FFT4G_MAX_SIZE)
394 return;
395
396 nw = ip[0];
397 if (n > (nw << 2)) {
398 nw = n >> 2;
399 makewt(nw, ip, w);
400 }
401 nc = ip[1];
402 if (n > nc) {
403 nc = n;
404 makect(nc, ip, w + nw);
405 }
406 if (isgn < 0) {
407 xr = a[n - 1];
408 for (j = n - 2; j >= 2; j -= 2) {
409 a[j + 1] = a[j] - a[j - 1];
410 a[j] += a[j - 1];
411 }
412 a[1] = a[0] - xr;
413 a[0] += xr;
414 if (n > 4) {
415 rftbsub(n, a, nc, w + nw);
416 bitrv2(n, ip + 2, a);
417 cftbsub(n, a, w);
418 } else if (n == 4) {
419 cftfsub(n, a, w);
420 }
421 }
422 dctsub(n, a, nc, w + nw);
423 if (isgn >= 0) {
424 if (n > 4) {
425 bitrv2(n, ip + 2, a);
426 cftfsub(n, a, w);
427 rftfsub(n, a, nc, w + nw);
428 } else if (n == 4) {
429 cftfsub(n, a, w);
430 }
431 xr = a[0] - a[1];
432 a[0] += a[1];
433 for (j = 2; j < n; j += 2) {
434 a[j - 1] = a[j] - a[j + 1];
435 a[j] += a[j + 1];
436 }
437 a[n - 1] = xr;
438 }
439 }
440
441
ddst(int n,int isgn,double * a,int * ip,double * w)442 void ddst(int n, int isgn, double *a, int *ip, double *w)
443 {
444 int j, nw, nc;
445 double xr;
446
447 if (n > FFT4G_MAX_SIZE)
448 return;
449
450 nw = ip[0];
451 if (n > (nw << 2)) {
452 nw = n >> 2;
453 makewt(nw, ip, w);
454 }
455 nc = ip[1];
456 if (n > nc) {
457 nc = n;
458 makect(nc, ip, w + nw);
459 }
460 if (isgn < 0) {
461 xr = a[n - 1];
462 for (j = n - 2; j >= 2; j -= 2) {
463 a[j + 1] = -a[j] - a[j - 1];
464 a[j] -= a[j - 1];
465 }
466 a[1] = a[0] + xr;
467 a[0] -= xr;
468 if (n > 4) {
469 rftbsub(n, a, nc, w + nw);
470 bitrv2(n, ip + 2, a);
471 cftbsub(n, a, w);
472 } else if (n == 4) {
473 cftfsub(n, a, w);
474 }
475 }
476 dstsub(n, a, nc, w + nw);
477 if (isgn >= 0) {
478 if (n > 4) {
479 bitrv2(n, ip + 2, a);
480 cftfsub(n, a, w);
481 rftfsub(n, a, nc, w + nw);
482 } else if (n == 4) {
483 cftfsub(n, a, w);
484 }
485 xr = a[0] - a[1];
486 a[0] += a[1];
487 for (j = 2; j < n; j += 2) {
488 a[j - 1] = -a[j] - a[j + 1];
489 a[j] -= a[j + 1];
490 }
491 a[n - 1] = -xr;
492 }
493 }
494
495
dfct(int n,double * a,double * t,int * ip,double * w)496 void dfct(int n, double *a, double *t, int *ip, double *w)
497 {
498 int j, k, l, m, mh, nw, nc;
499 double xr, xi, yr, yi;
500
501 if (n > FFT4G_MAX_SIZE)
502 return;
503
504 nw = ip[0];
505 if (n > (nw << 3)) {
506 nw = n >> 3;
507 makewt(nw, ip, w);
508 }
509 nc = ip[1];
510 if (n > (nc << 1)) {
511 nc = n >> 1;
512 makect(nc, ip, w + nw);
513 }
514 m = n >> 1;
515 yi = a[m];
516 xi = a[0] + a[n];
517 a[0] -= a[n];
518 t[0] = xi - yi;
519 t[m] = xi + yi;
520 if (n > 2) {
521 mh = m >> 1;
522 for (j = 1; j < mh; j++) {
523 k = m - j;
524 xr = a[j] - a[n - j];
525 xi = a[j] + a[n - j];
526 yr = a[k] - a[n - k];
527 yi = a[k] + a[n - k];
528 a[j] = xr;
529 a[k] = yr;
530 t[j] = xi - yi;
531 t[k] = xi + yi;
532 }
533 t[mh] = a[mh] + a[n - mh];
534 a[mh] -= a[n - mh];
535 dctsub(m, a, nc, w + nw);
536 if (m > 4) {
537 bitrv2(m, ip + 2, a);
538 cftfsub(m, a, w);
539 rftfsub(m, a, nc, w + nw);
540 } else if (m == 4) {
541 cftfsub(m, a, w);
542 }
543 a[n - 1] = a[0] - a[1];
544 a[1] = a[0] + a[1];
545 for (j = m - 2; j >= 2; j -= 2) {
546 a[2 * j + 1] = a[j] + a[j + 1];
547 a[2 * j - 1] = a[j] - a[j + 1];
548 }
549 l = 2;
550 m = mh;
551 while (m >= 2) {
552 dctsub(m, t, nc, w + nw);
553 if (m > 4) {
554 bitrv2(m, ip + 2, t);
555 cftfsub(m, t, w);
556 rftfsub(m, t, nc, w + nw);
557 } else if (m == 4) {
558 cftfsub(m, t, w);
559 }
560 a[n - l] = t[0] - t[1];
561 a[l] = t[0] + t[1];
562 k = 0;
563 for (j = 2; j < m; j += 2) {
564 k += l << 2;
565 a[k - l] = t[j] - t[j + 1];
566 a[k + l] = t[j] + t[j + 1];
567 }
568 l <<= 1;
569 mh = m >> 1;
570 for (j = 0; j < mh; j++) {
571 k = m - j;
572 t[j] = t[m + k] - t[m + j];
573 t[k] = t[m + k] + t[m + j];
574 }
575 t[mh] = t[m + mh];
576 m = mh;
577 }
578 a[l] = t[0];
579 a[n] = t[2] - t[1];
580 a[0] = t[2] + t[1];
581 } else {
582 a[1] = a[0];
583 a[2] = t[0];
584 a[0] = t[1];
585 }
586 }
587
588
dfst(int n,double * a,double * t,int * ip,double * w)589 void dfst(int n, double *a, double *t, int *ip, double *w)
590 {
591 int j, k, l, m, mh, nw, nc;
592 double xr, xi, yr, yi;
593
594 if (n > FFT4G_MAX_SIZE)
595 return;
596
597 nw = ip[0];
598 if (n > (nw << 3)) {
599 nw = n >> 3;
600 makewt(nw, ip, w);
601 }
602 nc = ip[1];
603 if (n > (nc << 1)) {
604 nc = n >> 1;
605 makect(nc, ip, w + nw);
606 }
607 if (n > 2) {
608 m = n >> 1;
609 mh = m >> 1;
610 for (j = 1; j < mh; j++) {
611 k = m - j;
612 xr = a[j] + a[n - j];
613 xi = a[j] - a[n - j];
614 yr = a[k] + a[n - k];
615 yi = a[k] - a[n - k];
616 a[j] = xr;
617 a[k] = yr;
618 t[j] = xi + yi;
619 t[k] = xi - yi;
620 }
621 t[0] = a[mh] - a[n - mh];
622 a[mh] += a[n - mh];
623 a[0] = a[m];
624 dstsub(m, a, nc, w + nw);
625 if (m > 4) {
626 bitrv2(m, ip + 2, a);
627 cftfsub(m, a, w);
628 rftfsub(m, a, nc, w + nw);
629 } else if (m == 4) {
630 cftfsub(m, a, w);
631 }
632 a[n - 1] = a[1] - a[0];
633 a[1] = a[0] + a[1];
634 for (j = m - 2; j >= 2; j -= 2) {
635 a[2 * j + 1] = a[j] - a[j + 1];
636 a[2 * j - 1] = -a[j] - a[j + 1];
637 }
638 l = 2;
639 m = mh;
640 while (m >= 2) {
641 dstsub(m, t, nc, w + nw);
642 if (m > 4) {
643 bitrv2(m, ip + 2, t);
644 cftfsub(m, t, w);
645 rftfsub(m, t, nc, w + nw);
646 } else if (m == 4) {
647 cftfsub(m, t, w);
648 }
649 a[n - l] = t[1] - t[0];
650 a[l] = t[0] + t[1];
651 k = 0;
652 for (j = 2; j < m; j += 2) {
653 k += l << 2;
654 a[k - l] = -t[j] - t[j + 1];
655 a[k + l] = t[j] - t[j + 1];
656 }
657 l <<= 1;
658 mh = m >> 1;
659 for (j = 1; j < mh; j++) {
660 k = m - j;
661 t[j] = t[m + k] + t[m + j];
662 t[k] = t[m + k] - t[m + j];
663 }
664 t[0] = t[m + mh];
665 m = mh;
666 }
667 a[l] = t[0];
668 }
669 a[0] = 0;
670 }
671
672
673 /* -------- initializing routines -------- */
674
675
makewt(int nw,int * ip,double * w)676 static void makewt(int nw, int *ip, double *w)
677 {
678 int j, nwh;
679 double delta, x, y;
680
681 ip[0] = nw;
682 ip[1] = 1;
683 if (nw > 2) {
684 nwh = nw >> 1;
685 delta = atan(1.0) / nwh;
686 w[0] = 1;
687 w[1] = 0;
688 w[nwh] = cos(delta * nwh);
689 w[nwh + 1] = w[nwh];
690 if (nwh > 2) {
691 for (j = 2; j < nwh; j += 2) {
692 x = cos(delta * j);
693 y = sin(delta * j);
694 w[j] = x;
695 w[j + 1] = y;
696 w[nw - j] = y;
697 w[nw - j + 1] = x;
698 }
699 bitrv2(nw, ip + 2, w);
700 }
701 }
702 }
703
704
makect(int nc,int * ip,double * c)705 static void makect(int nc, int *ip, double *c)
706 {
707 int j, nch;
708 double delta;
709
710 ip[1] = nc;
711 if (nc > 1) {
712 nch = nc >> 1;
713 delta = atan(1.0) / nch;
714 c[0] = cos(delta * nch);
715 c[nch] = 0.5 * c[0];
716 for (j = 1; j < nch; j++) {
717 c[j] = 0.5 * cos(delta * j);
718 c[nc - j] = 0.5 * sin(delta * j);
719 }
720 }
721 }
722
723
724 /* -------- child routines -------- */
725
726
bitrv2(int n,int * ip0,double * a)727 static void bitrv2(int n, int *ip0, double *a)
728 {
729 int j, j1, k, k1, l, m, m2, ip[256];
730 double xr, xi, yr, yi;
731
732 (void)ip0;
733 ip[0] = 0;
734 l = n;
735 m = 1;
736 while ((m << 3) < l) {
737 l >>= 1;
738 for (j = 0; j < m; j++) {
739 ip[m + j] = ip[j] + l;
740 }
741 m <<= 1;
742 }
743 m2 = 2 * m;
744 if ((m << 3) == l) {
745 for (k = 0; k < m; k++) {
746 for (j = 0; j < k; j++) {
747 j1 = 2 * j + ip[k];
748 k1 = 2 * k + ip[j];
749 xr = a[j1];
750 xi = a[j1 + 1];
751 yr = a[k1];
752 yi = a[k1 + 1];
753 a[j1] = yr;
754 a[j1 + 1] = yi;
755 a[k1] = xr;
756 a[k1 + 1] = xi;
757 j1 += m2;
758 k1 += 2 * m2;
759 xr = a[j1];
760 xi = a[j1 + 1];
761 yr = a[k1];
762 yi = a[k1 + 1];
763 a[j1] = yr;
764 a[j1 + 1] = yi;
765 a[k1] = xr;
766 a[k1 + 1] = xi;
767 j1 += m2;
768 k1 -= m2;
769 xr = a[j1];
770 xi = a[j1 + 1];
771 yr = a[k1];
772 yi = a[k1 + 1];
773 a[j1] = yr;
774 a[j1 + 1] = yi;
775 a[k1] = xr;
776 a[k1 + 1] = xi;
777 j1 += m2;
778 k1 += 2 * m2;
779 xr = a[j1];
780 xi = a[j1 + 1];
781 yr = a[k1];
782 yi = a[k1 + 1];
783 a[j1] = yr;
784 a[j1 + 1] = yi;
785 a[k1] = xr;
786 a[k1 + 1] = xi;
787 }
788 j1 = 2 * k + m2 + ip[k];
789 k1 = j1 + m2;
790 xr = a[j1];
791 xi = a[j1 + 1];
792 yr = a[k1];
793 yi = a[k1 + 1];
794 a[j1] = yr;
795 a[j1 + 1] = yi;
796 a[k1] = xr;
797 a[k1 + 1] = xi;
798 }
799 } else {
800 for (k = 1; k < m; k++) {
801 for (j = 0; j < k; j++) {
802 j1 = 2 * j + ip[k];
803 k1 = 2 * k + ip[j];
804 xr = a[j1];
805 xi = a[j1 + 1];
806 yr = a[k1];
807 yi = a[k1 + 1];
808 a[j1] = yr;
809 a[j1 + 1] = yi;
810 a[k1] = xr;
811 a[k1 + 1] = xi;
812 j1 += m2;
813 k1 += m2;
814 xr = a[j1];
815 xi = a[j1 + 1];
816 yr = a[k1];
817 yi = a[k1 + 1];
818 a[j1] = yr;
819 a[j1 + 1] = yi;
820 a[k1] = xr;
821 a[k1 + 1] = xi;
822 }
823 }
824 }
825 }
826
827
bitrv2conj(int n,int * ip0,double * a)828 static void bitrv2conj(int n, int *ip0, double *a)
829 {
830 int j, j1, k, k1, l, m, m2, ip[256];
831 double xr, xi, yr, yi;
832
833 (void)ip0;
834 ip[0] = 0;
835 l = n;
836 m = 1;
837 while ((m << 3) < l) {
838 l >>= 1;
839 for (j = 0; j < m; j++) {
840 ip[m + j] = ip[j] + l;
841 }
842 m <<= 1;
843 }
844 m2 = 2 * m;
845 if ((m << 3) == l) {
846 for (k = 0; k < m; k++) {
847 for (j = 0; j < k; j++) {
848 j1 = 2 * j + ip[k];
849 k1 = 2 * k + ip[j];
850 xr = a[j1];
851 xi = -a[j1 + 1];
852 yr = a[k1];
853 yi = -a[k1 + 1];
854 a[j1] = yr;
855 a[j1 + 1] = yi;
856 a[k1] = xr;
857 a[k1 + 1] = xi;
858 j1 += m2;
859 k1 += 2 * m2;
860 xr = a[j1];
861 xi = -a[j1 + 1];
862 yr = a[k1];
863 yi = -a[k1 + 1];
864 a[j1] = yr;
865 a[j1 + 1] = yi;
866 a[k1] = xr;
867 a[k1 + 1] = xi;
868 j1 += m2;
869 k1 -= m2;
870 xr = a[j1];
871 xi = -a[j1 + 1];
872 yr = a[k1];
873 yi = -a[k1 + 1];
874 a[j1] = yr;
875 a[j1 + 1] = yi;
876 a[k1] = xr;
877 a[k1 + 1] = xi;
878 j1 += m2;
879 k1 += 2 * m2;
880 xr = a[j1];
881 xi = -a[j1 + 1];
882 yr = a[k1];
883 yi = -a[k1 + 1];
884 a[j1] = yr;
885 a[j1 + 1] = yi;
886 a[k1] = xr;
887 a[k1 + 1] = xi;
888 }
889 k1 = 2 * k + ip[k];
890 a[k1 + 1] = -a[k1 + 1];
891 j1 = k1 + m2;
892 k1 = j1 + m2;
893 xr = a[j1];
894 xi = -a[j1 + 1];
895 yr = a[k1];
896 yi = -a[k1 + 1];
897 a[j1] = yr;
898 a[j1 + 1] = yi;
899 a[k1] = xr;
900 a[k1 + 1] = xi;
901 k1 += m2;
902 a[k1 + 1] = -a[k1 + 1];
903 }
904 } else {
905 a[1] = -a[1];
906 a[m2 + 1] = -a[m2 + 1];
907 for (k = 1; k < m; k++) {
908 for (j = 0; j < k; j++) {
909 j1 = 2 * j + ip[k];
910 k1 = 2 * k + ip[j];
911 xr = a[j1];
912 xi = -a[j1 + 1];
913 yr = a[k1];
914 yi = -a[k1 + 1];
915 a[j1] = yr;
916 a[j1 + 1] = yi;
917 a[k1] = xr;
918 a[k1 + 1] = xi;
919 j1 += m2;
920 k1 += m2;
921 xr = a[j1];
922 xi = -a[j1 + 1];
923 yr = a[k1];
924 yi = -a[k1 + 1];
925 a[j1] = yr;
926 a[j1 + 1] = yi;
927 a[k1] = xr;
928 a[k1 + 1] = xi;
929 }
930 k1 = 2 * k + ip[k];
931 a[k1 + 1] = -a[k1 + 1];
932 a[k1 + m2 + 1] = -a[k1 + m2 + 1];
933 }
934 }
935 }
936
937
cftfsub(int n,double * a,double const * w)938 static void cftfsub(int n, double *a, double const *w)
939 {
940 int j, j1, j2, j3, l;
941 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
942
943 l = 2;
944 if (n > 8) {
945 cft1st(n, a, w);
946 l = 8;
947 while ((l << 2) < n) {
948 cftmdl(n, l, a, w);
949 l <<= 2;
950 }
951 }
952 if ((l << 2) == n) {
953 for (j = 0; j < l; j += 2) {
954 j1 = j + l;
955 j2 = j1 + l;
956 j3 = j2 + l;
957 x0r = a[j] + a[j1];
958 x0i = a[j + 1] + a[j1 + 1];
959 x1r = a[j] - a[j1];
960 x1i = a[j + 1] - a[j1 + 1];
961 x2r = a[j2] + a[j3];
962 x2i = a[j2 + 1] + a[j3 + 1];
963 x3r = a[j2] - a[j3];
964 x3i = a[j2 + 1] - a[j3 + 1];
965 a[j] = x0r + x2r;
966 a[j + 1] = x0i + x2i;
967 a[j2] = x0r - x2r;
968 a[j2 + 1] = x0i - x2i;
969 a[j1] = x1r - x3i;
970 a[j1 + 1] = x1i + x3r;
971 a[j3] = x1r + x3i;
972 a[j3 + 1] = x1i - x3r;
973 }
974 } else {
975 for (j = 0; j < l; j += 2) {
976 j1 = j + l;
977 x0r = a[j] - a[j1];
978 x0i = a[j + 1] - a[j1 + 1];
979 a[j] += a[j1];
980 a[j + 1] += a[j1 + 1];
981 a[j1] = x0r;
982 a[j1 + 1] = x0i;
983 }
984 }
985 }
986
987
cftbsub(int n,double * a,double const * w)988 static void cftbsub(int n, double *a, double const *w)
989 {
990 int j, j1, j2, j3, l;
991 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
992
993 l = 2;
994 if (n > 8) {
995 cft1st(n, a, w);
996 l = 8;
997 while ((l << 2) < n) {
998 cftmdl(n, l, a, w);
999 l <<= 2;
1000 }
1001 }
1002 if ((l << 2) == n) {
1003 for (j = 0; j < l; j += 2) {
1004 j1 = j + l;
1005 j2 = j1 + l;
1006 j3 = j2 + l;
1007 x0r = a[j] + a[j1];
1008 x0i = -a[j + 1] - a[j1 + 1];
1009 x1r = a[j] - a[j1];
1010 x1i = -a[j + 1] + a[j1 + 1];
1011 x2r = a[j2] + a[j3];
1012 x2i = a[j2 + 1] + a[j3 + 1];
1013 x3r = a[j2] - a[j3];
1014 x3i = a[j2 + 1] - a[j3 + 1];
1015 a[j] = x0r + x2r;
1016 a[j + 1] = x0i - x2i;
1017 a[j2] = x0r - x2r;
1018 a[j2 + 1] = x0i + x2i;
1019 a[j1] = x1r - x3i;
1020 a[j1 + 1] = x1i - x3r;
1021 a[j3] = x1r + x3i;
1022 a[j3 + 1] = x1i + x3r;
1023 }
1024 } else {
1025 for (j = 0; j < l; j += 2) {
1026 j1 = j + l;
1027 x0r = a[j] - a[j1];
1028 x0i = -a[j + 1] + a[j1 + 1];
1029 a[j] += a[j1];
1030 a[j + 1] = -a[j + 1] - a[j1 + 1];
1031 a[j1] = x0r;
1032 a[j1 + 1] = x0i;
1033 }
1034 }
1035 }
1036
1037
cft1st(int n,double * a,double const * w)1038 static void cft1st(int n, double *a, double const *w)
1039 {
1040 int j, k1, k2;
1041 double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1042 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1043
1044 x0r = a[0] + a[2];
1045 x0i = a[1] + a[3];
1046 x1r = a[0] - a[2];
1047 x1i = a[1] - a[3];
1048 x2r = a[4] + a[6];
1049 x2i = a[5] + a[7];
1050 x3r = a[4] - a[6];
1051 x3i = a[5] - a[7];
1052 a[0] = x0r + x2r;
1053 a[1] = x0i + x2i;
1054 a[4] = x0r - x2r;
1055 a[5] = x0i - x2i;
1056 a[2] = x1r - x3i;
1057 a[3] = x1i + x3r;
1058 a[6] = x1r + x3i;
1059 a[7] = x1i - x3r;
1060 wk1r = w[2];
1061 x0r = a[8] + a[10];
1062 x0i = a[9] + a[11];
1063 x1r = a[8] - a[10];
1064 x1i = a[9] - a[11];
1065 x2r = a[12] + a[14];
1066 x2i = a[13] + a[15];
1067 x3r = a[12] - a[14];
1068 x3i = a[13] - a[15];
1069 a[8] = x0r + x2r;
1070 a[9] = x0i + x2i;
1071 a[12] = x2i - x0i;
1072 a[13] = x0r - x2r;
1073 x0r = x1r - x3i;
1074 x0i = x1i + x3r;
1075 a[10] = wk1r * (x0r - x0i);
1076 a[11] = wk1r * (x0r + x0i);
1077 x0r = x3i + x1r;
1078 x0i = x3r - x1i;
1079 a[14] = wk1r * (x0i - x0r);
1080 a[15] = wk1r * (x0i + x0r);
1081 k1 = 0;
1082 for (j = 16; j < n; j += 16) {
1083 k1 += 2;
1084 k2 = 2 * k1;
1085 wk2r = w[k1];
1086 wk2i = w[k1 + 1];
1087 wk1r = w[k2];
1088 wk1i = w[k2 + 1];
1089 wk3r = wk1r - 2 * wk2i * wk1i;
1090 wk3i = 2 * wk2i * wk1r - wk1i;
1091 x0r = a[j] + a[j + 2];
1092 x0i = a[j + 1] + a[j + 3];
1093 x1r = a[j] - a[j + 2];
1094 x1i = a[j + 1] - a[j + 3];
1095 x2r = a[j + 4] + a[j + 6];
1096 x2i = a[j + 5] + a[j + 7];
1097 x3r = a[j + 4] - a[j + 6];
1098 x3i = a[j + 5] - a[j + 7];
1099 a[j] = x0r + x2r;
1100 a[j + 1] = x0i + x2i;
1101 x0r -= x2r;
1102 x0i -= x2i;
1103 a[j + 4] = wk2r * x0r - wk2i * x0i;
1104 a[j + 5] = wk2r * x0i + wk2i * x0r;
1105 x0r = x1r - x3i;
1106 x0i = x1i + x3r;
1107 a[j + 2] = wk1r * x0r - wk1i * x0i;
1108 a[j + 3] = wk1r * x0i + wk1i * x0r;
1109 x0r = x1r + x3i;
1110 x0i = x1i - x3r;
1111 a[j + 6] = wk3r * x0r - wk3i * x0i;
1112 a[j + 7] = wk3r * x0i + wk3i * x0r;
1113 wk1r = w[k2 + 2];
1114 wk1i = w[k2 + 3];
1115 wk3r = wk1r - 2 * wk2r * wk1i;
1116 wk3i = 2 * wk2r * wk1r - wk1i;
1117 x0r = a[j + 8] + a[j + 10];
1118 x0i = a[j + 9] + a[j + 11];
1119 x1r = a[j + 8] - a[j + 10];
1120 x1i = a[j + 9] - a[j + 11];
1121 x2r = a[j + 12] + a[j + 14];
1122 x2i = a[j + 13] + a[j + 15];
1123 x3r = a[j + 12] - a[j + 14];
1124 x3i = a[j + 13] - a[j + 15];
1125 a[j + 8] = x0r + x2r;
1126 a[j + 9] = x0i + x2i;
1127 x0r -= x2r;
1128 x0i -= x2i;
1129 a[j + 12] = -wk2i * x0r - wk2r * x0i;
1130 a[j + 13] = -wk2i * x0i + wk2r * x0r;
1131 x0r = x1r - x3i;
1132 x0i = x1i + x3r;
1133 a[j + 10] = wk1r * x0r - wk1i * x0i;
1134 a[j + 11] = wk1r * x0i + wk1i * x0r;
1135 x0r = x1r + x3i;
1136 x0i = x1i - x3r;
1137 a[j + 14] = wk3r * x0r - wk3i * x0i;
1138 a[j + 15] = wk3r * x0i + wk3i * x0r;
1139 }
1140 }
1141
1142
cftmdl(int n,int l,double * a,double const * w)1143 static void cftmdl(int n, int l, double *a, double const *w)
1144 {
1145 int j, j1, j2, j3, k, k1, k2, m, m2;
1146 double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1147 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1148
1149 m = l << 2;
1150 for (j = 0; j < l; j += 2) {
1151 j1 = j + l;
1152 j2 = j1 + l;
1153 j3 = j2 + l;
1154 x0r = a[j] + a[j1];
1155 x0i = a[j + 1] + a[j1 + 1];
1156 x1r = a[j] - a[j1];
1157 x1i = a[j + 1] - a[j1 + 1];
1158 x2r = a[j2] + a[j3];
1159 x2i = a[j2 + 1] + a[j3 + 1];
1160 x3r = a[j2] - a[j3];
1161 x3i = a[j2 + 1] - a[j3 + 1];
1162 a[j] = x0r + x2r;
1163 a[j + 1] = x0i + x2i;
1164 a[j2] = x0r - x2r;
1165 a[j2 + 1] = x0i - x2i;
1166 a[j1] = x1r - x3i;
1167 a[j1 + 1] = x1i + x3r;
1168 a[j3] = x1r + x3i;
1169 a[j3 + 1] = x1i - x3r;
1170 }
1171 wk1r = w[2];
1172 for (j = m; j < l + m; j += 2) {
1173 j1 = j + l;
1174 j2 = j1 + l;
1175 j3 = j2 + l;
1176 x0r = a[j] + a[j1];
1177 x0i = a[j + 1] + a[j1 + 1];
1178 x1r = a[j] - a[j1];
1179 x1i = a[j + 1] - a[j1 + 1];
1180 x2r = a[j2] + a[j3];
1181 x2i = a[j2 + 1] + a[j3 + 1];
1182 x3r = a[j2] - a[j3];
1183 x3i = a[j2 + 1] - a[j3 + 1];
1184 a[j] = x0r + x2r;
1185 a[j + 1] = x0i + x2i;
1186 a[j2] = x2i - x0i;
1187 a[j2 + 1] = x0r - x2r;
1188 x0r = x1r - x3i;
1189 x0i = x1i + x3r;
1190 a[j1] = wk1r * (x0r - x0i);
1191 a[j1 + 1] = wk1r * (x0r + x0i);
1192 x0r = x3i + x1r;
1193 x0i = x3r - x1i;
1194 a[j3] = wk1r * (x0i - x0r);
1195 a[j3 + 1] = wk1r * (x0i + x0r);
1196 }
1197 k1 = 0;
1198 m2 = 2 * m;
1199 for (k = m2; k < n; k += m2) {
1200 k1 += 2;
1201 k2 = 2 * k1;
1202 wk2r = w[k1];
1203 wk2i = w[k1 + 1];
1204 wk1r = w[k2];
1205 wk1i = w[k2 + 1];
1206 wk3r = wk1r - 2 * wk2i * wk1i;
1207 wk3i = 2 * wk2i * wk1r - wk1i;
1208 for (j = k; j < l + k; j += 2) {
1209 j1 = j + l;
1210 j2 = j1 + l;
1211 j3 = j2 + l;
1212 x0r = a[j] + a[j1];
1213 x0i = a[j + 1] + a[j1 + 1];
1214 x1r = a[j] - a[j1];
1215 x1i = a[j + 1] - a[j1 + 1];
1216 x2r = a[j2] + a[j3];
1217 x2i = a[j2 + 1] + a[j3 + 1];
1218 x3r = a[j2] - a[j3];
1219 x3i = a[j2 + 1] - a[j3 + 1];
1220 a[j] = x0r + x2r;
1221 a[j + 1] = x0i + x2i;
1222 x0r -= x2r;
1223 x0i -= x2i;
1224 a[j2] = wk2r * x0r - wk2i * x0i;
1225 a[j2 + 1] = wk2r * x0i + wk2i * x0r;
1226 x0r = x1r - x3i;
1227 x0i = x1i + x3r;
1228 a[j1] = wk1r * x0r - wk1i * x0i;
1229 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1230 x0r = x1r + x3i;
1231 x0i = x1i - x3r;
1232 a[j3] = wk3r * x0r - wk3i * x0i;
1233 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1234 }
1235 wk1r = w[k2 + 2];
1236 wk1i = w[k2 + 3];
1237 wk3r = wk1r - 2 * wk2r * wk1i;
1238 wk3i = 2 * wk2r * wk1r - wk1i;
1239 for (j = k + m; j < l + (k + m); j += 2) {
1240 j1 = j + l;
1241 j2 = j1 + l;
1242 j3 = j2 + l;
1243 x0r = a[j] + a[j1];
1244 x0i = a[j + 1] + a[j1 + 1];
1245 x1r = a[j] - a[j1];
1246 x1i = a[j + 1] - a[j1 + 1];
1247 x2r = a[j2] + a[j3];
1248 x2i = a[j2 + 1] + a[j3 + 1];
1249 x3r = a[j2] - a[j3];
1250 x3i = a[j2 + 1] - a[j3 + 1];
1251 a[j] = x0r + x2r;
1252 a[j + 1] = x0i + x2i;
1253 x0r -= x2r;
1254 x0i -= x2i;
1255 a[j2] = -wk2i * x0r - wk2r * x0i;
1256 a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
1257 x0r = x1r - x3i;
1258 x0i = x1i + x3r;
1259 a[j1] = wk1r * x0r - wk1i * x0i;
1260 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1261 x0r = x1r + x3i;
1262 x0i = x1i - x3r;
1263 a[j3] = wk3r * x0r - wk3i * x0i;
1264 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1265 }
1266 }
1267 }
1268
1269
rftfsub(int n,double * a,int nc,double const * c)1270 static void rftfsub(int n, double *a, int nc, double const *c)
1271 {
1272 int j, k, kk, ks, m;
1273 double wkr, wki, xr, xi, yr, yi;
1274
1275 m = n >> 1;
1276 ks = 2 * nc / m;
1277 kk = 0;
1278 for (j = 2; j < m; j += 2) {
1279 k = n - j;
1280 kk += ks;
1281 wkr = 0.5 - c[nc - kk];
1282 wki = c[kk];
1283 xr = a[j] - a[k];
1284 xi = a[j + 1] + a[k + 1];
1285 yr = wkr * xr - wki * xi;
1286 yi = wkr * xi + wki * xr;
1287 a[j] -= yr;
1288 a[j + 1] -= yi;
1289 a[k] += yr;
1290 a[k + 1] -= yi;
1291 }
1292 }
1293
1294
rftbsub(int n,double * a,int nc,double const * c)1295 static void rftbsub(int n, double *a, int nc, double const *c)
1296 {
1297 int j, k, kk, ks, m;
1298 double wkr, wki, xr, xi, yr, yi;
1299
1300 a[1] = -a[1];
1301 m = n >> 1;
1302 ks = 2 * nc / m;
1303 kk = 0;
1304 for (j = 2; j < m; j += 2) {
1305 k = n - j;
1306 kk += ks;
1307 wkr = 0.5 - c[nc - kk];
1308 wki = c[kk];
1309 xr = a[j] - a[k];
1310 xi = a[j + 1] + a[k + 1];
1311 yr = wkr * xr + wki * xi;
1312 yi = wkr * xi - wki * xr;
1313 a[j] -= yr;
1314 a[j + 1] = yi - a[j + 1];
1315 a[k] += yr;
1316 a[k + 1] = yi - a[k + 1];
1317 }
1318 a[m + 1] = -a[m + 1];
1319 }
1320
1321
dctsub(int n,double * a,int nc,double const * c)1322 static void dctsub(int n, double *a, int nc, double const *c)
1323 {
1324 int j, k, kk, ks, m;
1325 double wkr, wki, xr;
1326
1327 m = n >> 1;
1328 ks = nc / n;
1329 kk = 0;
1330 for (j = 1; j < m; j++) {
1331 k = n - j;
1332 kk += ks;
1333 wkr = c[kk] - c[nc - kk];
1334 wki = c[kk] + c[nc - kk];
1335 xr = wki * a[j] - wkr * a[k];
1336 a[j] = wkr * a[j] + wki * a[k];
1337 a[k] = xr;
1338 }
1339 a[m] *= c[0];
1340 }
1341
1342
dstsub(int n,double * a,int nc,double const * c)1343 static void dstsub(int n, double *a, int nc, double const *c)
1344 {
1345 int j, k, kk, ks, m;
1346 double wkr, wki, xr;
1347
1348 m = n >> 1;
1349 ks = nc / n;
1350 kk = 0;
1351 for (j = 1; j < m; j++) {
1352 k = n - j;
1353 kk += ks;
1354 wkr = c[kk] - c[nc - kk];
1355 wki = c[kk] + c[nc - kk];
1356 xr = wki * a[k] - wkr * a[j];
1357 a[k] = wkr * a[k] + wki * a[j];
1358 a[j] = xr;
1359 }
1360 a[m] *= c[0];
1361 }
1362
1363