1
2 /*
3 * M_APM - mapm_fft.c
4 *
5 * This FFT (Fast Fourier Transform) is from Takuya OOURA
6 *
7 * Copyright(C) 1996-1999 Takuya OOURA
8 * email: ooura@mmm.t.u-tokyo.ac.jp
9 *
10 * See full FFT documentation below ... (MCR)
11 *
12 * This software is provided "as is" without express or implied warranty.
13 */
14
15 /*
16 * $Id: mapm_fft.c,v 1.15 2007/12/03 01:37:42 mike Exp $
17 *
18 * This file contains the FFT based FAST MULTIPLICATION function
19 * as well as its support functions.
20 *
21 * $Log: mapm_fft.c,v $
22 * Revision 1.15 2007/12/03 01:37:42 mike
23 * no changes
24 *
25 * Revision 1.14 2003/07/28 19:39:01 mike
26 * change 16 bit constant
27 *
28 * Revision 1.13 2003/07/21 20:11:55 mike
29 * Modify error messages to be in a consistent format.
30 *
31 * Revision 1.12 2003/05/01 21:55:36 mike
32 * remove math.h
33 *
34 * Revision 1.11 2003/03/31 22:10:09 mike
35 * call generic error handling function
36 *
37 * Revision 1.10 2002/11/03 22:11:48 mike
38 * Updated function parameters to use the modern style
39 *
40 * Revision 1.9 2001/07/16 19:16:15 mike
41 * add function M_free_all_fft
42 *
43 * Revision 1.8 2000/08/01 22:23:24 mike
44 * use sizeof(int) from function call to stop
45 * some compilers from complaining.
46 *
47 * Revision 1.7 2000/07/30 22:39:21 mike
48 * lower 16 bit malloc size
49 *
50 * Revision 1.6 2000/07/10 22:54:26 mike
51 * malloc the local data arrays
52 *
53 * Revision 1.5 2000/07/10 00:09:02 mike
54 * use local static arrays for smaller numbers
55 *
56 * Revision 1.4 2000/07/08 18:24:23 mike
57 * minor optimization tweak
58 *
59 * Revision 1.3 2000/07/08 17:52:49 mike
60 * do the FFT in base 10000 instead of MAPM numbers base 100
61 * this runs faster and uses 1/2 the RAM
62 *
63 * Revision 1.2 2000/07/06 21:04:34 mike
64 * added more comments
65 *
66 * Revision 1.1 2000/07/06 20:42:05 mike
67 * Initial revision
68 */
69
70 #include "m_apm_lc.h"
71
72 #ifndef MM_PI_2
73 #define MM_PI_2 1.570796326794896619231321691639751442098584699687
74 #endif
75
76 #ifndef WR5000 /* cos(MM_PI_2*0.5000) */
77 #define WR5000 0.707106781186547524400844362104849039284835937688
78 #endif
79
80 #ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */
81 #define RDFT_LOOP_DIV 64
82 #endif
83
84 extern void M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
85
86 extern void M_rdft(int, int, double *);
87 extern void M_bitrv2(int, double *);
88 extern void M_cftfsub(int, double *);
89 extern void M_cftbsub(int, double *);
90 extern void M_rftfsub(int, double *);
91 extern void M_rftbsub(int, double *);
92 extern void M_cft1st(int, double *);
93 extern void M_cftmdl(int, int, double *);
94
95 static double *M_aa_array, *M_bb_array;
96 static int M_size = -1;
97
98 static char *M_fft_error_msg = "\'M_fast_mul_fft\', Out of memory";
99
100 /****************************************************************************/
M_free_all_fft()101 void M_free_all_fft()
102 {
103 if (M_size > 0)
104 {
105 MAPM_FREE(M_aa_array);
106 MAPM_FREE(M_bb_array);
107 M_size = -1;
108 }
109 }
110 /****************************************************************************/
111 /*
112 * multiply 'uu' by 'vv' with nbytes each
113 * yielding a 2*nbytes result in 'ww'.
114 * each byte contains a base 100 'digit',
115 * i.e.: range from 0-99.
116 *
117 * MSB LSB
118 *
119 * uu,vv [0] [1] [2] ... [N-1]
120 * ww [0] [1] [2] ... [2N-1]
121 */
122
M_fast_mul_fft(UCHAR * ww,UCHAR * uu,UCHAR * vv,int nbytes)123 void M_fast_mul_fft(UCHAR *ww, UCHAR *uu, UCHAR *vv, int nbytes)
124 {
125 int mflag, i, j, nn2, nn;
126 double carry, nnr, dtemp, *a, *b;
127 UCHAR *w0;
128 unsigned long ul;
129
130 if (M_size < 0) /* if first time in, setup working arrays */
131 {
132 if (M_get_sizeof_int() == 2) /* if still using 16 bit compilers */
133 M_size = 516;
134 else
135 M_size = 8200;
136
137 M_aa_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
138 M_bb_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
139
140 if ((M_aa_array == NULL) || (M_bb_array == NULL))
141 {
142 /* fatal, this does not return */
143
144 M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
145 }
146 }
147
148 nn = nbytes;
149 nn2 = nbytes >> 1;
150
151 if (nn > M_size)
152 {
153 mflag = TRUE;
154
155 a = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
156 b = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
157
158 if ((a == NULL) || (b == NULL))
159 {
160 /* fatal, this does not return */
161
162 M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
163 }
164 }
165 else
166 {
167 mflag = FALSE;
168
169 a = M_aa_array;
170 b = M_bb_array;
171 }
172
173 /*
174 * convert normal base 100 MAPM numbers to base 10000
175 * for the FFT operation.
176 */
177
178 i = 0;
179 for (j=0; j < nn2; j++)
180 {
181 a[j] = (double)((int)uu[i] * 100 + uu[i+1]);
182 b[j] = (double)((int)vv[i] * 100 + vv[i+1]);
183 i += 2;
184 }
185
186 /* zero fill the second half of the arrays */
187
188 for (j=nn2; j < nn; j++)
189 {
190 a[j] = 0.0;
191 b[j] = 0.0;
192 }
193
194 /* perform the forward Fourier transforms for both numbers */
195
196 M_rdft(nn, 1, a);
197 M_rdft(nn, 1, b);
198
199 /* perform the convolution ... */
200
201 b[0] *= a[0];
202 b[1] *= a[1];
203
204 for (j=3; j <= nn; j += 2)
205 {
206 dtemp = b[j-1];
207 b[j-1] = dtemp * a[j-1] - b[j] * a[j];
208 b[j] = dtemp * a[j] + b[j] * a[j-1];
209 }
210
211 /* perform the inverse transform on the result */
212
213 M_rdft(nn, -1, b);
214
215 /* perform a final pass to release all the carries */
216 /* we are still in base 10000 at this point */
217
218 carry = 0.0;
219 j = nn;
220 nnr = 2.0 / (double)nn;
221
222 while (1)
223 {
224 dtemp = b[--j] * nnr + carry + 0.5;
225 ul = (unsigned long)(dtemp * 1.0E-4);
226 carry = (double)ul;
227 b[j] = dtemp - carry * 10000.0;
228
229 if (j == 0)
230 break;
231 }
232
233 /* copy result to our destination after converting back to base 100 */
234
235 w0 = ww;
236 M_get_div_rem((int)ul, w0, (w0 + 1));
237
238 for (j=0; j <= (nn - 2); j++)
239 {
240 w0 += 2;
241 M_get_div_rem((int)b[j], w0, (w0 + 1));
242 }
243
244 if (mflag)
245 {
246 MAPM_FREE(b);
247 MAPM_FREE(a);
248 }
249 }
250 /****************************************************************************/
251
252 /*
253 * The following info is from Takuya OOURA's documentation :
254 *
255 * NOTE : MAPM only uses the 'RDFT' function (as well as the
256 * functions RDFT calls). All the code from here down
257 * in this file is from Takuya OOURA. The only change I
258 * made was to add 'M_' in front of all the functions
259 * I used. This was to guard against any possible
260 * name collisions in the future.
261 *
262 * MCR 06 July 2000
263 *
264 *
265 * General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package
266 *
267 * Description:
268 * A package to calculate Discrete Fourier/Cosine/Sine Transforms of
269 * 1-dimensional sequences of length 2^N.
270 *
271 * fft4g_h.c : FFT Package in C - Simple Version I (radix 4,2)
272 *
273 * rdft: Real Discrete Fourier Transform
274 *
275 * Method:
276 * -------- rdft --------
277 * A method with a following butterfly operation appended to "cdft".
278 * In forward transform :
279 * A[k] = sum_j=0^n-1 a[j]*W(n)^(j*k), 0<=k<=n/2,
280 * W(n) = exp(2*pi*i/n),
281 * this routine makes an array x[] :
282 * x[j] = a[2*j] + i*a[2*j+1], 0<=j<n/2
283 * and calls "cdft" of length n/2 :
284 * X[k] = sum_j=0^n/2-1 x[j] * W(n/2)^(j*k), 0<=k<n.
285 * The result A[k] are :
286 * A[k] = X[k] - (1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k])),
287 * A[n/2-k] = X[n/2-k] +
288 * conjg((1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k]))),
289 * 0<=k<=n/2
290 * (notes: conjg() is a complex conjugate, X[n/2]=X[0]).
291 * ----------------------
292 *
293 * Reference:
294 * * Masatake MORI, Makoto NATORI, Tatuo TORII: Suchikeisan,
295 * Iwanamikouzajyouhoukagaku18, Iwanami, 1982 (Japanese)
296 * * Henri J. Nussbaumer: Fast Fourier Transform and Convolution
297 * Algorithms, Springer Verlag, 1982
298 * * C. S. Burrus, Notes on the FFT (with large FFT paper list)
299 * http://www-dsp.rice.edu/research/fft/fftnote.asc
300 *
301 * Copyright:
302 * Copyright(C) 1996-1999 Takuya OOURA
303 * email: ooura@mmm.t.u-tokyo.ac.jp
304 * download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html
305 * You may use, copy, modify this code for any purpose and
306 * without fee. You may distribute this ORIGINAL package.
307 */
308
309 /*
310
311 functions
312 rdft: Real Discrete Fourier Transform
313
314 function prototypes
315 void rdft(int, int, double *);
316
317 -------- Real DFT / Inverse of Real DFT --------
318 [definition]
319 <case1> RDFT
320 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
321 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
322 <case2> IRDFT (excluding scale)
323 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
324 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
325 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
326 [usage]
327 <case1>
328 rdft(n, 1, a);
329 <case2>
330 rdft(n, -1, a);
331 [parameters]
332 n :data length (int)
333 n >= 2, n = power of 2
334 a[0...n-1] :input/output data (double *)
335 <case1>
336 output data
337 a[2*k] = R[k], 0<=k<n/2
338 a[2*k+1] = I[k], 0<k<n/2
339 a[1] = R[n/2]
340 <case2>
341 input data
342 a[2*j] = R[j], 0<=j<n/2
343 a[2*j+1] = I[j], 0<j<n/2
344 a[1] = R[n/2]
345 [remark]
346 Inverse of
347 rdft(n, 1, a);
348 is
349 rdft(n, -1, a);
350 for (j = 0; j <= n - 1; j++) {
351 a[j] *= 2.0 / n;
352 }
353 */
354
355
M_rdft(int n,int isgn,double * a)356 void M_rdft(int n, int isgn, double *a)
357 {
358 double xi;
359
360 if (isgn >= 0) {
361 if (n > 4) {
362 M_bitrv2(n, a);
363 M_cftfsub(n, a);
364 M_rftfsub(n, a);
365 } else if (n == 4) {
366 M_cftfsub(n, a);
367 }
368 xi = a[0] - a[1];
369 a[0] += a[1];
370 a[1] = xi;
371 } else {
372 a[1] = 0.5 * (a[0] - a[1]);
373 a[0] -= a[1];
374 if (n > 4) {
375 M_rftbsub(n, a);
376 M_bitrv2(n, a);
377 M_cftbsub(n, a);
378 } else if (n == 4) {
379 M_cftfsub(n, a);
380 }
381 }
382 }
383
384
385
M_bitrv2(int n,double * a)386 void M_bitrv2(int n, double *a)
387 {
388 int j0, k0, j1, k1, l, m, i, j, k;
389 double xr, xi, yr, yi;
390
391 l = n >> 2;
392 m = 2;
393 while (m < l) {
394 l >>= 1;
395 m <<= 1;
396 }
397 if (m == l) {
398 j0 = 0;
399 for (k0 = 0; k0 < m; k0 += 2) {
400 k = k0;
401 for (j = j0; j < j0 + k0; j += 2) {
402 xr = a[j];
403 xi = a[j + 1];
404 yr = a[k];
405 yi = a[k + 1];
406 a[j] = yr;
407 a[j + 1] = yi;
408 a[k] = xr;
409 a[k + 1] = xi;
410 j1 = j + m;
411 k1 = k + 2 * m;
412 xr = a[j1];
413 xi = a[j1 + 1];
414 yr = a[k1];
415 yi = a[k1 + 1];
416 a[j1] = yr;
417 a[j1 + 1] = yi;
418 a[k1] = xr;
419 a[k1 + 1] = xi;
420 j1 += m;
421 k1 -= m;
422 xr = a[j1];
423 xi = a[j1 + 1];
424 yr = a[k1];
425 yi = a[k1 + 1];
426 a[j1] = yr;
427 a[j1 + 1] = yi;
428 a[k1] = xr;
429 a[k1 + 1] = xi;
430 j1 += m;
431 k1 += 2 * m;
432 xr = a[j1];
433 xi = a[j1 + 1];
434 yr = a[k1];
435 yi = a[k1 + 1];
436 a[j1] = yr;
437 a[j1 + 1] = yi;
438 a[k1] = xr;
439 a[k1 + 1] = xi;
440 for (i = n >> 1; i > (k ^= i); i >>= 1);
441 }
442 j1 = j0 + k0 + m;
443 k1 = j1 + m;
444 xr = a[j1];
445 xi = a[j1 + 1];
446 yr = a[k1];
447 yi = a[k1 + 1];
448 a[j1] = yr;
449 a[j1 + 1] = yi;
450 a[k1] = xr;
451 a[k1 + 1] = xi;
452 for (i = n >> 1; i > (j0 ^= i); i >>= 1);
453 }
454 } else {
455 j0 = 0;
456 for (k0 = 2; k0 < m; k0 += 2) {
457 for (i = n >> 1; i > (j0 ^= i); i >>= 1);
458 k = k0;
459 for (j = j0; j < j0 + k0; j += 2) {
460 xr = a[j];
461 xi = a[j + 1];
462 yr = a[k];
463 yi = a[k + 1];
464 a[j] = yr;
465 a[j + 1] = yi;
466 a[k] = xr;
467 a[k + 1] = xi;
468 j1 = j + m;
469 k1 = k + m;
470 xr = a[j1];
471 xi = a[j1 + 1];
472 yr = a[k1];
473 yi = a[k1 + 1];
474 a[j1] = yr;
475 a[j1 + 1] = yi;
476 a[k1] = xr;
477 a[k1 + 1] = xi;
478 for (i = n >> 1; i > (k ^= i); i >>= 1);
479 }
480 }
481 }
482 }
483
484
485
M_cftfsub(int n,double * a)486 void M_cftfsub(int n, double *a)
487 {
488 int j, j1, j2, j3, l;
489 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
490
491 l = 2;
492 if (n > 8) {
493 M_cft1st(n, a);
494 l = 8;
495 while ((l << 2) < n) {
496 M_cftmdl(n, l, a);
497 l <<= 2;
498 }
499 }
500 if ((l << 2) == n) {
501 for (j = 0; j < l; j += 2) {
502 j1 = j + l;
503 j2 = j1 + l;
504 j3 = j2 + l;
505 x0r = a[j] + a[j1];
506 x0i = a[j + 1] + a[j1 + 1];
507 x1r = a[j] - a[j1];
508 x1i = a[j + 1] - a[j1 + 1];
509 x2r = a[j2] + a[j3];
510 x2i = a[j2 + 1] + a[j3 + 1];
511 x3r = a[j2] - a[j3];
512 x3i = a[j2 + 1] - a[j3 + 1];
513 a[j] = x0r + x2r;
514 a[j + 1] = x0i + x2i;
515 a[j2] = x0r - x2r;
516 a[j2 + 1] = x0i - x2i;
517 a[j1] = x1r - x3i;
518 a[j1 + 1] = x1i + x3r;
519 a[j3] = x1r + x3i;
520 a[j3 + 1] = x1i - x3r;
521 }
522 } else {
523 for (j = 0; j < l; j += 2) {
524 j1 = j + l;
525 x0r = a[j] - a[j1];
526 x0i = a[j + 1] - a[j1 + 1];
527 a[j] += a[j1];
528 a[j + 1] += a[j1 + 1];
529 a[j1] = x0r;
530 a[j1 + 1] = x0i;
531 }
532 }
533 }
534
535
536
M_cftbsub(int n,double * a)537 void M_cftbsub(int n, double *a)
538 {
539 int j, j1, j2, j3, l;
540 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
541
542 l = 2;
543 if (n > 8) {
544 M_cft1st(n, a);
545 l = 8;
546 while ((l << 2) < n) {
547 M_cftmdl(n, l, a);
548 l <<= 2;
549 }
550 }
551 if ((l << 2) == n) {
552 for (j = 0; j < l; j += 2) {
553 j1 = j + l;
554 j2 = j1 + l;
555 j3 = j2 + l;
556 x0r = a[j] + a[j1];
557 x0i = -a[j + 1] - a[j1 + 1];
558 x1r = a[j] - a[j1];
559 x1i = -a[j + 1] + a[j1 + 1];
560 x2r = a[j2] + a[j3];
561 x2i = a[j2 + 1] + a[j3 + 1];
562 x3r = a[j2] - a[j3];
563 x3i = a[j2 + 1] - a[j3 + 1];
564 a[j] = x0r + x2r;
565 a[j + 1] = x0i - x2i;
566 a[j2] = x0r - x2r;
567 a[j2 + 1] = x0i + x2i;
568 a[j1] = x1r - x3i;
569 a[j1 + 1] = x1i - x3r;
570 a[j3] = x1r + x3i;
571 a[j3 + 1] = x1i + x3r;
572 }
573 } else {
574 for (j = 0; j < l; j += 2) {
575 j1 = j + l;
576 x0r = a[j] - a[j1];
577 x0i = -a[j + 1] + a[j1 + 1];
578 a[j] += a[j1];
579 a[j + 1] = -a[j + 1] - a[j1 + 1];
580 a[j1] = x0r;
581 a[j1 + 1] = x0i;
582 }
583 }
584 }
585
586
587
M_cft1st(int n,double * a)588 void M_cft1st(int n, double *a)
589 {
590 int j, kj, kr;
591 double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
592 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
593
594 x0r = a[0] + a[2];
595 x0i = a[1] + a[3];
596 x1r = a[0] - a[2];
597 x1i = a[1] - a[3];
598 x2r = a[4] + a[6];
599 x2i = a[5] + a[7];
600 x3r = a[4] - a[6];
601 x3i = a[5] - a[7];
602 a[0] = x0r + x2r;
603 a[1] = x0i + x2i;
604 a[4] = x0r - x2r;
605 a[5] = x0i - x2i;
606 a[2] = x1r - x3i;
607 a[3] = x1i + x3r;
608 a[6] = x1r + x3i;
609 a[7] = x1i - x3r;
610 wn4r = WR5000;
611 x0r = a[8] + a[10];
612 x0i = a[9] + a[11];
613 x1r = a[8] - a[10];
614 x1i = a[9] - a[11];
615 x2r = a[12] + a[14];
616 x2i = a[13] + a[15];
617 x3r = a[12] - a[14];
618 x3i = a[13] - a[15];
619 a[8] = x0r + x2r;
620 a[9] = x0i + x2i;
621 a[12] = x2i - x0i;
622 a[13] = x0r - x2r;
623 x0r = x1r - x3i;
624 x0i = x1i + x3r;
625 a[10] = wn4r * (x0r - x0i);
626 a[11] = wn4r * (x0r + x0i);
627 x0r = x3i + x1r;
628 x0i = x3r - x1i;
629 a[14] = wn4r * (x0i - x0r);
630 a[15] = wn4r * (x0i + x0r);
631 ew = MM_PI_2 / n;
632 kr = 0;
633 for (j = 16; j < n; j += 16) {
634 for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
635 wk1r = cos(ew * kr);
636 wk1i = sin(ew * kr);
637 wk2r = 1 - 2 * wk1i * wk1i;
638 wk2i = 2 * wk1i * wk1r;
639 wk3r = wk1r - 2 * wk2i * wk1i;
640 wk3i = 2 * wk2i * wk1r - wk1i;
641 x0r = a[j] + a[j + 2];
642 x0i = a[j + 1] + a[j + 3];
643 x1r = a[j] - a[j + 2];
644 x1i = a[j + 1] - a[j + 3];
645 x2r = a[j + 4] + a[j + 6];
646 x2i = a[j + 5] + a[j + 7];
647 x3r = a[j + 4] - a[j + 6];
648 x3i = a[j + 5] - a[j + 7];
649 a[j] = x0r + x2r;
650 a[j + 1] = x0i + x2i;
651 x0r -= x2r;
652 x0i -= x2i;
653 a[j + 4] = wk2r * x0r - wk2i * x0i;
654 a[j + 5] = wk2r * x0i + wk2i * x0r;
655 x0r = x1r - x3i;
656 x0i = x1i + x3r;
657 a[j + 2] = wk1r * x0r - wk1i * x0i;
658 a[j + 3] = wk1r * x0i + wk1i * x0r;
659 x0r = x1r + x3i;
660 x0i = x1i - x3r;
661 a[j + 6] = wk3r * x0r - wk3i * x0i;
662 a[j + 7] = wk3r * x0i + wk3i * x0r;
663 x0r = wn4r * (wk1r - wk1i);
664 wk1i = wn4r * (wk1r + wk1i);
665 wk1r = x0r;
666 wk3r = wk1r - 2 * wk2r * wk1i;
667 wk3i = 2 * wk2r * wk1r - wk1i;
668 x0r = a[j + 8] + a[j + 10];
669 x0i = a[j + 9] + a[j + 11];
670 x1r = a[j + 8] - a[j + 10];
671 x1i = a[j + 9] - a[j + 11];
672 x2r = a[j + 12] + a[j + 14];
673 x2i = a[j + 13] + a[j + 15];
674 x3r = a[j + 12] - a[j + 14];
675 x3i = a[j + 13] - a[j + 15];
676 a[j + 8] = x0r + x2r;
677 a[j + 9] = x0i + x2i;
678 x0r -= x2r;
679 x0i -= x2i;
680 a[j + 12] = -wk2i * x0r - wk2r * x0i;
681 a[j + 13] = -wk2i * x0i + wk2r * x0r;
682 x0r = x1r - x3i;
683 x0i = x1i + x3r;
684 a[j + 10] = wk1r * x0r - wk1i * x0i;
685 a[j + 11] = wk1r * x0i + wk1i * x0r;
686 x0r = x1r + x3i;
687 x0i = x1i - x3r;
688 a[j + 14] = wk3r * x0r - wk3i * x0i;
689 a[j + 15] = wk3r * x0i + wk3i * x0r;
690 }
691 }
692
693
694
M_cftmdl(int n,int l,double * a)695 void M_cftmdl(int n, int l, double *a)
696 {
697 int j, j1, j2, j3, k, kj, kr, m, m2;
698 double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
699 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
700
701 m = l << 2;
702 for (j = 0; j < l; j += 2) {
703 j1 = j + l;
704 j2 = j1 + l;
705 j3 = j2 + l;
706 x0r = a[j] + a[j1];
707 x0i = a[j + 1] + a[j1 + 1];
708 x1r = a[j] - a[j1];
709 x1i = a[j + 1] - a[j1 + 1];
710 x2r = a[j2] + a[j3];
711 x2i = a[j2 + 1] + a[j3 + 1];
712 x3r = a[j2] - a[j3];
713 x3i = a[j2 + 1] - a[j3 + 1];
714 a[j] = x0r + x2r;
715 a[j + 1] = x0i + x2i;
716 a[j2] = x0r - x2r;
717 a[j2 + 1] = x0i - x2i;
718 a[j1] = x1r - x3i;
719 a[j1 + 1] = x1i + x3r;
720 a[j3] = x1r + x3i;
721 a[j3 + 1] = x1i - x3r;
722 }
723 wn4r = WR5000;
724 for (j = m; j < l + m; j += 2) {
725 j1 = j + l;
726 j2 = j1 + l;
727 j3 = j2 + l;
728 x0r = a[j] + a[j1];
729 x0i = a[j + 1] + a[j1 + 1];
730 x1r = a[j] - a[j1];
731 x1i = a[j + 1] - a[j1 + 1];
732 x2r = a[j2] + a[j3];
733 x2i = a[j2 + 1] + a[j3 + 1];
734 x3r = a[j2] - a[j3];
735 x3i = a[j2 + 1] - a[j3 + 1];
736 a[j] = x0r + x2r;
737 a[j + 1] = x0i + x2i;
738 a[j2] = x2i - x0i;
739 a[j2 + 1] = x0r - x2r;
740 x0r = x1r - x3i;
741 x0i = x1i + x3r;
742 a[j1] = wn4r * (x0r - x0i);
743 a[j1 + 1] = wn4r * (x0r + x0i);
744 x0r = x3i + x1r;
745 x0i = x3r - x1i;
746 a[j3] = wn4r * (x0i - x0r);
747 a[j3 + 1] = wn4r * (x0i + x0r);
748 }
749 ew = MM_PI_2 / n;
750 kr = 0;
751 m2 = 2 * m;
752 for (k = m2; k < n; k += m2) {
753 for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
754 wk1r = cos(ew * kr);
755 wk1i = sin(ew * kr);
756 wk2r = 1 - 2 * wk1i * wk1i;
757 wk2i = 2 * wk1i * wk1r;
758 wk3r = wk1r - 2 * wk2i * wk1i;
759 wk3i = 2 * wk2i * wk1r - wk1i;
760 for (j = k; j < l + k; j += 2) {
761 j1 = j + l;
762 j2 = j1 + l;
763 j3 = j2 + l;
764 x0r = a[j] + a[j1];
765 x0i = a[j + 1] + a[j1 + 1];
766 x1r = a[j] - a[j1];
767 x1i = a[j + 1] - a[j1 + 1];
768 x2r = a[j2] + a[j3];
769 x2i = a[j2 + 1] + a[j3 + 1];
770 x3r = a[j2] - a[j3];
771 x3i = a[j2 + 1] - a[j3 + 1];
772 a[j] = x0r + x2r;
773 a[j + 1] = x0i + x2i;
774 x0r -= x2r;
775 x0i -= x2i;
776 a[j2] = wk2r * x0r - wk2i * x0i;
777 a[j2 + 1] = wk2r * x0i + wk2i * x0r;
778 x0r = x1r - x3i;
779 x0i = x1i + x3r;
780 a[j1] = wk1r * x0r - wk1i * x0i;
781 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
782 x0r = x1r + x3i;
783 x0i = x1i - x3r;
784 a[j3] = wk3r * x0r - wk3i * x0i;
785 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
786 }
787 x0r = wn4r * (wk1r - wk1i);
788 wk1i = wn4r * (wk1r + wk1i);
789 wk1r = x0r;
790 wk3r = wk1r - 2 * wk2r * wk1i;
791 wk3i = 2 * wk2r * wk1r - wk1i;
792 for (j = k + m; j < l + (k + m); j += 2) {
793 j1 = j + l;
794 j2 = j1 + l;
795 j3 = j2 + l;
796 x0r = a[j] + a[j1];
797 x0i = a[j + 1] + a[j1 + 1];
798 x1r = a[j] - a[j1];
799 x1i = a[j + 1] - a[j1 + 1];
800 x2r = a[j2] + a[j3];
801 x2i = a[j2 + 1] + a[j3 + 1];
802 x3r = a[j2] - a[j3];
803 x3i = a[j2 + 1] - a[j3 + 1];
804 a[j] = x0r + x2r;
805 a[j + 1] = x0i + x2i;
806 x0r -= x2r;
807 x0i -= x2i;
808 a[j2] = -wk2i * x0r - wk2r * x0i;
809 a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
810 x0r = x1r - x3i;
811 x0i = x1i + x3r;
812 a[j1] = wk1r * x0r - wk1i * x0i;
813 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
814 x0r = x1r + x3i;
815 x0i = x1i - x3r;
816 a[j3] = wk3r * x0r - wk3i * x0i;
817 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
818 }
819 }
820 }
821
822
823
M_rftfsub(int n,double * a)824 void M_rftfsub(int n, double *a)
825 {
826 int i, i0, j, k;
827 double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
828
829 ec = 2 * MM_PI_2 / n;
830 wkr = 0;
831 wki = 0;
832 wdi = cos(ec);
833 wdr = sin(ec);
834 wdi *= wdr;
835 wdr *= wdr;
836 w1r = 1 - 2 * wdr;
837 w1i = 2 * wdi;
838 ss = 2 * w1i;
839 i = n >> 1;
840 while (1) {
841 i0 = i - 4 * RDFT_LOOP_DIV;
842 if (i0 < 4) {
843 i0 = 4;
844 }
845 for (j = i - 4; j >= i0; j -= 4) {
846 k = n - j;
847 xr = a[j + 2] - a[k - 2];
848 xi = a[j + 3] + a[k - 1];
849 yr = wdr * xr - wdi * xi;
850 yi = wdr * xi + wdi * xr;
851 a[j + 2] -= yr;
852 a[j + 3] -= yi;
853 a[k - 2] += yr;
854 a[k - 1] -= yi;
855 wkr += ss * wdi;
856 wki += ss * (0.5 - wdr);
857 xr = a[j] - a[k];
858 xi = a[j + 1] + a[k + 1];
859 yr = wkr * xr - wki * xi;
860 yi = wkr * xi + wki * xr;
861 a[j] -= yr;
862 a[j + 1] -= yi;
863 a[k] += yr;
864 a[k + 1] -= yi;
865 wdr += ss * wki;
866 wdi += ss * (0.5 - wkr);
867 }
868 if (i0 == 4) {
869 break;
870 }
871 wkr = 0.5 * sin(ec * i0);
872 wki = 0.5 * cos(ec * i0);
873 wdr = 0.5 - (wkr * w1r - wki * w1i);
874 wdi = wkr * w1i + wki * w1r;
875 wkr = 0.5 - wkr;
876 i = i0;
877 }
878 xr = a[2] - a[n - 2];
879 xi = a[3] + a[n - 1];
880 yr = wdr * xr - wdi * xi;
881 yi = wdr * xi + wdi * xr;
882 a[2] -= yr;
883 a[3] -= yi;
884 a[n - 2] += yr;
885 a[n - 1] -= yi;
886 }
887
888
889
M_rftbsub(int n,double * a)890 void M_rftbsub(int n, double *a)
891 {
892 int i, i0, j, k;
893 double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
894
895 ec = 2 * MM_PI_2 / n;
896 wkr = 0;
897 wki = 0;
898 wdi = cos(ec);
899 wdr = sin(ec);
900 wdi *= wdr;
901 wdr *= wdr;
902 w1r = 1 - 2 * wdr;
903 w1i = 2 * wdi;
904 ss = 2 * w1i;
905 i = n >> 1;
906 a[i + 1] = -a[i + 1];
907 while (1) {
908 i0 = i - 4 * RDFT_LOOP_DIV;
909 if (i0 < 4) {
910 i0 = 4;
911 }
912 for (j = i - 4; j >= i0; j -= 4) {
913 k = n - j;
914 xr = a[j + 2] - a[k - 2];
915 xi = a[j + 3] + a[k - 1];
916 yr = wdr * xr + wdi * xi;
917 yi = wdr * xi - wdi * xr;
918 a[j + 2] -= yr;
919 a[j + 3] = yi - a[j + 3];
920 a[k - 2] += yr;
921 a[k - 1] = yi - a[k - 1];
922 wkr += ss * wdi;
923 wki += ss * (0.5 - wdr);
924 xr = a[j] - a[k];
925 xi = a[j + 1] + a[k + 1];
926 yr = wkr * xr + wki * xi;
927 yi = wkr * xi - wki * xr;
928 a[j] -= yr;
929 a[j + 1] = yi - a[j + 1];
930 a[k] += yr;
931 a[k + 1] = yi - a[k + 1];
932 wdr += ss * wki;
933 wdi += ss * (0.5 - wkr);
934 }
935 if (i0 == 4) {
936 break;
937 }
938 wkr = 0.5 * sin(ec * i0);
939 wki = 0.5 * cos(ec * i0);
940 wdr = 0.5 - (wkr * w1r - wki * w1i);
941 wdi = wkr * w1i + wki * w1r;
942 wkr = 0.5 - wkr;
943 i = i0;
944 }
945 xr = a[2] - a[n - 2];
946 xi = a[3] + a[n - 1];
947 yr = wdr * xr + wdi * xi;
948 yi = wdr * xi - wdi * xr;
949 a[2] -= yr;
950 a[3] = yi - a[3];
951 a[n - 2] += yr;
952 a[n - 1] = yi - a[n - 1];
953 a[1] = -a[1];
954 }
955
956