xref: /haiku/src/libs/mapm/mapm_fft.c (revision cbe0a0c436162d78cc3f92a305b64918c839d079)
1 
2 /*
3  *  M_APM  -  mapm_fft.c
4  *
5  *  This FFT (Fast Fourier Transform) is from Takuya OOURA
6  *
7  *  Copyright(C) 1996-1999 Takuya OOURA
8  *  email: ooura@mmm.t.u-tokyo.ac.jp
9  *
10  *  See full FFT documentation below ...  (MCR)
11  *
12  *  This software is provided "as is" without express or implied warranty.
13  */
14 
15 /*
16  *      $Id: mapm_fft.c,v 1.15 2007/12/03 01:37:42 mike Exp $
17  *
18  *      This file contains the FFT based FAST MULTIPLICATION function
19  *      as well as its support functions.
20  *
21  *      $Log: mapm_fft.c,v $
22  *      Revision 1.15  2007/12/03 01:37:42  mike
23  *      no changes
24  *
25  *      Revision 1.14  2003/07/28 19:39:01  mike
26  *      change 16 bit constant
27  *
28  *      Revision 1.13  2003/07/21 20:11:55  mike
29  *      Modify error messages to be in a consistent format.
30  *
31  *      Revision 1.12  2003/05/01 21:55:36  mike
32  *      remove math.h
33  *
34  *      Revision 1.11  2003/03/31 22:10:09  mike
35  *      call generic error handling function
36  *
37  *      Revision 1.10  2002/11/03 22:11:48  mike
38  *      Updated function parameters to use the modern style
39  *
40  *      Revision 1.9  2001/07/16 19:16:15  mike
41  *      add function M_free_all_fft
42  *
43  *      Revision 1.8  2000/08/01 22:23:24  mike
44  *      use sizeof(int) from function call to stop
45  *      some compilers from complaining.
46  *
47  *      Revision 1.7  2000/07/30 22:39:21  mike
48  *      lower 16 bit malloc size
49  *
50  *      Revision 1.6  2000/07/10 22:54:26  mike
51  *      malloc the local data arrays
52  *
53  *      Revision 1.5  2000/07/10 00:09:02  mike
54  *      use local static arrays for smaller numbers
55  *
56  *      Revision 1.4  2000/07/08 18:24:23  mike
57  *      minor optimization tweak
58  *
59  *      Revision 1.3  2000/07/08 17:52:49  mike
60  *      do the FFT in base 10000 instead of MAPM numbers base 100
61  *      this runs faster and uses 1/2 the RAM
62  *
63  *      Revision 1.2  2000/07/06 21:04:34  mike
64  *      added more comments
65  *
66  *      Revision 1.1  2000/07/06 20:42:05  mike
67  *      Initial revision
68  */
69 
70 #include "m_apm_lc.h"
71 
72 #ifndef MM_PI_2
73 #define MM_PI_2      1.570796326794896619231321691639751442098584699687
74 #endif
75 
76 #ifndef WR5000       /* cos(MM_PI_2*0.5000) */
77 #define WR5000       0.707106781186547524400844362104849039284835937688
78 #endif
79 
80 #ifndef RDFT_LOOP_DIV     /* control of the RDFT's speed & tolerance */
81 #define RDFT_LOOP_DIV 64
82 #endif
83 
84 extern void   M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
85 
86 extern void   M_rdft(int, int, double *);
87 extern void   M_bitrv2(int, double *);
88 extern void   M_cftfsub(int, double *);
89 extern void   M_cftbsub(int, double *);
90 extern void   M_rftfsub(int, double *);
91 extern void   M_rftbsub(int, double *);
92 extern void   M_cft1st(int, double *);
93 extern void   M_cftmdl(int, int, double *);
94 
95 static double *M_aa_array, *M_bb_array;
96 static int    M_size = -1;
97 
98 static char   *M_fft_error_msg = "\'M_fast_mul_fft\', Out of memory";
99 
100 /****************************************************************************/
101 void	M_free_all_fft()
102 {
103 if (M_size > 0)
104   {
105    MAPM_FREE(M_aa_array);
106    MAPM_FREE(M_bb_array);
107    M_size = -1;
108   }
109 }
110 /****************************************************************************/
111 /*
112  *      multiply 'uu' by 'vv' with nbytes each
113  *      yielding a 2*nbytes result in 'ww'.
114  *      each byte contains a base 100 'digit',
115  *      i.e.: range from 0-99.
116  *
117  *             MSB              LSB
118  *
119  *   uu,vv     [0] [1] [2] ... [N-1]
120  *   ww        [0] [1] [2] ... [2N-1]
121  */
122 
123 void	M_fast_mul_fft(UCHAR *ww, UCHAR *uu, UCHAR *vv, int nbytes)
124 {
125 int             mflag, i, j, nn2, nn;
126 double          carry, nnr, dtemp, *a, *b;
127 UCHAR           *w0;
128 unsigned long   ul;
129 
130 if (M_size < 0)                  /* if first time in, setup working arrays */
131   {
132    if (M_get_sizeof_int() == 2)  /* if still using 16 bit compilers */
133      M_size = 516;
134    else
135      M_size = 8200;
136 
137    M_aa_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
138    M_bb_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
139 
140    if ((M_aa_array == NULL) || (M_bb_array == NULL))
141      {
142       /* fatal, this does not return */
143 
144       M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
145      }
146   }
147 
148 nn  = nbytes;
149 nn2 = nbytes >> 1;
150 
151 if (nn > M_size)
152   {
153    mflag = TRUE;
154 
155    a = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
156    b = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
157 
158    if ((a == NULL) || (b == NULL))
159      {
160       /* fatal, this does not return */
161 
162       M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
163      }
164   }
165 else
166   {
167    mflag = FALSE;
168 
169    a = M_aa_array;
170    b = M_bb_array;
171   }
172 
173 /*
174  *   convert normal base 100 MAPM numbers to base 10000
175  *   for the FFT operation.
176  */
177 
178 i = 0;
179 for (j=0; j < nn2; j++)
180   {
181    a[j] = (double)((int)uu[i] * 100 + uu[i+1]);
182    b[j] = (double)((int)vv[i] * 100 + vv[i+1]);
183    i += 2;
184   }
185 
186 /* zero fill the second half of the arrays */
187 
188 for (j=nn2; j < nn; j++)
189   {
190    a[j] = 0.0;
191    b[j] = 0.0;
192   }
193 
194 /* perform the forward Fourier transforms for both numbers */
195 
196 M_rdft(nn, 1, a);
197 M_rdft(nn, 1, b);
198 
199 /* perform the convolution ... */
200 
201 b[0] *= a[0];
202 b[1] *= a[1];
203 
204 for (j=3; j <= nn; j += 2)
205   {
206    dtemp  = b[j-1];
207    b[j-1] = dtemp * a[j-1] - b[j] * a[j];
208    b[j]   = dtemp * a[j] + b[j] * a[j-1];
209   }
210 
211 /* perform the inverse transform on the result */
212 
213 M_rdft(nn, -1, b);
214 
215 /* perform a final pass to release all the carries */
216 /* we are still in base 10000 at this point        */
217 
218 carry = 0.0;
219 j     = nn;
220 nnr   = 2.0 / (double)nn;
221 
222 while (1)
223   {
224    dtemp = b[--j] * nnr + carry + 0.5;
225    ul    = (unsigned long)(dtemp * 1.0E-4);
226    carry = (double)ul;
227    b[j]  = dtemp - carry * 10000.0;
228 
229    if (j == 0)
230      break;
231   }
232 
233 /* copy result to our destination after converting back to base 100 */
234 
235 w0 = ww;
236 M_get_div_rem((int)ul, w0, (w0 + 1));
237 
238 for (j=0; j <= (nn - 2); j++)
239   {
240    w0 += 2;
241    M_get_div_rem((int)b[j], w0, (w0 + 1));
242   }
243 
244 if (mflag)
245   {
246    MAPM_FREE(b);
247    MAPM_FREE(a);
248   }
249 }
250 /****************************************************************************/
251 
252 /*
253  *    The following info is from Takuya OOURA's documentation :
254  *
255  *    NOTE : MAPM only uses the 'RDFT' function (as well as the
256  *           functions RDFT calls). All the code from here down
257  *           in this file is from Takuya OOURA. The only change I
258  *           made was to add 'M_' in front of all the functions
259  *           I used. This was to guard against any possible
260  *           name collisions in the future.
261  *
262  *    MCR  06 July 2000
263  *
264  *
265  *    General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package
266  *
267  *    Description:
268  *        A package to calculate Discrete Fourier/Cosine/Sine Transforms of
269  *        1-dimensional sequences of length 2^N.
270  *
271  *        fft4g_h.c  : FFT Package in C       - Simple Version I   (radix 4,2)
272  *
273  *        rdft: Real Discrete Fourier Transform
274  *
275  *    Method:
276  *        -------- rdft --------
277  *        A method with a following butterfly operation appended to "cdft".
278  *        In forward transform :
279  *            A[k] = sum_j=0^n-1 a[j]*W(n)^(j*k), 0<=k<=n/2,
280  *                W(n) = exp(2*pi*i/n),
281  *        this routine makes an array x[] :
282  *            x[j] = a[2*j] + i*a[2*j+1], 0<=j<n/2
283  *        and calls "cdft" of length n/2 :
284  *            X[k] = sum_j=0^n/2-1 x[j] * W(n/2)^(j*k), 0<=k<n.
285  *        The result A[k] are :
286  *            A[k]     = X[k]     - (1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k])),
287  *            A[n/2-k] = X[n/2-k] +
288  *                            conjg((1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k]))),
289  *                0<=k<=n/2
290  *            (notes: conjg() is a complex conjugate, X[n/2]=X[0]).
291  *        ----------------------
292  *
293  *    Reference:
294  *        * Masatake MORI, Makoto NATORI, Tatuo TORII: Suchikeisan,
295  *          Iwanamikouzajyouhoukagaku18, Iwanami, 1982 (Japanese)
296  *        * Henri J. Nussbaumer: Fast Fourier Transform and Convolution
297  *          Algorithms, Springer Verlag, 1982
298  *        * C. S. Burrus, Notes on the FFT (with large FFT paper list)
299  *          http://www-dsp.rice.edu/research/fft/fftnote.asc
300  *
301  *    Copyright:
302  *        Copyright(C) 1996-1999 Takuya OOURA
303  *        email: ooura@mmm.t.u-tokyo.ac.jp
304  *        download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html
305  *        You may use, copy, modify this code for any purpose and
306  *        without fee. You may distribute this ORIGINAL package.
307  */
308 
309 /*
310 
311 functions
312     rdft: Real Discrete Fourier Transform
313 
314 function prototypes
315     void rdft(int, int, double *);
316 
317 -------- Real DFT / Inverse of Real DFT --------
318     [definition]
319         <case1> RDFT
320             R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
321             I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
322         <case2> IRDFT (excluding scale)
323             a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
324                    sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
325                    sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
326     [usage]
327         <case1>
328             rdft(n, 1, a);
329         <case2>
330             rdft(n, -1, a);
331     [parameters]
332         n              :data length (int)
333                         n >= 2, n = power of 2
334         a[0...n-1]     :input/output data (double *)
335                         <case1>
336                             output data
337                                 a[2*k] = R[k], 0<=k<n/2
338                                 a[2*k+1] = I[k], 0<k<n/2
339                                 a[1] = R[n/2]
340                         <case2>
341                             input data
342                                 a[2*j] = R[j], 0<=j<n/2
343                                 a[2*j+1] = I[j], 0<j<n/2
344                                 a[1] = R[n/2]
345     [remark]
346         Inverse of
347             rdft(n, 1, a);
348         is
349             rdft(n, -1, a);
350             for (j = 0; j <= n - 1; j++) {
351                 a[j] *= 2.0 / n;
352             }
353 */
354 
355 
356 void	M_rdft(int n, int isgn, double *a)
357 {
358     double xi;
359 
360     if (isgn >= 0) {
361         if (n > 4) {
362             M_bitrv2(n, a);
363             M_cftfsub(n, a);
364             M_rftfsub(n, a);
365         } else if (n == 4) {
366             M_cftfsub(n, a);
367         }
368         xi = a[0] - a[1];
369         a[0] += a[1];
370         a[1] = xi;
371     } else {
372         a[1] = 0.5 * (a[0] - a[1]);
373         a[0] -= a[1];
374         if (n > 4) {
375             M_rftbsub(n, a);
376             M_bitrv2(n, a);
377             M_cftbsub(n, a);
378         } else if (n == 4) {
379             M_cftfsub(n, a);
380         }
381     }
382 }
383 
384 
385 
386 void    M_bitrv2(int n, double *a)
387 {
388     int j0, k0, j1, k1, l, m, i, j, k;
389     double xr, xi, yr, yi;
390 
391     l = n >> 2;
392     m = 2;
393     while (m < l) {
394         l >>= 1;
395         m <<= 1;
396     }
397     if (m == l) {
398         j0 = 0;
399         for (k0 = 0; k0 < m; k0 += 2) {
400             k = k0;
401             for (j = j0; j < j0 + k0; j += 2) {
402                 xr = a[j];
403                 xi = a[j + 1];
404                 yr = a[k];
405                 yi = a[k + 1];
406                 a[j] = yr;
407                 a[j + 1] = yi;
408                 a[k] = xr;
409                 a[k + 1] = xi;
410                 j1 = j + m;
411                 k1 = k + 2 * m;
412                 xr = a[j1];
413                 xi = a[j1 + 1];
414                 yr = a[k1];
415                 yi = a[k1 + 1];
416                 a[j1] = yr;
417                 a[j1 + 1] = yi;
418                 a[k1] = xr;
419                 a[k1 + 1] = xi;
420                 j1 += m;
421                 k1 -= m;
422                 xr = a[j1];
423                 xi = a[j1 + 1];
424                 yr = a[k1];
425                 yi = a[k1 + 1];
426                 a[j1] = yr;
427                 a[j1 + 1] = yi;
428                 a[k1] = xr;
429                 a[k1 + 1] = xi;
430                 j1 += m;
431                 k1 += 2 * m;
432                 xr = a[j1];
433                 xi = a[j1 + 1];
434                 yr = a[k1];
435                 yi = a[k1 + 1];
436                 a[j1] = yr;
437                 a[j1 + 1] = yi;
438                 a[k1] = xr;
439                 a[k1 + 1] = xi;
440                 for (i = n >> 1; i > (k ^= i); i >>= 1);
441             }
442             j1 = j0 + k0 + m;
443             k1 = j1 + m;
444             xr = a[j1];
445             xi = a[j1 + 1];
446             yr = a[k1];
447             yi = a[k1 + 1];
448             a[j1] = yr;
449             a[j1 + 1] = yi;
450             a[k1] = xr;
451             a[k1 + 1] = xi;
452             for (i = n >> 1; i > (j0 ^= i); i >>= 1);
453         }
454     } else {
455         j0 = 0;
456         for (k0 = 2; k0 < m; k0 += 2) {
457             for (i = n >> 1; i > (j0 ^= i); i >>= 1);
458             k = k0;
459             for (j = j0; j < j0 + k0; j += 2) {
460                 xr = a[j];
461                 xi = a[j + 1];
462                 yr = a[k];
463                 yi = a[k + 1];
464                 a[j] = yr;
465                 a[j + 1] = yi;
466                 a[k] = xr;
467                 a[k + 1] = xi;
468                 j1 = j + m;
469                 k1 = k + m;
470                 xr = a[j1];
471                 xi = a[j1 + 1];
472                 yr = a[k1];
473                 yi = a[k1 + 1];
474                 a[j1] = yr;
475                 a[j1 + 1] = yi;
476                 a[k1] = xr;
477                 a[k1 + 1] = xi;
478                 for (i = n >> 1; i > (k ^= i); i >>= 1);
479             }
480         }
481     }
482 }
483 
484 
485 
486 void    M_cftfsub(int n, double *a)
487 {
488     int j, j1, j2, j3, l;
489     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
490 
491     l = 2;
492     if (n > 8) {
493         M_cft1st(n, a);
494         l = 8;
495         while ((l << 2) < n) {
496             M_cftmdl(n, l, a);
497             l <<= 2;
498         }
499     }
500     if ((l << 2) == n) {
501         for (j = 0; j < l; j += 2) {
502             j1 = j + l;
503             j2 = j1 + l;
504             j3 = j2 + l;
505             x0r = a[j] + a[j1];
506             x0i = a[j + 1] + a[j1 + 1];
507             x1r = a[j] - a[j1];
508             x1i = a[j + 1] - a[j1 + 1];
509             x2r = a[j2] + a[j3];
510             x2i = a[j2 + 1] + a[j3 + 1];
511             x3r = a[j2] - a[j3];
512             x3i = a[j2 + 1] - a[j3 + 1];
513             a[j] = x0r + x2r;
514             a[j + 1] = x0i + x2i;
515             a[j2] = x0r - x2r;
516             a[j2 + 1] = x0i - x2i;
517             a[j1] = x1r - x3i;
518             a[j1 + 1] = x1i + x3r;
519             a[j3] = x1r + x3i;
520             a[j3 + 1] = x1i - x3r;
521         }
522     } else {
523         for (j = 0; j < l; j += 2) {
524             j1 = j + l;
525             x0r = a[j] - a[j1];
526             x0i = a[j + 1] - a[j1 + 1];
527             a[j] += a[j1];
528             a[j + 1] += a[j1 + 1];
529             a[j1] = x0r;
530             a[j1 + 1] = x0i;
531         }
532     }
533 }
534 
535 
536 
537 void 	M_cftbsub(int n, double *a)
538 {
539     int j, j1, j2, j3, l;
540     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
541 
542     l = 2;
543     if (n > 8) {
544         M_cft1st(n, a);
545         l = 8;
546         while ((l << 2) < n) {
547             M_cftmdl(n, l, a);
548             l <<= 2;
549         }
550     }
551     if ((l << 2) == n) {
552         for (j = 0; j < l; j += 2) {
553             j1 = j + l;
554             j2 = j1 + l;
555             j3 = j2 + l;
556             x0r = a[j] + a[j1];
557             x0i = -a[j + 1] - a[j1 + 1];
558             x1r = a[j] - a[j1];
559             x1i = -a[j + 1] + a[j1 + 1];
560             x2r = a[j2] + a[j3];
561             x2i = a[j2 + 1] + a[j3 + 1];
562             x3r = a[j2] - a[j3];
563             x3i = a[j2 + 1] - a[j3 + 1];
564             a[j] = x0r + x2r;
565             a[j + 1] = x0i - x2i;
566             a[j2] = x0r - x2r;
567             a[j2 + 1] = x0i + x2i;
568             a[j1] = x1r - x3i;
569             a[j1 + 1] = x1i - x3r;
570             a[j3] = x1r + x3i;
571             a[j3 + 1] = x1i + x3r;
572         }
573     } else {
574         for (j = 0; j < l; j += 2) {
575             j1 = j + l;
576             x0r = a[j] - a[j1];
577             x0i = -a[j + 1] + a[j1 + 1];
578             a[j] += a[j1];
579             a[j + 1] = -a[j + 1] - a[j1 + 1];
580             a[j1] = x0r;
581             a[j1 + 1] = x0i;
582         }
583     }
584 }
585 
586 
587 
588 void 	M_cft1st(int n, double *a)
589 {
590     int j, kj, kr;
591     double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
592     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
593 
594     x0r = a[0] + a[2];
595     x0i = a[1] + a[3];
596     x1r = a[0] - a[2];
597     x1i = a[1] - a[3];
598     x2r = a[4] + a[6];
599     x2i = a[5] + a[7];
600     x3r = a[4] - a[6];
601     x3i = a[5] - a[7];
602     a[0] = x0r + x2r;
603     a[1] = x0i + x2i;
604     a[4] = x0r - x2r;
605     a[5] = x0i - x2i;
606     a[2] = x1r - x3i;
607     a[3] = x1i + x3r;
608     a[6] = x1r + x3i;
609     a[7] = x1i - x3r;
610     wn4r = WR5000;
611     x0r = a[8] + a[10];
612     x0i = a[9] + a[11];
613     x1r = a[8] - a[10];
614     x1i = a[9] - a[11];
615     x2r = a[12] + a[14];
616     x2i = a[13] + a[15];
617     x3r = a[12] - a[14];
618     x3i = a[13] - a[15];
619     a[8] = x0r + x2r;
620     a[9] = x0i + x2i;
621     a[12] = x2i - x0i;
622     a[13] = x0r - x2r;
623     x0r = x1r - x3i;
624     x0i = x1i + x3r;
625     a[10] = wn4r * (x0r - x0i);
626     a[11] = wn4r * (x0r + x0i);
627     x0r = x3i + x1r;
628     x0i = x3r - x1i;
629     a[14] = wn4r * (x0i - x0r);
630     a[15] = wn4r * (x0i + x0r);
631     ew = MM_PI_2 / n;
632     kr = 0;
633     for (j = 16; j < n; j += 16) {
634         for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
635         wk1r = cos(ew * kr);
636         wk1i = sin(ew * kr);
637         wk2r = 1 - 2 * wk1i * wk1i;
638         wk2i = 2 * wk1i * wk1r;
639         wk3r = wk1r - 2 * wk2i * wk1i;
640         wk3i = 2 * wk2i * wk1r - wk1i;
641         x0r = a[j] + a[j + 2];
642         x0i = a[j + 1] + a[j + 3];
643         x1r = a[j] - a[j + 2];
644         x1i = a[j + 1] - a[j + 3];
645         x2r = a[j + 4] + a[j + 6];
646         x2i = a[j + 5] + a[j + 7];
647         x3r = a[j + 4] - a[j + 6];
648         x3i = a[j + 5] - a[j + 7];
649         a[j] = x0r + x2r;
650         a[j + 1] = x0i + x2i;
651         x0r -= x2r;
652         x0i -= x2i;
653         a[j + 4] = wk2r * x0r - wk2i * x0i;
654         a[j + 5] = wk2r * x0i + wk2i * x0r;
655         x0r = x1r - x3i;
656         x0i = x1i + x3r;
657         a[j + 2] = wk1r * x0r - wk1i * x0i;
658         a[j + 3] = wk1r * x0i + wk1i * x0r;
659         x0r = x1r + x3i;
660         x0i = x1i - x3r;
661         a[j + 6] = wk3r * x0r - wk3i * x0i;
662         a[j + 7] = wk3r * x0i + wk3i * x0r;
663         x0r = wn4r * (wk1r - wk1i);
664         wk1i = wn4r * (wk1r + wk1i);
665         wk1r = x0r;
666         wk3r = wk1r - 2 * wk2r * wk1i;
667         wk3i = 2 * wk2r * wk1r - wk1i;
668         x0r = a[j + 8] + a[j + 10];
669         x0i = a[j + 9] + a[j + 11];
670         x1r = a[j + 8] - a[j + 10];
671         x1i = a[j + 9] - a[j + 11];
672         x2r = a[j + 12] + a[j + 14];
673         x2i = a[j + 13] + a[j + 15];
674         x3r = a[j + 12] - a[j + 14];
675         x3i = a[j + 13] - a[j + 15];
676         a[j + 8] = x0r + x2r;
677         a[j + 9] = x0i + x2i;
678         x0r -= x2r;
679         x0i -= x2i;
680         a[j + 12] = -wk2i * x0r - wk2r * x0i;
681         a[j + 13] = -wk2i * x0i + wk2r * x0r;
682         x0r = x1r - x3i;
683         x0i = x1i + x3r;
684         a[j + 10] = wk1r * x0r - wk1i * x0i;
685         a[j + 11] = wk1r * x0i + wk1i * x0r;
686         x0r = x1r + x3i;
687         x0i = x1i - x3r;
688         a[j + 14] = wk3r * x0r - wk3i * x0i;
689         a[j + 15] = wk3r * x0i + wk3i * x0r;
690     }
691 }
692 
693 
694 
695 void 	M_cftmdl(int n, int l, double *a)
696 {
697     int j, j1, j2, j3, k, kj, kr, m, m2;
698     double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
699     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
700 
701     m = l << 2;
702     for (j = 0; j < l; j += 2) {
703         j1 = j + l;
704         j2 = j1 + l;
705         j3 = j2 + l;
706         x0r = a[j] + a[j1];
707         x0i = a[j + 1] + a[j1 + 1];
708         x1r = a[j] - a[j1];
709         x1i = a[j + 1] - a[j1 + 1];
710         x2r = a[j2] + a[j3];
711         x2i = a[j2 + 1] + a[j3 + 1];
712         x3r = a[j2] - a[j3];
713         x3i = a[j2 + 1] - a[j3 + 1];
714         a[j] = x0r + x2r;
715         a[j + 1] = x0i + x2i;
716         a[j2] = x0r - x2r;
717         a[j2 + 1] = x0i - x2i;
718         a[j1] = x1r - x3i;
719         a[j1 + 1] = x1i + x3r;
720         a[j3] = x1r + x3i;
721         a[j3 + 1] = x1i - x3r;
722     }
723     wn4r = WR5000;
724     for (j = m; j < l + m; j += 2) {
725         j1 = j + l;
726         j2 = j1 + l;
727         j3 = j2 + l;
728         x0r = a[j] + a[j1];
729         x0i = a[j + 1] + a[j1 + 1];
730         x1r = a[j] - a[j1];
731         x1i = a[j + 1] - a[j1 + 1];
732         x2r = a[j2] + a[j3];
733         x2i = a[j2 + 1] + a[j3 + 1];
734         x3r = a[j2] - a[j3];
735         x3i = a[j2 + 1] - a[j3 + 1];
736         a[j] = x0r + x2r;
737         a[j + 1] = x0i + x2i;
738         a[j2] = x2i - x0i;
739         a[j2 + 1] = x0r - x2r;
740         x0r = x1r - x3i;
741         x0i = x1i + x3r;
742         a[j1] = wn4r * (x0r - x0i);
743         a[j1 + 1] = wn4r * (x0r + x0i);
744         x0r = x3i + x1r;
745         x0i = x3r - x1i;
746         a[j3] = wn4r * (x0i - x0r);
747         a[j3 + 1] = wn4r * (x0i + x0r);
748     }
749     ew = MM_PI_2 / n;
750     kr = 0;
751     m2 = 2 * m;
752     for (k = m2; k < n; k += m2) {
753         for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
754         wk1r = cos(ew * kr);
755         wk1i = sin(ew * kr);
756         wk2r = 1 - 2 * wk1i * wk1i;
757         wk2i = 2 * wk1i * wk1r;
758         wk3r = wk1r - 2 * wk2i * wk1i;
759         wk3i = 2 * wk2i * wk1r - wk1i;
760         for (j = k; j < l + k; j += 2) {
761             j1 = j + l;
762             j2 = j1 + l;
763             j3 = j2 + l;
764             x0r = a[j] + a[j1];
765             x0i = a[j + 1] + a[j1 + 1];
766             x1r = a[j] - a[j1];
767             x1i = a[j + 1] - a[j1 + 1];
768             x2r = a[j2] + a[j3];
769             x2i = a[j2 + 1] + a[j3 + 1];
770             x3r = a[j2] - a[j3];
771             x3i = a[j2 + 1] - a[j3 + 1];
772             a[j] = x0r + x2r;
773             a[j + 1] = x0i + x2i;
774             x0r -= x2r;
775             x0i -= x2i;
776             a[j2] = wk2r * x0r - wk2i * x0i;
777             a[j2 + 1] = wk2r * x0i + wk2i * x0r;
778             x0r = x1r - x3i;
779             x0i = x1i + x3r;
780             a[j1] = wk1r * x0r - wk1i * x0i;
781             a[j1 + 1] = wk1r * x0i + wk1i * x0r;
782             x0r = x1r + x3i;
783             x0i = x1i - x3r;
784             a[j3] = wk3r * x0r - wk3i * x0i;
785             a[j3 + 1] = wk3r * x0i + wk3i * x0r;
786         }
787         x0r = wn4r * (wk1r - wk1i);
788         wk1i = wn4r * (wk1r + wk1i);
789         wk1r = x0r;
790         wk3r = wk1r - 2 * wk2r * wk1i;
791         wk3i = 2 * wk2r * wk1r - wk1i;
792         for (j = k + m; j < l + (k + m); j += 2) {
793             j1 = j + l;
794             j2 = j1 + l;
795             j3 = j2 + l;
796             x0r = a[j] + a[j1];
797             x0i = a[j + 1] + a[j1 + 1];
798             x1r = a[j] - a[j1];
799             x1i = a[j + 1] - a[j1 + 1];
800             x2r = a[j2] + a[j3];
801             x2i = a[j2 + 1] + a[j3 + 1];
802             x3r = a[j2] - a[j3];
803             x3i = a[j2 + 1] - a[j3 + 1];
804             a[j] = x0r + x2r;
805             a[j + 1] = x0i + x2i;
806             x0r -= x2r;
807             x0i -= x2i;
808             a[j2] = -wk2i * x0r - wk2r * x0i;
809             a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
810             x0r = x1r - x3i;
811             x0i = x1i + x3r;
812             a[j1] = wk1r * x0r - wk1i * x0i;
813             a[j1 + 1] = wk1r * x0i + wk1i * x0r;
814             x0r = x1r + x3i;
815             x0i = x1i - x3r;
816             a[j3] = wk3r * x0r - wk3i * x0i;
817             a[j3 + 1] = wk3r * x0i + wk3i * x0r;
818         }
819     }
820 }
821 
822 
823 
824 void 	M_rftfsub(int n, double *a)
825 {
826     int i, i0, j, k;
827     double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
828 
829     ec = 2 * MM_PI_2 / n;
830     wkr = 0;
831     wki = 0;
832     wdi = cos(ec);
833     wdr = sin(ec);
834     wdi *= wdr;
835     wdr *= wdr;
836     w1r = 1 - 2 * wdr;
837     w1i = 2 * wdi;
838     ss = 2 * w1i;
839     i = n >> 1;
840     while (1) {
841         i0 = i - 4 * RDFT_LOOP_DIV;
842         if (i0 < 4) {
843             i0 = 4;
844         }
845         for (j = i - 4; j >= i0; j -= 4) {
846             k = n - j;
847             xr = a[j + 2] - a[k - 2];
848             xi = a[j + 3] + a[k - 1];
849             yr = wdr * xr - wdi * xi;
850             yi = wdr * xi + wdi * xr;
851             a[j + 2] -= yr;
852             a[j + 3] -= yi;
853             a[k - 2] += yr;
854             a[k - 1] -= yi;
855             wkr += ss * wdi;
856             wki += ss * (0.5 - wdr);
857             xr = a[j] - a[k];
858             xi = a[j + 1] + a[k + 1];
859             yr = wkr * xr - wki * xi;
860             yi = wkr * xi + wki * xr;
861             a[j] -= yr;
862             a[j + 1] -= yi;
863             a[k] += yr;
864             a[k + 1] -= yi;
865             wdr += ss * wki;
866             wdi += ss * (0.5 - wkr);
867         }
868         if (i0 == 4) {
869             break;
870         }
871         wkr = 0.5 * sin(ec * i0);
872         wki = 0.5 * cos(ec * i0);
873         wdr = 0.5 - (wkr * w1r - wki * w1i);
874         wdi = wkr * w1i + wki * w1r;
875         wkr = 0.5 - wkr;
876         i = i0;
877     }
878     xr = a[2] - a[n - 2];
879     xi = a[3] + a[n - 1];
880     yr = wdr * xr - wdi * xi;
881     yi = wdr * xi + wdi * xr;
882     a[2] -= yr;
883     a[3] -= yi;
884     a[n - 2] += yr;
885     a[n - 1] -= yi;
886 }
887 
888 
889 
890 void 	M_rftbsub(int n, double *a)
891 {
892     int i, i0, j, k;
893     double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
894 
895     ec = 2 * MM_PI_2 / n;
896     wkr = 0;
897     wki = 0;
898     wdi = cos(ec);
899     wdr = sin(ec);
900     wdi *= wdr;
901     wdr *= wdr;
902     w1r = 1 - 2 * wdr;
903     w1i = 2 * wdi;
904     ss = 2 * w1i;
905     i = n >> 1;
906     a[i + 1] = -a[i + 1];
907     while (1) {
908         i0 = i - 4 * RDFT_LOOP_DIV;
909         if (i0 < 4) {
910             i0 = 4;
911         }
912         for (j = i - 4; j >= i0; j -= 4) {
913             k = n - j;
914             xr = a[j + 2] - a[k - 2];
915             xi = a[j + 3] + a[k - 1];
916             yr = wdr * xr + wdi * xi;
917             yi = wdr * xi - wdi * xr;
918             a[j + 2] -= yr;
919             a[j + 3] = yi - a[j + 3];
920             a[k - 2] += yr;
921             a[k - 1] = yi - a[k - 1];
922             wkr += ss * wdi;
923             wki += ss * (0.5 - wdr);
924             xr = a[j] - a[k];
925             xi = a[j + 1] + a[k + 1];
926             yr = wkr * xr + wki * xi;
927             yi = wkr * xi - wki * xr;
928             a[j] -= yr;
929             a[j + 1] = yi - a[j + 1];
930             a[k] += yr;
931             a[k + 1] = yi - a[k + 1];
932             wdr += ss * wki;
933             wdi += ss * (0.5 - wkr);
934         }
935         if (i0 == 4) {
936             break;
937         }
938         wkr = 0.5 * sin(ec * i0);
939         wki = 0.5 * cos(ec * i0);
940         wdr = 0.5 - (wkr * w1r - wki * w1i);
941         wdi = wkr * w1i + wki * w1r;
942         wkr = 0.5 - wkr;
943         i = i0;
944     }
945     xr = a[2] - a[n - 2];
946     xi = a[3] + a[n - 1];
947     yr = wdr * xr + wdi * xi;
948     yi = wdr * xi - wdi * xr;
949     a[2] -= yr;
950     a[3] = yi - a[3];
951     a[n - 2] += yr;
952     a[n - 1] = yi - a[n - 1];
953     a[1] = -a[1];
954 }
955 
956