1 2 /* 3 * M_APM - mapm_fft.c 4 * 5 * This FFT (Fast Fourier Transform) is from Takuya OOURA 6 * 7 * Copyright(C) 1996-1999 Takuya OOURA 8 * email: ooura@mmm.t.u-tokyo.ac.jp 9 * 10 * See full FFT documentation below ... (MCR) 11 * 12 * This software is provided "as is" without express or implied warranty. 13 */ 14 15 /* 16 * $Id: mapm_fft.c,v 1.15 2007/12/03 01:37:42 mike Exp $ 17 * 18 * This file contains the FFT based FAST MULTIPLICATION function 19 * as well as its support functions. 20 * 21 * $Log: mapm_fft.c,v $ 22 * Revision 1.15 2007/12/03 01:37:42 mike 23 * no changes 24 * 25 * Revision 1.14 2003/07/28 19:39:01 mike 26 * change 16 bit constant 27 * 28 * Revision 1.13 2003/07/21 20:11:55 mike 29 * Modify error messages to be in a consistent format. 30 * 31 * Revision 1.12 2003/05/01 21:55:36 mike 32 * remove math.h 33 * 34 * Revision 1.11 2003/03/31 22:10:09 mike 35 * call generic error handling function 36 * 37 * Revision 1.10 2002/11/03 22:11:48 mike 38 * Updated function parameters to use the modern style 39 * 40 * Revision 1.9 2001/07/16 19:16:15 mike 41 * add function M_free_all_fft 42 * 43 * Revision 1.8 2000/08/01 22:23:24 mike 44 * use sizeof(int) from function call to stop 45 * some compilers from complaining. 46 * 47 * Revision 1.7 2000/07/30 22:39:21 mike 48 * lower 16 bit malloc size 49 * 50 * Revision 1.6 2000/07/10 22:54:26 mike 51 * malloc the local data arrays 52 * 53 * Revision 1.5 2000/07/10 00:09:02 mike 54 * use local static arrays for smaller numbers 55 * 56 * Revision 1.4 2000/07/08 18:24:23 mike 57 * minor optimization tweak 58 * 59 * Revision 1.3 2000/07/08 17:52:49 mike 60 * do the FFT in base 10000 instead of MAPM numbers base 100 61 * this runs faster and uses 1/2 the RAM 62 * 63 * Revision 1.2 2000/07/06 21:04:34 mike 64 * added more comments 65 * 66 * Revision 1.1 2000/07/06 20:42:05 mike 67 * Initial revision 68 */ 69 70 #include "m_apm_lc.h" 71 72 #ifndef MM_PI_2 73 #define MM_PI_2 1.570796326794896619231321691639751442098584699687 74 #endif 75 76 #ifndef WR5000 /* cos(MM_PI_2*0.5000) */ 77 #define WR5000 0.707106781186547524400844362104849039284835937688 78 #endif 79 80 #ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */ 81 #define RDFT_LOOP_DIV 64 82 #endif 83 84 extern void M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int); 85 86 extern void M_rdft(int, int, double *); 87 extern void M_bitrv2(int, double *); 88 extern void M_cftfsub(int, double *); 89 extern void M_cftbsub(int, double *); 90 extern void M_rftfsub(int, double *); 91 extern void M_rftbsub(int, double *); 92 extern void M_cft1st(int, double *); 93 extern void M_cftmdl(int, int, double *); 94 95 static double *M_aa_array, *M_bb_array; 96 static int M_size = -1; 97 98 static char *M_fft_error_msg = "\'M_fast_mul_fft\', Out of memory"; 99 100 /****************************************************************************/ 101 void M_free_all_fft() 102 { 103 if (M_size > 0) 104 { 105 MAPM_FREE(M_aa_array); 106 MAPM_FREE(M_bb_array); 107 M_size = -1; 108 } 109 } 110 /****************************************************************************/ 111 /* 112 * multiply 'uu' by 'vv' with nbytes each 113 * yielding a 2*nbytes result in 'ww'. 114 * each byte contains a base 100 'digit', 115 * i.e.: range from 0-99. 116 * 117 * MSB LSB 118 * 119 * uu,vv [0] [1] [2] ... [N-1] 120 * ww [0] [1] [2] ... [2N-1] 121 */ 122 123 void M_fast_mul_fft(UCHAR *ww, UCHAR *uu, UCHAR *vv, int nbytes) 124 { 125 int mflag, i, j, nn2, nn; 126 double carry, nnr, dtemp, *a, *b; 127 UCHAR *w0; 128 unsigned long ul; 129 130 if (M_size < 0) /* if first time in, setup working arrays */ 131 { 132 if (M_get_sizeof_int() == 2) /* if still using 16 bit compilers */ 133 M_size = 516; 134 else 135 M_size = 8200; 136 137 M_aa_array = (double *)MAPM_MALLOC(M_size * sizeof(double)); 138 M_bb_array = (double *)MAPM_MALLOC(M_size * sizeof(double)); 139 140 if ((M_aa_array == NULL) || (M_bb_array == NULL)) 141 { 142 /* fatal, this does not return */ 143 144 M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg); 145 } 146 } 147 148 nn = nbytes; 149 nn2 = nbytes >> 1; 150 151 if (nn > M_size) 152 { 153 mflag = TRUE; 154 155 a = (double *)MAPM_MALLOC((nn + 8) * sizeof(double)); 156 b = (double *)MAPM_MALLOC((nn + 8) * sizeof(double)); 157 158 if ((a == NULL) || (b == NULL)) 159 { 160 /* fatal, this does not return */ 161 162 M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg); 163 } 164 } 165 else 166 { 167 mflag = FALSE; 168 169 a = M_aa_array; 170 b = M_bb_array; 171 } 172 173 /* 174 * convert normal base 100 MAPM numbers to base 10000 175 * for the FFT operation. 176 */ 177 178 i = 0; 179 for (j=0; j < nn2; j++) 180 { 181 a[j] = (double)((int)uu[i] * 100 + uu[i+1]); 182 b[j] = (double)((int)vv[i] * 100 + vv[i+1]); 183 i += 2; 184 } 185 186 /* zero fill the second half of the arrays */ 187 188 for (j=nn2; j < nn; j++) 189 { 190 a[j] = 0.0; 191 b[j] = 0.0; 192 } 193 194 /* perform the forward Fourier transforms for both numbers */ 195 196 M_rdft(nn, 1, a); 197 M_rdft(nn, 1, b); 198 199 /* perform the convolution ... */ 200 201 b[0] *= a[0]; 202 b[1] *= a[1]; 203 204 for (j=3; j <= nn; j += 2) 205 { 206 dtemp = b[j-1]; 207 b[j-1] = dtemp * a[j-1] - b[j] * a[j]; 208 b[j] = dtemp * a[j] + b[j] * a[j-1]; 209 } 210 211 /* perform the inverse transform on the result */ 212 213 M_rdft(nn, -1, b); 214 215 /* perform a final pass to release all the carries */ 216 /* we are still in base 10000 at this point */ 217 218 carry = 0.0; 219 j = nn; 220 nnr = 2.0 / (double)nn; 221 222 while (1) 223 { 224 dtemp = b[--j] * nnr + carry + 0.5; 225 ul = (unsigned long)(dtemp * 1.0E-4); 226 carry = (double)ul; 227 b[j] = dtemp - carry * 10000.0; 228 229 if (j == 0) 230 break; 231 } 232 233 /* copy result to our destination after converting back to base 100 */ 234 235 w0 = ww; 236 M_get_div_rem((int)ul, w0, (w0 + 1)); 237 238 for (j=0; j <= (nn - 2); j++) 239 { 240 w0 += 2; 241 M_get_div_rem((int)b[j], w0, (w0 + 1)); 242 } 243 244 if (mflag) 245 { 246 MAPM_FREE(b); 247 MAPM_FREE(a); 248 } 249 } 250 /****************************************************************************/ 251 252 /* 253 * The following info is from Takuya OOURA's documentation : 254 * 255 * NOTE : MAPM only uses the 'RDFT' function (as well as the 256 * functions RDFT calls). All the code from here down 257 * in this file is from Takuya OOURA. The only change I 258 * made was to add 'M_' in front of all the functions 259 * I used. This was to guard against any possible 260 * name collisions in the future. 261 * 262 * MCR 06 July 2000 263 * 264 * 265 * General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package 266 * 267 * Description: 268 * A package to calculate Discrete Fourier/Cosine/Sine Transforms of 269 * 1-dimensional sequences of length 2^N. 270 * 271 * fft4g_h.c : FFT Package in C - Simple Version I (radix 4,2) 272 * 273 * rdft: Real Discrete Fourier Transform 274 * 275 * Method: 276 * -------- rdft -------- 277 * A method with a following butterfly operation appended to "cdft". 278 * In forward transform : 279 * A[k] = sum_j=0^n-1 a[j]*W(n)^(j*k), 0<=k<=n/2, 280 * W(n) = exp(2*pi*i/n), 281 * this routine makes an array x[] : 282 * x[j] = a[2*j] + i*a[2*j+1], 0<=j<n/2 283 * and calls "cdft" of length n/2 : 284 * X[k] = sum_j=0^n/2-1 x[j] * W(n/2)^(j*k), 0<=k<n. 285 * The result A[k] are : 286 * A[k] = X[k] - (1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k])), 287 * A[n/2-k] = X[n/2-k] + 288 * conjg((1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k]))), 289 * 0<=k<=n/2 290 * (notes: conjg() is a complex conjugate, X[n/2]=X[0]). 291 * ---------------------- 292 * 293 * Reference: 294 * * Masatake MORI, Makoto NATORI, Tatuo TORII: Suchikeisan, 295 * Iwanamikouzajyouhoukagaku18, Iwanami, 1982 (Japanese) 296 * * Henri J. Nussbaumer: Fast Fourier Transform and Convolution 297 * Algorithms, Springer Verlag, 1982 298 * * C. S. Burrus, Notes on the FFT (with large FFT paper list) 299 * http://www-dsp.rice.edu/research/fft/fftnote.asc 300 * 301 * Copyright: 302 * Copyright(C) 1996-1999 Takuya OOURA 303 * email: ooura@mmm.t.u-tokyo.ac.jp 304 * download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html 305 * You may use, copy, modify this code for any purpose and 306 * without fee. You may distribute this ORIGINAL package. 307 */ 308 309 /* 310 311 functions 312 rdft: Real Discrete Fourier Transform 313 314 function prototypes 315 void rdft(int, int, double *); 316 317 -------- Real DFT / Inverse of Real DFT -------- 318 [definition] 319 <case1> RDFT 320 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 321 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 322 <case2> IRDFT (excluding scale) 323 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + 324 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + 325 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n 326 [usage] 327 <case1> 328 rdft(n, 1, a); 329 <case2> 330 rdft(n, -1, a); 331 [parameters] 332 n :data length (int) 333 n >= 2, n = power of 2 334 a[0...n-1] :input/output data (double *) 335 <case1> 336 output data 337 a[2*k] = R[k], 0<=k<n/2 338 a[2*k+1] = I[k], 0<k<n/2 339 a[1] = R[n/2] 340 <case2> 341 input data 342 a[2*j] = R[j], 0<=j<n/2 343 a[2*j+1] = I[j], 0<j<n/2 344 a[1] = R[n/2] 345 [remark] 346 Inverse of 347 rdft(n, 1, a); 348 is 349 rdft(n, -1, a); 350 for (j = 0; j <= n - 1; j++) { 351 a[j] *= 2.0 / n; 352 } 353 */ 354 355 356 void M_rdft(int n, int isgn, double *a) 357 { 358 double xi; 359 360 if (isgn >= 0) { 361 if (n > 4) { 362 M_bitrv2(n, a); 363 M_cftfsub(n, a); 364 M_rftfsub(n, a); 365 } else if (n == 4) { 366 M_cftfsub(n, a); 367 } 368 xi = a[0] - a[1]; 369 a[0] += a[1]; 370 a[1] = xi; 371 } else { 372 a[1] = 0.5 * (a[0] - a[1]); 373 a[0] -= a[1]; 374 if (n > 4) { 375 M_rftbsub(n, a); 376 M_bitrv2(n, a); 377 M_cftbsub(n, a); 378 } else if (n == 4) { 379 M_cftfsub(n, a); 380 } 381 } 382 } 383 384 385 386 void M_bitrv2(int n, double *a) 387 { 388 int j0, k0, j1, k1, l, m, i, j, k; 389 double xr, xi, yr, yi; 390 391 l = n >> 2; 392 m = 2; 393 while (m < l) { 394 l >>= 1; 395 m <<= 1; 396 } 397 if (m == l) { 398 j0 = 0; 399 for (k0 = 0; k0 < m; k0 += 2) { 400 k = k0; 401 for (j = j0; j < j0 + k0; j += 2) { 402 xr = a[j]; 403 xi = a[j + 1]; 404 yr = a[k]; 405 yi = a[k + 1]; 406 a[j] = yr; 407 a[j + 1] = yi; 408 a[k] = xr; 409 a[k + 1] = xi; 410 j1 = j + m; 411 k1 = k + 2 * m; 412 xr = a[j1]; 413 xi = a[j1 + 1]; 414 yr = a[k1]; 415 yi = a[k1 + 1]; 416 a[j1] = yr; 417 a[j1 + 1] = yi; 418 a[k1] = xr; 419 a[k1 + 1] = xi; 420 j1 += m; 421 k1 -= m; 422 xr = a[j1]; 423 xi = a[j1 + 1]; 424 yr = a[k1]; 425 yi = a[k1 + 1]; 426 a[j1] = yr; 427 a[j1 + 1] = yi; 428 a[k1] = xr; 429 a[k1 + 1] = xi; 430 j1 += m; 431 k1 += 2 * m; 432 xr = a[j1]; 433 xi = a[j1 + 1]; 434 yr = a[k1]; 435 yi = a[k1 + 1]; 436 a[j1] = yr; 437 a[j1 + 1] = yi; 438 a[k1] = xr; 439 a[k1 + 1] = xi; 440 for (i = n >> 1; i > (k ^= i); i >>= 1); 441 } 442 j1 = j0 + k0 + m; 443 k1 = j1 + m; 444 xr = a[j1]; 445 xi = a[j1 + 1]; 446 yr = a[k1]; 447 yi = a[k1 + 1]; 448 a[j1] = yr; 449 a[j1 + 1] = yi; 450 a[k1] = xr; 451 a[k1 + 1] = xi; 452 for (i = n >> 1; i > (j0 ^= i); i >>= 1); 453 } 454 } else { 455 j0 = 0; 456 for (k0 = 2; k0 < m; k0 += 2) { 457 for (i = n >> 1; i > (j0 ^= i); i >>= 1); 458 k = k0; 459 for (j = j0; j < j0 + k0; j += 2) { 460 xr = a[j]; 461 xi = a[j + 1]; 462 yr = a[k]; 463 yi = a[k + 1]; 464 a[j] = yr; 465 a[j + 1] = yi; 466 a[k] = xr; 467 a[k + 1] = xi; 468 j1 = j + m; 469 k1 = k + m; 470 xr = a[j1]; 471 xi = a[j1 + 1]; 472 yr = a[k1]; 473 yi = a[k1 + 1]; 474 a[j1] = yr; 475 a[j1 + 1] = yi; 476 a[k1] = xr; 477 a[k1 + 1] = xi; 478 for (i = n >> 1; i > (k ^= i); i >>= 1); 479 } 480 } 481 } 482 } 483 484 485 486 void M_cftfsub(int n, double *a) 487 { 488 int j, j1, j2, j3, l; 489 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 490 491 l = 2; 492 if (n > 8) { 493 M_cft1st(n, a); 494 l = 8; 495 while ((l << 2) < n) { 496 M_cftmdl(n, l, a); 497 l <<= 2; 498 } 499 } 500 if ((l << 2) == n) { 501 for (j = 0; j < l; j += 2) { 502 j1 = j + l; 503 j2 = j1 + l; 504 j3 = j2 + l; 505 x0r = a[j] + a[j1]; 506 x0i = a[j + 1] + a[j1 + 1]; 507 x1r = a[j] - a[j1]; 508 x1i = a[j + 1] - a[j1 + 1]; 509 x2r = a[j2] + a[j3]; 510 x2i = a[j2 + 1] + a[j3 + 1]; 511 x3r = a[j2] - a[j3]; 512 x3i = a[j2 + 1] - a[j3 + 1]; 513 a[j] = x0r + x2r; 514 a[j + 1] = x0i + x2i; 515 a[j2] = x0r - x2r; 516 a[j2 + 1] = x0i - x2i; 517 a[j1] = x1r - x3i; 518 a[j1 + 1] = x1i + x3r; 519 a[j3] = x1r + x3i; 520 a[j3 + 1] = x1i - x3r; 521 } 522 } else { 523 for (j = 0; j < l; j += 2) { 524 j1 = j + l; 525 x0r = a[j] - a[j1]; 526 x0i = a[j + 1] - a[j1 + 1]; 527 a[j] += a[j1]; 528 a[j + 1] += a[j1 + 1]; 529 a[j1] = x0r; 530 a[j1 + 1] = x0i; 531 } 532 } 533 } 534 535 536 537 void M_cftbsub(int n, double *a) 538 { 539 int j, j1, j2, j3, l; 540 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 541 542 l = 2; 543 if (n > 8) { 544 M_cft1st(n, a); 545 l = 8; 546 while ((l << 2) < n) { 547 M_cftmdl(n, l, a); 548 l <<= 2; 549 } 550 } 551 if ((l << 2) == n) { 552 for (j = 0; j < l; j += 2) { 553 j1 = j + l; 554 j2 = j1 + l; 555 j3 = j2 + l; 556 x0r = a[j] + a[j1]; 557 x0i = -a[j + 1] - a[j1 + 1]; 558 x1r = a[j] - a[j1]; 559 x1i = -a[j + 1] + a[j1 + 1]; 560 x2r = a[j2] + a[j3]; 561 x2i = a[j2 + 1] + a[j3 + 1]; 562 x3r = a[j2] - a[j3]; 563 x3i = a[j2 + 1] - a[j3 + 1]; 564 a[j] = x0r + x2r; 565 a[j + 1] = x0i - x2i; 566 a[j2] = x0r - x2r; 567 a[j2 + 1] = x0i + x2i; 568 a[j1] = x1r - x3i; 569 a[j1 + 1] = x1i - x3r; 570 a[j3] = x1r + x3i; 571 a[j3 + 1] = x1i + x3r; 572 } 573 } else { 574 for (j = 0; j < l; j += 2) { 575 j1 = j + l; 576 x0r = a[j] - a[j1]; 577 x0i = -a[j + 1] + a[j1 + 1]; 578 a[j] += a[j1]; 579 a[j + 1] = -a[j + 1] - a[j1 + 1]; 580 a[j1] = x0r; 581 a[j1 + 1] = x0i; 582 } 583 } 584 } 585 586 587 588 void M_cft1st(int n, double *a) 589 { 590 int j, kj, kr; 591 double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 592 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 593 594 x0r = a[0] + a[2]; 595 x0i = a[1] + a[3]; 596 x1r = a[0] - a[2]; 597 x1i = a[1] - a[3]; 598 x2r = a[4] + a[6]; 599 x2i = a[5] + a[7]; 600 x3r = a[4] - a[6]; 601 x3i = a[5] - a[7]; 602 a[0] = x0r + x2r; 603 a[1] = x0i + x2i; 604 a[4] = x0r - x2r; 605 a[5] = x0i - x2i; 606 a[2] = x1r - x3i; 607 a[3] = x1i + x3r; 608 a[6] = x1r + x3i; 609 a[7] = x1i - x3r; 610 wn4r = WR5000; 611 x0r = a[8] + a[10]; 612 x0i = a[9] + a[11]; 613 x1r = a[8] - a[10]; 614 x1i = a[9] - a[11]; 615 x2r = a[12] + a[14]; 616 x2i = a[13] + a[15]; 617 x3r = a[12] - a[14]; 618 x3i = a[13] - a[15]; 619 a[8] = x0r + x2r; 620 a[9] = x0i + x2i; 621 a[12] = x2i - x0i; 622 a[13] = x0r - x2r; 623 x0r = x1r - x3i; 624 x0i = x1i + x3r; 625 a[10] = wn4r * (x0r - x0i); 626 a[11] = wn4r * (x0r + x0i); 627 x0r = x3i + x1r; 628 x0i = x3r - x1i; 629 a[14] = wn4r * (x0i - x0r); 630 a[15] = wn4r * (x0i + x0r); 631 ew = MM_PI_2 / n; 632 kr = 0; 633 for (j = 16; j < n; j += 16) { 634 for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1); 635 wk1r = cos(ew * kr); 636 wk1i = sin(ew * kr); 637 wk2r = 1 - 2 * wk1i * wk1i; 638 wk2i = 2 * wk1i * wk1r; 639 wk3r = wk1r - 2 * wk2i * wk1i; 640 wk3i = 2 * wk2i * wk1r - wk1i; 641 x0r = a[j] + a[j + 2]; 642 x0i = a[j + 1] + a[j + 3]; 643 x1r = a[j] - a[j + 2]; 644 x1i = a[j + 1] - a[j + 3]; 645 x2r = a[j + 4] + a[j + 6]; 646 x2i = a[j + 5] + a[j + 7]; 647 x3r = a[j + 4] - a[j + 6]; 648 x3i = a[j + 5] - a[j + 7]; 649 a[j] = x0r + x2r; 650 a[j + 1] = x0i + x2i; 651 x0r -= x2r; 652 x0i -= x2i; 653 a[j + 4] = wk2r * x0r - wk2i * x0i; 654 a[j + 5] = wk2r * x0i + wk2i * x0r; 655 x0r = x1r - x3i; 656 x0i = x1i + x3r; 657 a[j + 2] = wk1r * x0r - wk1i * x0i; 658 a[j + 3] = wk1r * x0i + wk1i * x0r; 659 x0r = x1r + x3i; 660 x0i = x1i - x3r; 661 a[j + 6] = wk3r * x0r - wk3i * x0i; 662 a[j + 7] = wk3r * x0i + wk3i * x0r; 663 x0r = wn4r * (wk1r - wk1i); 664 wk1i = wn4r * (wk1r + wk1i); 665 wk1r = x0r; 666 wk3r = wk1r - 2 * wk2r * wk1i; 667 wk3i = 2 * wk2r * wk1r - wk1i; 668 x0r = a[j + 8] + a[j + 10]; 669 x0i = a[j + 9] + a[j + 11]; 670 x1r = a[j + 8] - a[j + 10]; 671 x1i = a[j + 9] - a[j + 11]; 672 x2r = a[j + 12] + a[j + 14]; 673 x2i = a[j + 13] + a[j + 15]; 674 x3r = a[j + 12] - a[j + 14]; 675 x3i = a[j + 13] - a[j + 15]; 676 a[j + 8] = x0r + x2r; 677 a[j + 9] = x0i + x2i; 678 x0r -= x2r; 679 x0i -= x2i; 680 a[j + 12] = -wk2i * x0r - wk2r * x0i; 681 a[j + 13] = -wk2i * x0i + wk2r * x0r; 682 x0r = x1r - x3i; 683 x0i = x1i + x3r; 684 a[j + 10] = wk1r * x0r - wk1i * x0i; 685 a[j + 11] = wk1r * x0i + wk1i * x0r; 686 x0r = x1r + x3i; 687 x0i = x1i - x3r; 688 a[j + 14] = wk3r * x0r - wk3i * x0i; 689 a[j + 15] = wk3r * x0i + wk3i * x0r; 690 } 691 } 692 693 694 695 void M_cftmdl(int n, int l, double *a) 696 { 697 int j, j1, j2, j3, k, kj, kr, m, m2; 698 double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 699 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 700 701 m = l << 2; 702 for (j = 0; j < l; j += 2) { 703 j1 = j + l; 704 j2 = j1 + l; 705 j3 = j2 + l; 706 x0r = a[j] + a[j1]; 707 x0i = a[j + 1] + a[j1 + 1]; 708 x1r = a[j] - a[j1]; 709 x1i = a[j + 1] - a[j1 + 1]; 710 x2r = a[j2] + a[j3]; 711 x2i = a[j2 + 1] + a[j3 + 1]; 712 x3r = a[j2] - a[j3]; 713 x3i = a[j2 + 1] - a[j3 + 1]; 714 a[j] = x0r + x2r; 715 a[j + 1] = x0i + x2i; 716 a[j2] = x0r - x2r; 717 a[j2 + 1] = x0i - x2i; 718 a[j1] = x1r - x3i; 719 a[j1 + 1] = x1i + x3r; 720 a[j3] = x1r + x3i; 721 a[j3 + 1] = x1i - x3r; 722 } 723 wn4r = WR5000; 724 for (j = m; j < l + m; j += 2) { 725 j1 = j + l; 726 j2 = j1 + l; 727 j3 = j2 + l; 728 x0r = a[j] + a[j1]; 729 x0i = a[j + 1] + a[j1 + 1]; 730 x1r = a[j] - a[j1]; 731 x1i = a[j + 1] - a[j1 + 1]; 732 x2r = a[j2] + a[j3]; 733 x2i = a[j2 + 1] + a[j3 + 1]; 734 x3r = a[j2] - a[j3]; 735 x3i = a[j2 + 1] - a[j3 + 1]; 736 a[j] = x0r + x2r; 737 a[j + 1] = x0i + x2i; 738 a[j2] = x2i - x0i; 739 a[j2 + 1] = x0r - x2r; 740 x0r = x1r - x3i; 741 x0i = x1i + x3r; 742 a[j1] = wn4r * (x0r - x0i); 743 a[j1 + 1] = wn4r * (x0r + x0i); 744 x0r = x3i + x1r; 745 x0i = x3r - x1i; 746 a[j3] = wn4r * (x0i - x0r); 747 a[j3 + 1] = wn4r * (x0i + x0r); 748 } 749 ew = MM_PI_2 / n; 750 kr = 0; 751 m2 = 2 * m; 752 for (k = m2; k < n; k += m2) { 753 for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1); 754 wk1r = cos(ew * kr); 755 wk1i = sin(ew * kr); 756 wk2r = 1 - 2 * wk1i * wk1i; 757 wk2i = 2 * wk1i * wk1r; 758 wk3r = wk1r - 2 * wk2i * wk1i; 759 wk3i = 2 * wk2i * wk1r - wk1i; 760 for (j = k; j < l + k; j += 2) { 761 j1 = j + l; 762 j2 = j1 + l; 763 j3 = j2 + l; 764 x0r = a[j] + a[j1]; 765 x0i = a[j + 1] + a[j1 + 1]; 766 x1r = a[j] - a[j1]; 767 x1i = a[j + 1] - a[j1 + 1]; 768 x2r = a[j2] + a[j3]; 769 x2i = a[j2 + 1] + a[j3 + 1]; 770 x3r = a[j2] - a[j3]; 771 x3i = a[j2 + 1] - a[j3 + 1]; 772 a[j] = x0r + x2r; 773 a[j + 1] = x0i + x2i; 774 x0r -= x2r; 775 x0i -= x2i; 776 a[j2] = wk2r * x0r - wk2i * x0i; 777 a[j2 + 1] = wk2r * x0i + wk2i * x0r; 778 x0r = x1r - x3i; 779 x0i = x1i + x3r; 780 a[j1] = wk1r * x0r - wk1i * x0i; 781 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 782 x0r = x1r + x3i; 783 x0i = x1i - x3r; 784 a[j3] = wk3r * x0r - wk3i * x0i; 785 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 786 } 787 x0r = wn4r * (wk1r - wk1i); 788 wk1i = wn4r * (wk1r + wk1i); 789 wk1r = x0r; 790 wk3r = wk1r - 2 * wk2r * wk1i; 791 wk3i = 2 * wk2r * wk1r - wk1i; 792 for (j = k + m; j < l + (k + m); j += 2) { 793 j1 = j + l; 794 j2 = j1 + l; 795 j3 = j2 + l; 796 x0r = a[j] + a[j1]; 797 x0i = a[j + 1] + a[j1 + 1]; 798 x1r = a[j] - a[j1]; 799 x1i = a[j + 1] - a[j1 + 1]; 800 x2r = a[j2] + a[j3]; 801 x2i = a[j2 + 1] + a[j3 + 1]; 802 x3r = a[j2] - a[j3]; 803 x3i = a[j2 + 1] - a[j3 + 1]; 804 a[j] = x0r + x2r; 805 a[j + 1] = x0i + x2i; 806 x0r -= x2r; 807 x0i -= x2i; 808 a[j2] = -wk2i * x0r - wk2r * x0i; 809 a[j2 + 1] = -wk2i * x0i + wk2r * x0r; 810 x0r = x1r - x3i; 811 x0i = x1i + x3r; 812 a[j1] = wk1r * x0r - wk1i * x0i; 813 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 814 x0r = x1r + x3i; 815 x0i = x1i - x3r; 816 a[j3] = wk3r * x0r - wk3i * x0i; 817 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 818 } 819 } 820 } 821 822 823 824 void M_rftfsub(int n, double *a) 825 { 826 int i, i0, j, k; 827 double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; 828 829 ec = 2 * MM_PI_2 / n; 830 wkr = 0; 831 wki = 0; 832 wdi = cos(ec); 833 wdr = sin(ec); 834 wdi *= wdr; 835 wdr *= wdr; 836 w1r = 1 - 2 * wdr; 837 w1i = 2 * wdi; 838 ss = 2 * w1i; 839 i = n >> 1; 840 while (1) { 841 i0 = i - 4 * RDFT_LOOP_DIV; 842 if (i0 < 4) { 843 i0 = 4; 844 } 845 for (j = i - 4; j >= i0; j -= 4) { 846 k = n - j; 847 xr = a[j + 2] - a[k - 2]; 848 xi = a[j + 3] + a[k - 1]; 849 yr = wdr * xr - wdi * xi; 850 yi = wdr * xi + wdi * xr; 851 a[j + 2] -= yr; 852 a[j + 3] -= yi; 853 a[k - 2] += yr; 854 a[k - 1] -= yi; 855 wkr += ss * wdi; 856 wki += ss * (0.5 - wdr); 857 xr = a[j] - a[k]; 858 xi = a[j + 1] + a[k + 1]; 859 yr = wkr * xr - wki * xi; 860 yi = wkr * xi + wki * xr; 861 a[j] -= yr; 862 a[j + 1] -= yi; 863 a[k] += yr; 864 a[k + 1] -= yi; 865 wdr += ss * wki; 866 wdi += ss * (0.5 - wkr); 867 } 868 if (i0 == 4) { 869 break; 870 } 871 wkr = 0.5 * sin(ec * i0); 872 wki = 0.5 * cos(ec * i0); 873 wdr = 0.5 - (wkr * w1r - wki * w1i); 874 wdi = wkr * w1i + wki * w1r; 875 wkr = 0.5 - wkr; 876 i = i0; 877 } 878 xr = a[2] - a[n - 2]; 879 xi = a[3] + a[n - 1]; 880 yr = wdr * xr - wdi * xi; 881 yi = wdr * xi + wdi * xr; 882 a[2] -= yr; 883 a[3] -= yi; 884 a[n - 2] += yr; 885 a[n - 1] -= yi; 886 } 887 888 889 890 void M_rftbsub(int n, double *a) 891 { 892 int i, i0, j, k; 893 double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi; 894 895 ec = 2 * MM_PI_2 / n; 896 wkr = 0; 897 wki = 0; 898 wdi = cos(ec); 899 wdr = sin(ec); 900 wdi *= wdr; 901 wdr *= wdr; 902 w1r = 1 - 2 * wdr; 903 w1i = 2 * wdi; 904 ss = 2 * w1i; 905 i = n >> 1; 906 a[i + 1] = -a[i + 1]; 907 while (1) { 908 i0 = i - 4 * RDFT_LOOP_DIV; 909 if (i0 < 4) { 910 i0 = 4; 911 } 912 for (j = i - 4; j >= i0; j -= 4) { 913 k = n - j; 914 xr = a[j + 2] - a[k - 2]; 915 xi = a[j + 3] + a[k - 1]; 916 yr = wdr * xr + wdi * xi; 917 yi = wdr * xi - wdi * xr; 918 a[j + 2] -= yr; 919 a[j + 3] = yi - a[j + 3]; 920 a[k - 2] += yr; 921 a[k - 1] = yi - a[k - 1]; 922 wkr += ss * wdi; 923 wki += ss * (0.5 - wdr); 924 xr = a[j] - a[k]; 925 xi = a[j + 1] + a[k + 1]; 926 yr = wkr * xr + wki * xi; 927 yi = wkr * xi - wki * xr; 928 a[j] -= yr; 929 a[j + 1] = yi - a[j + 1]; 930 a[k] += yr; 931 a[k + 1] = yi - a[k + 1]; 932 wdr += ss * wki; 933 wdi += ss * (0.5 - wkr); 934 } 935 if (i0 == 4) { 936 break; 937 } 938 wkr = 0.5 * sin(ec * i0); 939 wki = 0.5 * cos(ec * i0); 940 wdr = 0.5 - (wkr * w1r - wki * w1i); 941 wdi = wkr * w1i + wki * w1r; 942 wkr = 0.5 - wkr; 943 i = i0; 944 } 945 xr = a[2] - a[n - 2]; 946 xi = a[3] + a[n - 1]; 947 yr = wdr * xr + wdi * xi; 948 yi = wdr * xi - wdi * xr; 949 a[2] -= yr; 950 a[3] = yi - a[3]; 951 a[n - 2] += yr; 952 a[n - 1] = yi - a[n - 1]; 953 a[1] = -a[1]; 954 } 955 956