xref: /haiku/src/libs/mapm/mapmfmul.c (revision 59d799dabcba86f92658ddb402f634e262d9aae7)
1 
2 /*
3  *  M_APM  -  mapmfmul.c
4  *
5  *  Copyright (C) 1999 - 2007   Michael C. Ring
6  *
7  *  Permission to use, copy, and distribute this software and its
8  *  documentation for any purpose with or without fee is hereby granted,
9  *  provided that the above copyright notice appear in all copies and
10  *  that both that copyright notice and this permission notice appear
11  *  in supporting documentation.
12  *
13  *  Permission to modify the software is granted. Permission to distribute
14  *  the modified code is granted. Modifications are to be distributed by
15  *  using the file 'license.txt' as a template to modify the file header.
16  *  'license.txt' is available in the official MAPM distribution.
17  *
18  *  This software is provided "as is" without express or implied warranty.
19  */
20 
21 /*
22  *      $Id: mapmfmul.c,v 1.33 2007/12/03 01:52:22 mike Exp $
23  *
24  *      This file contains the divide-and-conquer FAST MULTIPLICATION
25  *	function as well as its support functions.
26  *
27  *      $Log: mapmfmul.c,v $
28  *      Revision 1.33  2007/12/03 01:52:22  mike
29  *      Update license
30  *
31  *      Revision 1.32  2004/02/18 03:16:15  mike
32  *      optimize 4 byte multiply (when FFT is disabled)
33  *
34  *      Revision 1.31  2003/12/04 01:14:06  mike
35  *      redo math on 'borrow'
36  *
37  *      Revision 1.30  2003/07/21 20:34:18  mike
38  *      Modify error messages to be in a consistent format.
39  *
40  *      Revision 1.29  2003/03/31 21:55:07  mike
41  *      call generic error handling function
42  *
43  *      Revision 1.28  2002/11/03 22:38:15  mike
44  *      Updated function parameters to use the modern style
45  *
46  *      Revision 1.27  2002/02/14 19:53:32  mike
47  *      add conditional compiler option to disable use
48  *      of FFT multiply if the user so chooses.
49  *
50  *      Revision 1.26  2001/07/26 20:56:38  mike
51  *      fix comment, no code change
52  *
53  *      Revision 1.25  2001/07/16 19:43:45  mike
54  *      add function M_free_all_fmul
55  *
56  *      Revision 1.24  2001/02/11 22:34:47  mike
57  *      modify parameters to REALLOC
58  *
59  *      Revision 1.23  2000/10/20 19:23:26  mike
60  *      adjust power_of_2 function so it should work with
61  *      64 bit processors and beyond.
62  *
63  *      Revision 1.22  2000/08/23 22:27:34  mike
64  *      no real code change, re-named 2 local functions
65  *      so they make more sense.
66  *
67  *      Revision 1.21  2000/08/01 22:24:38  mike
68  *      use sizeof(int) function call to stop some
69  *      compilers from complaining.
70  *
71  *      Revision 1.20  2000/07/19 17:12:00  mike
72  *      lower the number of bytes that the FFT can handle. worst case
73  *      testing indicated math overflow when >= 1048576
74  *
75  *      Revision 1.19  2000/07/08 18:29:03  mike
76  *      increase define so FFT can handle bigger numbers
77  *
78  *      Revision 1.18  2000/07/06 23:20:12  mike
79  *      changed my mind. use static local MAPM numbers
80  *      for temp data storage
81  *
82  *      Revision 1.17  2000/07/06 20:52:34  mike
83  *      use init function to get local writable copies
84  *      instead of using the stack
85  *
86  *      Revision 1.16  2000/07/04 17:25:09  mike
87  *      guarantee 16 bit compilers still work OK
88  *
89  *      Revision 1.15  2000/07/04 15:40:02  mike
90  *      add call to use FFT algorithm
91  *
92  *      Revision 1.14  2000/05/05 21:10:46  mike
93  *      add comment indicating availability of assembly language
94  *      version of M_4_byte_multiply for Linux on x86 platforms.
95  *
96  *      Revision 1.13  2000/04/20 19:30:45  mike
97  *      minor optimization to 4 byte multiply
98  *
99  *      Revision 1.12  2000/04/14 15:39:30  mike
100  *      optimize the fast multiply function. don't re-curse down
101  *      to a size of 1. recurse down to a size of '4' and then
102  *      call a special 4 byte multiply function.
103  *
104  *      Revision 1.11  2000/02/03 23:02:13  mike
105  *      put in RCS for real...
106  *
107  *      Revision 1.10  2000/02/03 22:59:08  mike
108  *      remove the extra recursive function. not needed any
109  *      longer since all current compilers should not have
110  *      any problem with true recursive calls.
111  *
112  *      Revision 1.9  2000/02/03 22:47:39  mike
113  *      use MAPM_* generic memory function
114  *
115  *      Revision 1.8  1999/09/19 21:13:44  mike
116  *      eliminate unneeded local int in _split
117  *
118  *      Revision 1.7  1999/08/12 22:36:23  mike
119  *      move the 3 'simple' function to the top of file
120  *      so GCC can in-line the code.
121  *
122  *      Revision 1.6  1999/08/12 22:01:14  mike
123  *      more minor optimizations
124  *
125  *      Revision 1.5  1999/08/12 02:02:06  mike
126  *      minor optimization
127  *
128  *      Revision 1.4  1999/08/10 22:51:59  mike
129  *      minor tweak
130  *
131  *      Revision 1.3  1999/08/10 00:45:47  mike
132  *      added more comments and a few minor tweaks
133  *
134  *      Revision 1.2  1999/08/09 02:50:02  mike
135  *      add some comments
136  *
137  *      Revision 1.1  1999/08/08 18:27:57  mike
138  *      Initial revision
139  */
140 
141 #include "m_apm_lc.h"
142 
143 static int M_firsttimef = TRUE;
144 
145 /*
146  *      specify the max size the FFT routine can handle
147  *      (in MAPM, #digits = 2 * #bytes)
148  *
149  *      this number *must* be an exact power of 2.
150  *
151  *      **WORST** case input numbers (all 9's) has shown that
152  *	the FFT math will overflow if the #define here is
153  *      >= 1048576. On my system, 524,288 worked OK. I will
154  *      factor down another factor of 2 to safeguard against
155  *	other computers have less precise floating point math.
156  *	If you are confident in your system, 524288 will
157  *	theoretically work fine.
158  *
159  *      the define here allows the FFT algorithm to multiply two
160  *      524,288 digit numbers yielding a 1,048,576 digit result.
161  */
162 
163 #define MAX_FFT_BYTES 262144
164 
165 /*
166  *      the Divide-and-Conquer multiplication kicks in when the size of
167  *	the numbers exceed the capability of the FFT (#define just above).
168  *
169  *	#bytes    D&C call depth
170  *	------    --------------
171  *       512K           1
172  *        1M            2
173  *        2M            3
174  *        4M            4
175  *       ...           ...
176  *    2.1990E+12       23
177  *
178  *	the following stack sizes are sized to meet the
179  *      above 2.199E+12 example, though I wouldn't want to
180  *	wait for it to finish...
181  *
182  *      Each call requires 7 stack variables to be saved so
183  *	we need a stack depth of 23 * 7 + PAD.  (we use 164)
184  *
185  *      For 'exp_stack', 3 integers also are required to be saved
186  *      for each recursive call so we need a stack depth of
187  *      23 * 3 + PAD. (we use 72)
188  *
189  *
190  *      If the FFT multiply is disabled, resize the arrays
191  *	as follows:
192  *
193  *      the following stack sizes are sized to meet the
194  *      worst case expected assuming we are multiplying
195  *      numbers with 2.14E+9 (2 ^ 31) digits.
196  *
197  *      For sizeof(int) == 4 (32 bits) there may be up to 32 recursive
198  *      calls. Each call requires 7 stack variables so we need a
199  *      stack depth of 32 * 7 + PAD.  (we use 240)
200  *
201  *      For 'exp_stack', 3 integers also are required to be saved
202  *      for each recursive call so we need a stack depth of
203  *      32 * 3 + PAD. (we use 100)
204  */
205 
206 #ifdef NO_FFT_MULTIPLY
207 #define M_STACK_SIZE 240
208 #define M_ISTACK_SIZE 100
209 #else
210 #define M_STACK_SIZE 164
211 #define M_ISTACK_SIZE 72
212 #endif
213 
214 static int    exp_stack[M_ISTACK_SIZE];
215 static int    exp_stack_ptr;
216 
217 static UCHAR  *mul_stack_data[M_STACK_SIZE];
218 static int    mul_stack_data_size[M_STACK_SIZE];
219 static int    M_mul_stack_ptr;
220 
221 static UCHAR  *fmul_a1, *fmul_a0, *fmul_a9, *fmul_b1, *fmul_b0,
222 	      *fmul_b9, *fmul_t0;
223 
224 static int    size_flag, bit_limit, stmp, itmp, mii;
225 
226 static M_APM  M_ain;
227 static M_APM  M_bin;
228 
229 static char   *M_stack_ptr_error_msg = "\'M_get_stack_ptr\', Out of memory";
230 
231 extern void   M_fast_multiply(M_APM, M_APM, M_APM);
232 extern void   M_fmul_div_conq(UCHAR *, UCHAR *, UCHAR *, int);
233 extern void   M_fmul_add(UCHAR *, UCHAR *, int, int);
234 extern int    M_fmul_subtract(UCHAR *, UCHAR *, UCHAR *, int);
235 extern void   M_fmul_split(UCHAR *, UCHAR *, UCHAR *, int);
236 extern int    M_next_power_of_2(int);
237 extern int    M_get_stack_ptr(int);
238 extern void   M_push_mul_int(int);
239 extern int    M_pop_mul_int(void);
240 
241 #ifdef NO_FFT_MULTIPLY
242 extern void   M_4_byte_multiply(UCHAR *, UCHAR *, UCHAR *);
243 #else
244 extern void   M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
245 #endif
246 
247 /*
248  *      the following algorithm is used in this fast multiply routine
249  *	(sometimes called the divide-and-conquer technique.)
250  *
251  *	assume we have 2 numbers (a & b) with 2N digits.
252  *
253  *      let : a = (2^N) * A1 + A0 , b = (2^N) * B1 + B0
254  *
255  *	where 'A1' is the 'most significant half' of 'a' and
256  *      'A0' is the 'least significant half' of 'a'. Same for
257  *	B1 and B0.
258  *
259  *	Now use the identity :
260  *
261  *               2N   N            N                    N
262  *	ab  =  (2  + 2 ) A1B1  +  2 (A1-A0)(B0-B1)  + (2 + 1)A0B0
263  *
264  *
265  *      The original problem of multiplying 2 (2N) digit numbers has
266  *	been reduced to 3 multiplications of N digit numbers plus some
267  *	additions, subtractions, and shifts.
268  *
269  *	The fast multiplication algorithm used here uses the above
270  *	identity in a recursive process. This algorithm results in
271  *	O(n ^ 1.585) growth.
272  */
273 
274 
275 /****************************************************************************/
M_free_all_fmul()276 void	M_free_all_fmul()
277 {
278 int	k;
279 
280 if (M_firsttimef == FALSE)
281   {
282    m_apm_free(M_ain);
283    m_apm_free(M_bin);
284 
285    for (k=0; k < M_STACK_SIZE; k++)
286      {
287       if (mul_stack_data_size[k] != 0)
288         {
289          MAPM_FREE(mul_stack_data[k]);
290 	}
291      }
292 
293    M_firsttimef = TRUE;
294   }
295 }
296 /****************************************************************************/
M_push_mul_int(int val)297 void	M_push_mul_int(int val)
298 {
299 exp_stack[++exp_stack_ptr] = val;
300 }
301 /****************************************************************************/
M_pop_mul_int()302 int	M_pop_mul_int()
303 {
304 return(exp_stack[exp_stack_ptr--]);
305 }
306 /****************************************************************************/
M_fmul_split(UCHAR * x1,UCHAR * x0,UCHAR * xin,int nbytes)307 void   	M_fmul_split(UCHAR *x1, UCHAR *x0, UCHAR *xin, int nbytes)
308 {
309 memcpy(x1, xin, nbytes);
310 memcpy(x0, (xin + nbytes), nbytes);
311 }
312 /****************************************************************************/
M_fast_multiply(M_APM rr,M_APM aa,M_APM bb)313 void	M_fast_multiply(M_APM rr, M_APM aa, M_APM bb)
314 {
315 void	*vp;
316 int	ii, k, nexp, sign;
317 
318 if (M_firsttimef)
319   {
320    M_firsttimef = FALSE;
321 
322    for (k=0; k < M_STACK_SIZE; k++)
323      mul_stack_data_size[k] = 0;
324 
325    size_flag = M_get_sizeof_int();
326    bit_limit = 8 * size_flag + 1;
327 
328    M_ain = m_apm_init();
329    M_bin = m_apm_init();
330   }
331 
332 exp_stack_ptr   = -1;
333 M_mul_stack_ptr = -1;
334 
335 m_apm_copy(M_ain, aa);
336 m_apm_copy(M_bin, bb);
337 
338 sign = M_ain->m_apm_sign * M_bin->m_apm_sign;
339 nexp = M_ain->m_apm_exponent + M_bin->m_apm_exponent;
340 
341 if (M_ain->m_apm_datalength >= M_bin->m_apm_datalength)
342   ii = M_ain->m_apm_datalength;
343 else
344   ii = M_bin->m_apm_datalength;
345 
346 ii = (ii + 1) >> 1;
347 ii = M_next_power_of_2(ii);
348 
349 /* Note: 'ii' must be >= 4 here. this is guaranteed
350    by the caller: m_apm_multiply
351 */
352 
353 k = 2 * ii;                   /* required size of result, in bytes  */
354 
355 M_apm_pad(M_ain, k);          /* fill out the data so the number of */
356 M_apm_pad(M_bin, k);          /* bytes is an exact power of 2       */
357 
358 if (k > rr->m_apm_malloclength)
359   {
360    if ((vp = MAPM_REALLOC(rr->m_apm_data, (k + 32))) == NULL)
361      {
362       /* fatal, this does not return */
363 
364       M_apm_log_error_msg(M_APM_FATAL, "\'M_fast_multiply\', Out of memory");
365      }
366 
367    rr->m_apm_malloclength = k + 28;
368    rr->m_apm_data = (UCHAR *)vp;
369   }
370 
371 #ifdef NO_FFT_MULTIPLY
372 
373 M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
374                                 M_bin->m_apm_data, ii);
375 #else
376 
377 /*
378  *     if the numbers are *really* big, use the divide-and-conquer
379  *     routine first until the numbers are small enough to be handled
380  *     by the FFT algorithm. If the numbers are already small enough,
381  *     call the FFT multiplication now.
382  *
383  *     Note that 'ii' here is (and must be) an exact power of 2.
384  */
385 
386 if (size_flag == 2)   /* if still using 16 bit compilers .... */
387   {
388    M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data,
389                                   M_bin->m_apm_data, ii);
390   }
391 else                  /* >= 32 bit compilers */
392   {
393    if (ii > (MAX_FFT_BYTES + 2))
394      {
395       M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
396                                       M_bin->m_apm_data, ii);
397      }
398    else
399      {
400       M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data,
401                                      M_bin->m_apm_data, ii);
402      }
403   }
404 
405 #endif
406 
407 rr->m_apm_sign       = sign;
408 rr->m_apm_exponent   = nexp;
409 rr->m_apm_datalength = 4 * ii;
410 
411 M_apm_normalize(rr);
412 }
413 /****************************************************************************/
414 /*
415  *      This is the recursive function to perform the multiply. The
416  *      design intent here is to have no local variables. Any local
417  *      data that needs to be saved is saved on one of the two stacks.
418  */
M_fmul_div_conq(UCHAR * rr,UCHAR * aa,UCHAR * bb,int sz)419 void	M_fmul_div_conq(UCHAR *rr, UCHAR *aa, UCHAR *bb, int sz)
420 {
421 
422 #ifdef NO_FFT_MULTIPLY
423 
424 if (sz == 4)                /* multiply 4x4 yielding an 8 byte result */
425   {
426    M_4_byte_multiply(rr, aa, bb);
427    return;
428   }
429 
430 #else
431 
432 /*
433  *  if the numbers are now small enough, let the FFT algorithm
434  *  finish up.
435  */
436 
437 if (sz == MAX_FFT_BYTES)
438   {
439    M_fast_mul_fft(rr, aa, bb, sz);
440    return;
441   }
442 
443 #endif
444 
445 memset(rr, 0, (2 * sz));    /* zero out the result */
446 mii = sz >> 1;
447 
448 itmp = M_get_stack_ptr(mii);
449 M_push_mul_int(itmp);
450 
451 fmul_a1 = mul_stack_data[itmp];
452 
453 itmp    = M_get_stack_ptr(mii);
454 fmul_a0 = mul_stack_data[itmp];
455 
456 itmp    = M_get_stack_ptr(2 * sz);
457 fmul_a9 = mul_stack_data[itmp];
458 
459 itmp    = M_get_stack_ptr(mii);
460 fmul_b1 = mul_stack_data[itmp];
461 
462 itmp    = M_get_stack_ptr(mii);
463 fmul_b0 = mul_stack_data[itmp];
464 
465 itmp    = M_get_stack_ptr(2 * sz);
466 fmul_b9 = mul_stack_data[itmp];
467 
468 itmp    = M_get_stack_ptr(2 * sz);
469 fmul_t0 = mul_stack_data[itmp];
470 
471 M_fmul_split(fmul_a1, fmul_a0, aa, mii);
472 M_fmul_split(fmul_b1, fmul_b0, bb, mii);
473 
474 stmp  = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);
475 stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);
476 
477 M_push_mul_int(stmp);
478 M_push_mul_int(mii);
479 
480 M_fmul_div_conq(fmul_t0, fmul_a0, fmul_b0, mii);
481 
482 mii  = M_pop_mul_int();
483 stmp = M_pop_mul_int();
484 itmp = M_pop_mul_int();
485 
486 M_push_mul_int(itmp);
487 M_push_mul_int(stmp);
488 M_push_mul_int(mii);
489 
490 /*   to restore all stack variables ...
491 fmul_a1 = mul_stack_data[itmp];
492 fmul_a0 = mul_stack_data[itmp+1];
493 fmul_a9 = mul_stack_data[itmp+2];
494 fmul_b1 = mul_stack_data[itmp+3];
495 fmul_b0 = mul_stack_data[itmp+4];
496 fmul_b9 = mul_stack_data[itmp+5];
497 fmul_t0 = mul_stack_data[itmp+6];
498 */
499 
500 fmul_a1 = mul_stack_data[itmp];
501 fmul_b1 = mul_stack_data[itmp+3];
502 fmul_t0 = mul_stack_data[itmp+6];
503 
504 memcpy((rr + sz), fmul_t0, sz);    /* first 'add', result is now zero */
505 				   /* so we just copy in the bytes    */
506 M_fmul_add(rr, fmul_t0, mii, sz);
507 
508 M_fmul_div_conq(fmul_t0, fmul_a1, fmul_b1, mii);
509 
510 mii  = M_pop_mul_int();
511 stmp = M_pop_mul_int();
512 itmp = M_pop_mul_int();
513 
514 M_push_mul_int(itmp);
515 M_push_mul_int(stmp);
516 M_push_mul_int(mii);
517 
518 fmul_a9 = mul_stack_data[itmp+2];
519 fmul_b9 = mul_stack_data[itmp+5];
520 fmul_t0 = mul_stack_data[itmp+6];
521 
522 M_fmul_add(rr, fmul_t0, 0, sz);
523 M_fmul_add(rr, fmul_t0, mii, sz);
524 
525 if (stmp != 0)
526   M_fmul_div_conq(fmul_t0, fmul_a9, fmul_b9, mii);
527 
528 mii  = M_pop_mul_int();
529 stmp = M_pop_mul_int();
530 itmp = M_pop_mul_int();
531 
532 fmul_t0 = mul_stack_data[itmp+6];
533 
534 /*
535  *  if the sign of (A1 - A0)(B0 - B1) is positive, ADD to
536  *  the result. if it is negative, SUBTRACT from the result.
537  */
538 
539 if (stmp < 0)
540   {
541    fmul_a9 = mul_stack_data[itmp+2];
542    fmul_b9 = mul_stack_data[itmp+5];
543 
544    memset(fmul_b9, 0, (2 * sz));
545    memcpy((fmul_b9 + mii), fmul_t0, sz);
546    M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz));
547    memcpy(rr, fmul_a9, (2 * sz));
548   }
549 
550 if (stmp > 0)
551   M_fmul_add(rr, fmul_t0, mii, sz);
552 
553 M_mul_stack_ptr -= 7;
554 }
555 /****************************************************************************/
556 /*
557  *	special addition function for use with the fast multiply operation
558  */
M_fmul_add(UCHAR * r,UCHAR * a,int offset,int sz)559 void    M_fmul_add(UCHAR *r, UCHAR *a, int offset, int sz)
560 {
561 int	i, j;
562 UCHAR   carry;
563 
564 carry = 0;
565 j = offset + sz;
566 i = sz;
567 
568 while (TRUE)
569   {
570    r[--j] += carry + a[--i];
571 
572    if (r[j] >= 100)
573      {
574       r[j] -= 100;
575       carry = 1;
576      }
577    else
578       carry = 0;
579 
580    if (i == 0)
581      break;
582   }
583 
584 if (carry)
585   {
586    while (TRUE)
587      {
588       r[--j] += 1;
589 
590       if (r[j] < 100)
591         break;
592 
593       r[j] -= 100;
594      }
595   }
596 }
597 /****************************************************************************/
598 /*
599  *	special subtraction function for use with the fast multiply operation
600  */
M_fmul_subtract(UCHAR * r,UCHAR * a,UCHAR * b,int sz)601 int     M_fmul_subtract(UCHAR *r, UCHAR *a, UCHAR *b, int sz)
602 {
603 int	k, jtmp, sflag, nb, borrow;
604 
605 nb    = sz;
606 sflag = 0;      /* sign flag: assume the numbers are equal */
607 
608 /*
609  *   find if a > b (so we perform a-b)
610  *   or      a < b (so we perform b-a)
611  */
612 
613 for (k=0; k < nb; k++)
614   {
615    if (a[k] < b[k])
616      {
617       sflag = -1;
618       break;
619      }
620 
621    if (a[k] > b[k])
622      {
623       sflag = 1;
624       break;
625      }
626   }
627 
628 if (sflag == 0)
629   {
630    memset(r, 0, nb);            /* zero out the result */
631   }
632 else
633   {
634    k = nb;
635    borrow = 0;
636 
637    while (TRUE)
638      {
639       k--;
640 
641       if (sflag == 1)
642         jtmp = (int)a[k] - ((int)b[k] + borrow);
643       else
644         jtmp = (int)b[k] - ((int)a[k] + borrow);
645 
646       if (jtmp >= 0)
647         {
648          r[k] = (UCHAR)jtmp;
649          borrow = 0;
650         }
651       else
652         {
653          r[k] = (UCHAR)(100 + jtmp);
654          borrow = 1;
655         }
656 
657       if (k == 0)
658         break;
659      }
660   }
661 
662 return(sflag);
663 }
664 /****************************************************************************/
M_next_power_of_2(int n)665 int     M_next_power_of_2(int n)
666 {
667 int     ct, k;
668 
669 if (n <= 2)
670   return(n);
671 
672 k  = 2;
673 ct = 0;
674 
675 while (TRUE)
676   {
677    if (k >= n)
678      break;
679 
680    k = k << 1;
681 
682    if (++ct == bit_limit)
683      {
684       /* fatal, this does not return */
685 
686       M_apm_log_error_msg(M_APM_FATAL,
687                "\'M_next_power_of_2\', ERROR :sizeof(int) too small ??");
688      }
689   }
690 
691 return(k);
692 }
693 /****************************************************************************/
M_get_stack_ptr(int sz)694 int	M_get_stack_ptr(int sz)
695 {
696 int	i, k;
697 UCHAR   *cp;
698 
699 k = ++M_mul_stack_ptr;
700 
701 /* if size is 0, just need to malloc and return */
702 if (mul_stack_data_size[k] == 0)
703   {
704    if ((i = sz) < 16)
705      i = 16;
706 
707    if ((cp = (UCHAR *)MAPM_MALLOC(i + 4)) == NULL)
708      {
709       /* fatal, this does not return */
710 
711       M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
712      }
713 
714    mul_stack_data[k]      = cp;
715    mul_stack_data_size[k] = i;
716   }
717 else        /* it has been malloc'ed, see if it's big enough */
718   {
719    if (sz > mul_stack_data_size[k])
720      {
721       cp = mul_stack_data[k];
722 
723       if ((cp = (UCHAR *)MAPM_REALLOC(cp, (sz + 4))) == NULL)
724         {
725          /* fatal, this does not return */
726 
727          M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
728         }
729 
730       mul_stack_data[k]      = cp;
731       mul_stack_data_size[k] = sz;
732      }
733   }
734 
735 return(k);
736 }
737 /****************************************************************************/
738 
739 #ifdef NO_FFT_MULTIPLY
740 
741 /*
742  *      multiply a 4 byte number by a 4 byte number
743  *      yielding an 8 byte result. each byte contains
744  *      a base 100 'digit', i.e.: range from 0-99.
745  *
746  *             MSB         LSB
747  *
748  *      a,b    [0] [1] [2] [3]
749  *   result    [0]  .....  [7]
750  */
751 
M_4_byte_multiply(UCHAR * r,UCHAR * a,UCHAR * b)752 void	M_4_byte_multiply(UCHAR *r, UCHAR *a, UCHAR *b)
753 {
754 int	      jj;
755 unsigned int  *ip, t1, rr[8];
756 
757 memset(rr, 0, (8 * sizeof(int)));        /* zero out result */
758 jj = 3;
759 ip = rr + 5;
760 
761 /*
762  *   loop for one number [b], un-roll the inner 'loop' [a]
763  *
764  *   accumulate partial sums in UINT array, release carries
765  *   and convert back to base 100 at the end
766  */
767 
768 while (1)
769   {
770    t1  = (unsigned int)b[jj];
771    ip += 2;
772 
773    *ip-- += t1 * a[3];
774    *ip-- += t1 * a[2];
775    *ip-- += t1 * a[1];
776    *ip   += t1 * a[0];
777 
778    if (jj-- == 0)
779      break;
780   }
781 
782 jj = 7;
783 
784 while (1)
785   {
786    t1 = rr[jj] / 100;
787    r[jj] = (UCHAR)(rr[jj] - 100 * t1);
788 
789    if (jj == 0)
790      break;
791 
792    rr[--jj] += t1;
793   }
794 }
795 
796 #endif
797 
798 /****************************************************************************/
799