1
2 /*
3 * M_APM - mapmfact.c
4 *
5 * Copyright (C) 1999 - 2007 Michael C. Ring
6 *
7 * Permission to use, copy, and distribute this software and its
8 * documentation for any purpose with or without fee is hereby granted,
9 * provided that the above copyright notice appear in all copies and
10 * that both that copyright notice and this permission notice appear
11 * in supporting documentation.
12 *
13 * Permission to modify the software is granted. Permission to distribute
14 * the modified code is granted. Modifications are to be distributed by
15 * using the file 'license.txt' as a template to modify the file header.
16 * 'license.txt' is available in the official MAPM distribution.
17 *
18 * This software is provided "as is" without express or implied warranty.
19 */
20
21 /*
22 * $Id: mapmfact.c,v 1.11 2007/12/03 01:51:50 mike Exp $
23 *
24 * This file contains the FACTORIAL function.
25 *
26 * $Log: mapmfact.c,v $
27 * Revision 1.11 2007/12/03 01:51:50 mike
28 * Update license
29 *
30 * Revision 1.10 2003/07/21 21:05:31 mike
31 * facilitate 'gcov' code coverage tool by making an array smaller
32 *
33 * Revision 1.9 2002/11/03 21:27:28 mike
34 * Updated function parameters to use the modern style
35 *
36 * Revision 1.8 2000/06/14 20:36:16 mike
37 * increase size of DOS array
38 *
39 * Revision 1.7 2000/05/29 13:15:59 mike
40 * minor tweaks, fixed comment
41 *
42 * Revision 1.6 2000/05/26 16:39:03 mike
43 * minor update to comments and code
44 *
45 * Revision 1.5 2000/05/25 23:12:53 mike
46 * change 'nd' calculation
47 *
48 * Revision 1.4 2000/05/25 22:17:45 mike
49 * implement new algorithm for speed. approx 5 - 10X
50 * faster on my PC when N! is large (> 6000)
51 *
52 * Revision 1.3 1999/06/19 21:25:21 mike
53 * changed local static variables to MAPM stack variables
54 *
55 * Revision 1.2 1999/05/23 18:21:12 mike
56 * minor variable name tweaks
57 *
58 * Revision 1.1 1999/05/15 21:06:11 mike
59 * Initial revision
60 */
61
62 /*
63 * Brief explanation of the factorial algorithm.
64 * ----------------------------------------------
65 *
66 * The old algorithm simply multiplied N * (N-1) * (N-2) etc, until
67 * the number counted down to '2'. So one term of the multiplication
68 * kept getting bigger while multiplying by the next number in the
69 * sequence.
70 *
71 * The new algorithm takes advantage of the fast multiplication
72 * algorithm. The "ideal" setup for fast multiplication is when
73 * both numbers have approx the same number of significant digits
74 * and the number of digits is very near (but not over) an exact
75 * power of 2.
76 *
77 * So, we will multiply N * (N-1) * (N-2), etc until the number of
78 * significant digits is approx 256.
79 *
80 * Store this temp product into an array.
81 *
82 * Then we will multiply the next sequence until the number of
83 * significant digits is approx 256.
84 *
85 * Store this temp product into the next element of the array.
86 *
87 * Continue until we've counted down to 2.
88 *
89 * We now have an array of numbers with approx the same number
90 * of digits (except for the last element, depending on where it
91 * ended.) Now multiply each of the array elements together to
92 * get the final product.
93 *
94 * The array multiplies are done as follows (assume we used 11
95 * array elements for this example, indicated by [0] - [10] ) :
96 *
97 * initial iter-1 iter-2 iter-3 iter-4
98 *
99 * [0]
100 * * -> [0]
101 * [1]
102 * * -> [0]
103 *
104 * [2]
105 * * -> [1]
106 * [3]
107 * * -> [0]
108 *
109 * [4]
110 * * -> [2]
111 * [5]
112 *
113 * * -> [1]
114 *
115 * [6]
116 * * -> [3] * -> [0]
117 * [7]
118 *
119 *
120 * [8]
121 * * -> [4]
122 * [9]
123 * * -> [2] -> [1]
124 *
125 *
126 * [10] -> [5]
127 *
128 */
129
130 #include "m_apm_lc.h"
131
132 /* define size of local array for temp storage */
133
134 #define NDIM 32
135
136 /****************************************************************************/
m_apm_factorial(M_APM moutput,M_APM minput)137 void m_apm_factorial(M_APM moutput, M_APM minput)
138 {
139 int ii, nmul, ndigits, nd, jj, kk, mm, ct;
140 M_APM array[NDIM];
141 M_APM iprod1, iprod2, tmp1, tmp2;
142
143 /* return 1 for any input <= 1 */
144
145 if (m_apm_compare(minput, MM_One) <= 0)
146 {
147 m_apm_copy(moutput, MM_One);
148 return;
149 }
150
151 ct = 0;
152 mm = NDIM - 2;
153 ndigits = 256;
154 nd = ndigits - 20;
155 tmp1 = m_apm_init();
156 tmp2 = m_apm_init();
157 iprod1 = m_apm_init();
158 iprod2 = m_apm_init();
159 array[0] = m_apm_init();
160
161 m_apm_copy(tmp2, minput);
162
163 /* loop until multiply count-down has reached '2' */
164
165 while (TRUE)
166 {
167 m_apm_copy(iprod1, MM_One);
168
169 /*
170 * loop until the number of significant digits in this
171 * partial result is slightly less than 256
172 */
173
174 while (TRUE)
175 {
176 m_apm_multiply(iprod2, iprod1, tmp2);
177
178 m_apm_subtract(tmp1, tmp2, MM_One);
179
180 m_apm_multiply(iprod1, iprod2, tmp1);
181
182 /*
183 * I know, I know. There just isn't a *clean* way
184 * to break out of 2 nested loops.
185 */
186
187 if (m_apm_compare(tmp1, MM_Two) <= 0)
188 goto PHASE2;
189
190 m_apm_subtract(tmp2, tmp1, MM_One);
191
192 if (iprod1->m_apm_datalength > nd)
193 break;
194 }
195
196 if (ct == (NDIM - 1))
197 {
198 /*
199 * if the array has filled up, start multiplying
200 * some of the partial products now.
201 */
202
203 m_apm_copy(tmp1, array[mm]);
204 m_apm_multiply(array[mm], iprod1, tmp1);
205
206 if (mm == 0)
207 {
208 mm = NDIM - 2;
209 ndigits = ndigits << 1;
210 nd = ndigits - 20;
211 }
212 else
213 mm--;
214 }
215 else
216 {
217 /*
218 * store this partial product in the array
219 * and allocate the next array element
220 */
221
222 m_apm_copy(array[ct], iprod1);
223 array[++ct] = m_apm_init();
224 }
225 }
226
227 PHASE2:
228
229 m_apm_copy(array[ct], iprod1);
230
231 kk = ct;
232
233 while (kk != 0)
234 {
235 ii = 0;
236 jj = 0;
237 nmul = (kk + 1) >> 1;
238
239 while (TRUE)
240 {
241 /* must use tmp var when ii,jj point to same element */
242
243 if (ii == 0)
244 {
245 m_apm_copy(tmp1, array[ii]);
246 m_apm_multiply(array[jj], tmp1, array[ii+1]);
247 }
248 else
249 m_apm_multiply(array[jj], array[ii], array[ii+1]);
250
251 if (++jj == nmul)
252 break;
253
254 ii += 2;
255 }
256
257 if ((kk & 1) == 0)
258 {
259 jj = kk >> 1;
260 m_apm_copy(array[jj], array[kk]);
261 }
262
263 kk = kk >> 1;
264 }
265
266 m_apm_copy(moutput, array[0]);
267
268 for (ii=0; ii <= ct; ii++)
269 {
270 m_apm_free(array[ii]);
271 }
272
273 m_apm_free(tmp1);
274 m_apm_free(tmp2);
275 m_apm_free(iprod1);
276 m_apm_free(iprod2);
277 }
278 /****************************************************************************/
279