1 2 /* 3 * M_APM - mapmfact.c 4 * 5 * Copyright (C) 1999 - 2007 Michael C. Ring 6 * 7 * Permission to use, copy, and distribute this software and its 8 * documentation for any purpose with or without fee is hereby granted, 9 * provided that the above copyright notice appear in all copies and 10 * that both that copyright notice and this permission notice appear 11 * in supporting documentation. 12 * 13 * Permission to modify the software is granted. Permission to distribute 14 * the modified code is granted. Modifications are to be distributed by 15 * using the file 'license.txt' as a template to modify the file header. 16 * 'license.txt' is available in the official MAPM distribution. 17 * 18 * This software is provided "as is" without express or implied warranty. 19 */ 20 21 /* 22 * $Id: mapmfact.c,v 1.11 2007/12/03 01:51:50 mike Exp $ 23 * 24 * This file contains the FACTORIAL function. 25 * 26 * $Log: mapmfact.c,v $ 27 * Revision 1.11 2007/12/03 01:51:50 mike 28 * Update license 29 * 30 * Revision 1.10 2003/07/21 21:05:31 mike 31 * facilitate 'gcov' code coverage tool by making an array smaller 32 * 33 * Revision 1.9 2002/11/03 21:27:28 mike 34 * Updated function parameters to use the modern style 35 * 36 * Revision 1.8 2000/06/14 20:36:16 mike 37 * increase size of DOS array 38 * 39 * Revision 1.7 2000/05/29 13:15:59 mike 40 * minor tweaks, fixed comment 41 * 42 * Revision 1.6 2000/05/26 16:39:03 mike 43 * minor update to comments and code 44 * 45 * Revision 1.5 2000/05/25 23:12:53 mike 46 * change 'nd' calculation 47 * 48 * Revision 1.4 2000/05/25 22:17:45 mike 49 * implement new algorithm for speed. approx 5 - 10X 50 * faster on my PC when N! is large (> 6000) 51 * 52 * Revision 1.3 1999/06/19 21:25:21 mike 53 * changed local static variables to MAPM stack variables 54 * 55 * Revision 1.2 1999/05/23 18:21:12 mike 56 * minor variable name tweaks 57 * 58 * Revision 1.1 1999/05/15 21:06:11 mike 59 * Initial revision 60 */ 61 62 /* 63 * Brief explanation of the factorial algorithm. 64 * ---------------------------------------------- 65 * 66 * The old algorithm simply multiplied N * (N-1) * (N-2) etc, until 67 * the number counted down to '2'. So one term of the multiplication 68 * kept getting bigger while multiplying by the next number in the 69 * sequence. 70 * 71 * The new algorithm takes advantage of the fast multiplication 72 * algorithm. The "ideal" setup for fast multiplication is when 73 * both numbers have approx the same number of significant digits 74 * and the number of digits is very near (but not over) an exact 75 * power of 2. 76 * 77 * So, we will multiply N * (N-1) * (N-2), etc until the number of 78 * significant digits is approx 256. 79 * 80 * Store this temp product into an array. 81 * 82 * Then we will multiply the next sequence until the number of 83 * significant digits is approx 256. 84 * 85 * Store this temp product into the next element of the array. 86 * 87 * Continue until we've counted down to 2. 88 * 89 * We now have an array of numbers with approx the same number 90 * of digits (except for the last element, depending on where it 91 * ended.) Now multiply each of the array elements together to 92 * get the final product. 93 * 94 * The array multiplies are done as follows (assume we used 11 95 * array elements for this example, indicated by [0] - [10] ) : 96 * 97 * initial iter-1 iter-2 iter-3 iter-4 98 * 99 * [0] 100 * * -> [0] 101 * [1] 102 * * -> [0] 103 * 104 * [2] 105 * * -> [1] 106 * [3] 107 * * -> [0] 108 * 109 * [4] 110 * * -> [2] 111 * [5] 112 * 113 * * -> [1] 114 * 115 * [6] 116 * * -> [3] * -> [0] 117 * [7] 118 * 119 * 120 * [8] 121 * * -> [4] 122 * [9] 123 * * -> [2] -> [1] 124 * 125 * 126 * [10] -> [5] 127 * 128 */ 129 130 #include "m_apm_lc.h" 131 132 /* define size of local array for temp storage */ 133 134 #define NDIM 32 135 136 /****************************************************************************/ 137 void m_apm_factorial(M_APM moutput, M_APM minput) 138 { 139 int ii, nmul, ndigits, nd, jj, kk, mm, ct; 140 M_APM array[NDIM]; 141 M_APM iprod1, iprod2, tmp1, tmp2; 142 143 /* return 1 for any input <= 1 */ 144 145 if (m_apm_compare(minput, MM_One) <= 0) 146 { 147 m_apm_copy(moutput, MM_One); 148 return; 149 } 150 151 ct = 0; 152 mm = NDIM - 2; 153 ndigits = 256; 154 nd = ndigits - 20; 155 tmp1 = m_apm_init(); 156 tmp2 = m_apm_init(); 157 iprod1 = m_apm_init(); 158 iprod2 = m_apm_init(); 159 array[0] = m_apm_init(); 160 161 m_apm_copy(tmp2, minput); 162 163 /* loop until multiply count-down has reached '2' */ 164 165 while (TRUE) 166 { 167 m_apm_copy(iprod1, MM_One); 168 169 /* 170 * loop until the number of significant digits in this 171 * partial result is slightly less than 256 172 */ 173 174 while (TRUE) 175 { 176 m_apm_multiply(iprod2, iprod1, tmp2); 177 178 m_apm_subtract(tmp1, tmp2, MM_One); 179 180 m_apm_multiply(iprod1, iprod2, tmp1); 181 182 /* 183 * I know, I know. There just isn't a *clean* way 184 * to break out of 2 nested loops. 185 */ 186 187 if (m_apm_compare(tmp1, MM_Two) <= 0) 188 goto PHASE2; 189 190 m_apm_subtract(tmp2, tmp1, MM_One); 191 192 if (iprod1->m_apm_datalength > nd) 193 break; 194 } 195 196 if (ct == (NDIM - 1)) 197 { 198 /* 199 * if the array has filled up, start multiplying 200 * some of the partial products now. 201 */ 202 203 m_apm_copy(tmp1, array[mm]); 204 m_apm_multiply(array[mm], iprod1, tmp1); 205 206 if (mm == 0) 207 { 208 mm = NDIM - 2; 209 ndigits = ndigits << 1; 210 nd = ndigits - 20; 211 } 212 else 213 mm--; 214 } 215 else 216 { 217 /* 218 * store this partial product in the array 219 * and allocate the next array element 220 */ 221 222 m_apm_copy(array[ct], iprod1); 223 array[++ct] = m_apm_init(); 224 } 225 } 226 227 PHASE2: 228 229 m_apm_copy(array[ct], iprod1); 230 231 kk = ct; 232 233 while (kk != 0) 234 { 235 ii = 0; 236 jj = 0; 237 nmul = (kk + 1) >> 1; 238 239 while (TRUE) 240 { 241 /* must use tmp var when ii,jj point to same element */ 242 243 if (ii == 0) 244 { 245 m_apm_copy(tmp1, array[ii]); 246 m_apm_multiply(array[jj], tmp1, array[ii+1]); 247 } 248 else 249 m_apm_multiply(array[jj], array[ii], array[ii+1]); 250 251 if (++jj == nmul) 252 break; 253 254 ii += 2; 255 } 256 257 if ((kk & 1) == 0) 258 { 259 jj = kk >> 1; 260 m_apm_copy(array[jj], array[kk]); 261 } 262 263 kk = kk >> 1; 264 } 265 266 m_apm_copy(moutput, array[0]); 267 268 for (ii=0; ii <= ct; ii++) 269 { 270 m_apm_free(array[ii]); 271 } 272 273 m_apm_free(tmp1); 274 m_apm_free(tmp2); 275 m_apm_free(iprod1); 276 m_apm_free(iprod2); 277 } 278 /****************************************************************************/ 279