xref: /haiku/src/system/libroot/posix/glibc/arch/generic/mul_n.c (revision c1437015ddbea841cbd0e154b416199886c83624)
1*c1437015SIngo Weinhold /* mpn_mul_n -- Multiply two natural numbers of length n.
2*c1437015SIngo Weinhold 
3*c1437015SIngo Weinhold Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
4*c1437015SIngo Weinhold 
5*c1437015SIngo Weinhold This file is part of the GNU MP Library.
6*c1437015SIngo Weinhold 
7*c1437015SIngo Weinhold The GNU MP Library is free software; you can redistribute it and/or modify
8*c1437015SIngo Weinhold it under the terms of the GNU Lesser General Public License as published by
9*c1437015SIngo Weinhold the Free Software Foundation; either version 2.1 of the License, or (at your
10*c1437015SIngo Weinhold option) any later version.
11*c1437015SIngo Weinhold 
12*c1437015SIngo Weinhold The GNU MP Library is distributed in the hope that it will be useful, but
13*c1437015SIngo Weinhold WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14*c1437015SIngo Weinhold or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15*c1437015SIngo Weinhold License for more details.
16*c1437015SIngo Weinhold 
17*c1437015SIngo Weinhold You should have received a copy of the GNU Lesser General Public License
18*c1437015SIngo Weinhold along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
19*c1437015SIngo Weinhold the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
20*c1437015SIngo Weinhold MA 02111-1307, USA. */
21*c1437015SIngo Weinhold 
22*c1437015SIngo Weinhold #include "gmp.h"
23*c1437015SIngo Weinhold #include "gmp-impl.h"
24*c1437015SIngo Weinhold 
25*c1437015SIngo Weinhold /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
26*c1437015SIngo Weinhold    both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
27*c1437015SIngo Weinhold    always stored.  Return the most significant limb.
28*c1437015SIngo Weinhold 
29*c1437015SIngo Weinhold    Argument constraints:
30*c1437015SIngo Weinhold    1. PRODP != UP and PRODP != VP, i.e. the destination
31*c1437015SIngo Weinhold       must be distinct from the multiplier and the multiplicand.  */
32*c1437015SIngo Weinhold 
33*c1437015SIngo Weinhold /* If KARATSUBA_THRESHOLD is not already defined, define it to a
34*c1437015SIngo Weinhold    value which is good on most machines.  */
35*c1437015SIngo Weinhold #ifndef KARATSUBA_THRESHOLD
36*c1437015SIngo Weinhold #define KARATSUBA_THRESHOLD 32
37*c1437015SIngo Weinhold #endif
38*c1437015SIngo Weinhold 
39*c1437015SIngo Weinhold /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
40*c1437015SIngo Weinhold #if KARATSUBA_THRESHOLD < 2
41*c1437015SIngo Weinhold #undef KARATSUBA_THRESHOLD
42*c1437015SIngo Weinhold #define KARATSUBA_THRESHOLD 2
43*c1437015SIngo Weinhold #endif
44*c1437015SIngo Weinhold 
45*c1437015SIngo Weinhold /* Handle simple cases with traditional multiplication.
46*c1437015SIngo Weinhold 
47*c1437015SIngo Weinhold    This is the most critical code of multiplication.  All multiplies rely
48*c1437015SIngo Weinhold    on this, both small and huge.  Small ones arrive here immediately.  Huge
49*c1437015SIngo Weinhold    ones arrive here as this is the base case for Karatsuba's recursive
50*c1437015SIngo Weinhold    algorithm below.  */
51*c1437015SIngo Weinhold 
52*c1437015SIngo Weinhold void
53*c1437015SIngo Weinhold #if __STDC__
impn_mul_n_basecase(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)54*c1437015SIngo Weinhold impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
55*c1437015SIngo Weinhold #else
56*c1437015SIngo Weinhold impn_mul_n_basecase (prodp, up, vp, size)
57*c1437015SIngo Weinhold      mp_ptr prodp;
58*c1437015SIngo Weinhold      mp_srcptr up;
59*c1437015SIngo Weinhold      mp_srcptr vp;
60*c1437015SIngo Weinhold      mp_size_t size;
61*c1437015SIngo Weinhold #endif
62*c1437015SIngo Weinhold {
63*c1437015SIngo Weinhold   mp_size_t i;
64*c1437015SIngo Weinhold   mp_limb_t cy_limb;
65*c1437015SIngo Weinhold   mp_limb_t v_limb;
66*c1437015SIngo Weinhold 
67*c1437015SIngo Weinhold   /* Multiply by the first limb in V separately, as the result can be
68*c1437015SIngo Weinhold      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
69*c1437015SIngo Weinhold   v_limb = vp[0];
70*c1437015SIngo Weinhold   if (v_limb <= 1)
71*c1437015SIngo Weinhold     {
72*c1437015SIngo Weinhold       if (v_limb == 1)
73*c1437015SIngo Weinhold 	MPN_COPY (prodp, up, size);
74*c1437015SIngo Weinhold       else
75*c1437015SIngo Weinhold 	MPN_ZERO (prodp, size);
76*c1437015SIngo Weinhold       cy_limb = 0;
77*c1437015SIngo Weinhold     }
78*c1437015SIngo Weinhold   else
79*c1437015SIngo Weinhold     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
80*c1437015SIngo Weinhold 
81*c1437015SIngo Weinhold   prodp[size] = cy_limb;
82*c1437015SIngo Weinhold   prodp++;
83*c1437015SIngo Weinhold 
84*c1437015SIngo Weinhold   /* For each iteration in the outer loop, multiply one limb from
85*c1437015SIngo Weinhold      U with one limb from V, and add it to PROD.  */
86*c1437015SIngo Weinhold   for (i = 1; i < size; i++)
87*c1437015SIngo Weinhold     {
88*c1437015SIngo Weinhold       v_limb = vp[i];
89*c1437015SIngo Weinhold       if (v_limb <= 1)
90*c1437015SIngo Weinhold 	{
91*c1437015SIngo Weinhold 	  cy_limb = 0;
92*c1437015SIngo Weinhold 	  if (v_limb == 1)
93*c1437015SIngo Weinhold 	    cy_limb = mpn_add_n (prodp, prodp, up, size);
94*c1437015SIngo Weinhold 	}
95*c1437015SIngo Weinhold       else
96*c1437015SIngo Weinhold 	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
97*c1437015SIngo Weinhold 
98*c1437015SIngo Weinhold       prodp[size] = cy_limb;
99*c1437015SIngo Weinhold       prodp++;
100*c1437015SIngo Weinhold     }
101*c1437015SIngo Weinhold }
102*c1437015SIngo Weinhold 
/* Karatsuba multiplication: multiply {up, size} by {vp, size}, storing the
   2*size limb result at {prodp, 2*size}.  TSPACE supplies 2*size limbs of
   scratch.  Recursion goes through MPN_MUL_N_RECURSE, which falls back to
   impn_mul_n_basecase below KARATSUBA_THRESHOLD.  */
void
#if __STDC__
impn_mul_n (mp_ptr prodp,
	     mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
impn_mul_n (prodp, up, vp, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_srcptr vp;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, the code below doesn't handle that.
	 Multiply the least significant (size - 1) limbs with a recursive
	 call, and handle the most significant limb of S1 and S2
	 separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
	 code below behave as if the size were even, and let it check for
	 odd size in the end.  I.e., in essence move this code to the end.
	 Doing so would save us a recursive call, and potentially make the
	 stack grow a lot less.  */

      mp_size_t esize = size - 1;	/* even size */
      mp_limb_t cy_limb;

      /* Product of the two even-size low parts.  */
      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      /* Add vp[esize] * {up, esize} at limb offset esize.  */
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      /* Add up[esize] * {vp, size}.  Note this pass runs over all SIZE
	 limbs of V, so it also picks up the up[esize]*vp[esize] cross
	 term and absorbs the carry limb stored just above.  */
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

	 Split U in two pieces, U1 and U0, such that
	 U = U0 + U1*(B**n),
	 and V in V1 and V0, such that
	 V = V0 + V1*(B**n).

	 UV is then computed recursively using the identity

		2n   n          n                     n
	 UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
			1 1        1  0   0  1              0 0

	 Where B = 2**BITS_PER_MP_LIMB.  */

      mp_size_t hsize = size >> 1;
      mp_limb_t cy;
      int negflg;

      /*** Product H.	 ________________  ________________
			|_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.	 ________________
			|_(U1-U0)(V0-V1)_|  */
      /* Form |U1-U0| and |V1-V0| in the low part of PROD, tracking in
	 NEGFLG the sign of the signed product (U1-U0)(V0-V1).  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
	{
	  mpn_sub_n (prodp, up + hsize, up, hsize);
	  negflg = 0;
	}
      else
	{
	  mpn_sub_n (prodp, up, up + hsize, hsize);
	  negflg = 1;
	}
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
	{
	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
	  negflg ^= 1;
	}
      else
	{
	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
	  /* No change of NEGFLG.  */
	}
      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG M is a negative number).  */
      if (negflg)
	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.	 ________________  ________________
			|________________||____U0 x V0_____|  */
      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
	mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
221*c1437015SIngo Weinhold 
222*c1437015SIngo Weinhold void
223*c1437015SIngo Weinhold #if __STDC__
impn_sqr_n_basecase(mp_ptr prodp,mp_srcptr up,mp_size_t size)224*c1437015SIngo Weinhold impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
225*c1437015SIngo Weinhold #else
226*c1437015SIngo Weinhold impn_sqr_n_basecase (prodp, up, size)
227*c1437015SIngo Weinhold      mp_ptr prodp;
228*c1437015SIngo Weinhold      mp_srcptr up;
229*c1437015SIngo Weinhold      mp_size_t size;
230*c1437015SIngo Weinhold #endif
231*c1437015SIngo Weinhold {
232*c1437015SIngo Weinhold   mp_size_t i;
233*c1437015SIngo Weinhold   mp_limb_t cy_limb;
234*c1437015SIngo Weinhold   mp_limb_t v_limb;
235*c1437015SIngo Weinhold 
236*c1437015SIngo Weinhold   /* Multiply by the first limb in V separately, as the result can be
237*c1437015SIngo Weinhold      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
238*c1437015SIngo Weinhold   v_limb = up[0];
239*c1437015SIngo Weinhold   if (v_limb <= 1)
240*c1437015SIngo Weinhold     {
241*c1437015SIngo Weinhold       if (v_limb == 1)
242*c1437015SIngo Weinhold 	MPN_COPY (prodp, up, size);
243*c1437015SIngo Weinhold       else
244*c1437015SIngo Weinhold 	MPN_ZERO (prodp, size);
245*c1437015SIngo Weinhold       cy_limb = 0;
246*c1437015SIngo Weinhold     }
247*c1437015SIngo Weinhold   else
248*c1437015SIngo Weinhold     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
249*c1437015SIngo Weinhold 
250*c1437015SIngo Weinhold   prodp[size] = cy_limb;
251*c1437015SIngo Weinhold   prodp++;
252*c1437015SIngo Weinhold 
253*c1437015SIngo Weinhold   /* For each iteration in the outer loop, multiply one limb from
254*c1437015SIngo Weinhold      U with one limb from V, and add it to PROD.  */
255*c1437015SIngo Weinhold   for (i = 1; i < size; i++)
256*c1437015SIngo Weinhold     {
257*c1437015SIngo Weinhold       v_limb = up[i];
258*c1437015SIngo Weinhold       if (v_limb <= 1)
259*c1437015SIngo Weinhold 	{
260*c1437015SIngo Weinhold 	  cy_limb = 0;
261*c1437015SIngo Weinhold 	  if (v_limb == 1)
262*c1437015SIngo Weinhold 	    cy_limb = mpn_add_n (prodp, prodp, up, size);
263*c1437015SIngo Weinhold 	}
264*c1437015SIngo Weinhold       else
265*c1437015SIngo Weinhold 	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
266*c1437015SIngo Weinhold 
267*c1437015SIngo Weinhold       prodp[size] = cy_limb;
268*c1437015SIngo Weinhold       prodp++;
269*c1437015SIngo Weinhold     }
270*c1437015SIngo Weinhold }
271*c1437015SIngo Weinhold 
/* Karatsuba squaring: square {up, size}, storing the 2*size limb result
   at {prodp, 2*size}.  TSPACE supplies 2*size limbs of scratch.  Same
   scheme as impn_mul_n, simplified since U == V: the middle product
   (U1-U0)(U0-U1) is never positive, so no sign flag is needed.  */
void
#if __STDC__
impn_sqr_n (mp_ptr prodp,
	     mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
impn_sqr_n (prodp, up, size, tspace)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t size;
     mp_ptr tspace;
#endif
{
  if ((size & 1) != 0)
    {
      /* The size is odd, the code below doesn't handle that.
	 Multiply the least significant (size - 1) limbs with a recursive
	 call, and handle the most significant limb of S1 and S2
	 separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
	 code below behave as if the size were even, and let it check for
	 odd size in the end.  I.e., in essence move this code to the end.
	 Doing so would save us a recursive call, and potentially make the
	 stack grow a lot less.  */

      mp_size_t esize = size - 1;	/* even size */
      mp_limb_t cy_limb;

      /* Square of the even-size low part.  */
      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
      /* Add up[esize] * {up, esize} at limb offset esize.  */
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
      prodp[esize + esize] = cy_limb;
      /* Second up[esize] pass over all SIZE limbs, which also adds the
	 up[esize]^2 term and absorbs the carry limb stored above.  */
      cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      mp_size_t hsize = size >> 1;
      mp_limb_t cy;

      /*** Product H.	 ________________  ________________
			|_____U1 x U1____||____U0 x U0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
	 as new TSPACE.  */
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);

      /*** Product M.	 ________________
			|_(U1-U0)(U0-U1)_|  */
      /* Form |U1-U0| in the low part of PROD.  Its square is |M|, and M
	 itself is always <= 0, so it is subtracted unconditionally
	 below.  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
	{
	  mpn_sub_n (prodp, up + hsize, up, hsize);
	}
      else
	{
	  mpn_sub_n (prodp, up, up + hsize, hsize);
	}

      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);

      /*** Add/copy product H.  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (always a non-positive number here).  */
      cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.	 ________________  ________________
			|________________||____U0 x U0_____|  */
      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
	mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
359*c1437015SIngo Weinhold 
360*c1437015SIngo Weinhold /* This should be made into an inline function in gmp.h.  */
361*c1437015SIngo Weinhold void
362*c1437015SIngo Weinhold #if __STDC__
mpn_mul_n(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)363*c1437015SIngo Weinhold mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
364*c1437015SIngo Weinhold #else
365*c1437015SIngo Weinhold mpn_mul_n (prodp, up, vp, size)
366*c1437015SIngo Weinhold      mp_ptr prodp;
367*c1437015SIngo Weinhold      mp_srcptr up;
368*c1437015SIngo Weinhold      mp_srcptr vp;
369*c1437015SIngo Weinhold      mp_size_t size;
370*c1437015SIngo Weinhold #endif
371*c1437015SIngo Weinhold {
372*c1437015SIngo Weinhold   TMP_DECL (marker);
373*c1437015SIngo Weinhold   TMP_MARK (marker);
374*c1437015SIngo Weinhold   if (up == vp)
375*c1437015SIngo Weinhold     {
376*c1437015SIngo Weinhold       if (size < KARATSUBA_THRESHOLD)
377*c1437015SIngo Weinhold 	{
378*c1437015SIngo Weinhold 	  impn_sqr_n_basecase (prodp, up, size);
379*c1437015SIngo Weinhold 	}
380*c1437015SIngo Weinhold       else
381*c1437015SIngo Weinhold 	{
382*c1437015SIngo Weinhold 	  mp_ptr tspace;
383*c1437015SIngo Weinhold 	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
384*c1437015SIngo Weinhold 	  impn_sqr_n (prodp, up, size, tspace);
385*c1437015SIngo Weinhold 	}
386*c1437015SIngo Weinhold     }
387*c1437015SIngo Weinhold   else
388*c1437015SIngo Weinhold     {
389*c1437015SIngo Weinhold       if (size < KARATSUBA_THRESHOLD)
390*c1437015SIngo Weinhold 	{
391*c1437015SIngo Weinhold 	  impn_mul_n_basecase (prodp, up, vp, size);
392*c1437015SIngo Weinhold 	}
393*c1437015SIngo Weinhold       else
394*c1437015SIngo Weinhold 	{
395*c1437015SIngo Weinhold 	  mp_ptr tspace;
396*c1437015SIngo Weinhold 	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
397*c1437015SIngo Weinhold 	  impn_mul_n (prodp, up, vp, size, tspace);
398*c1437015SIngo Weinhold 	}
399*c1437015SIngo Weinhold     }
400*c1437015SIngo Weinhold   TMP_FREE (marker);
401*c1437015SIngo Weinhold }
402