1 #include <stdint.h> 2 #include <float.h> 3 #include <math.h> 4 #include "atomic.h" 5 6 #define ASUINT64(x) ((union {double f; uint64_t i;}){x}).i 7 #define ZEROINFNAN (0x7ff-0x3ff-52-1) 8 9 struct num { uint64_t m; int e; int sign; }; 10 11 static struct num normalize(double x) 12 { 13 uint64_t ix = ASUINT64(x); 14 int e = ix>>52; 15 int sign = e & 0x800; 16 e &= 0x7ff; 17 if (!e) { 18 ix = ASUINT64(x*0x1p63); 19 e = ix>>52 & 0x7ff; 20 e = e ? e-63 : 0x800; 21 } 22 ix &= (1ull<<52)-1; 23 ix |= 1ull<<52; 24 ix <<= 1; 25 e -= 0x3ff + 52 + 1; 26 return (struct num){ix,e,sign}; 27 } 28 29 static void mul(uint64_t *hi, uint64_t *lo, uint64_t x, uint64_t y) 30 { 31 uint64_t t1,t2,t3; 32 uint64_t xlo = (uint32_t)x, xhi = x>>32; 33 uint64_t ylo = (uint32_t)y, yhi = y>>32; 34 35 t1 = xlo*ylo; 36 t2 = xlo*yhi + xhi*ylo; 37 t3 = xhi*yhi; 38 *lo = t1 + (t2<<32); 39 *hi = t3 + (t2>>32) + (t1 > *lo); 40 } 41 42 double fma(double x, double y, double z) 43 { 44 #pragma STDC FENV_ACCESS ON 45 46 /* normalize so top 10bits and last bit are 0 */ 47 struct num nx, ny, nz; 48 nx = normalize(x); 49 ny = normalize(y); 50 nz = normalize(z); 51 52 if (nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN) 53 return x*y + z; 54 if (nz.e >= ZEROINFNAN) { 55 if (nz.e > ZEROINFNAN) /* z==0 */ 56 return x*y + z; 57 return z; 58 } 59 60 /* mul: r = x*y */ 61 uint64_t rhi, rlo, zhi, zlo; 62 mul(&rhi, &rlo, nx.m, ny.m); 63 /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ 64 65 /* align exponents */ 66 int e = nx.e + ny.e; 67 int d = nz.e - e; 68 /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ 69 if (d > 0) { 70 if (d < 64) { 71 zlo = nz.m<<d; 72 zhi = nz.m>>64-d; 73 } else { 74 zlo = 0; 75 zhi = nz.m; 76 e = nz.e - 64; 77 d -= 64; 78 if (d == 0) { 79 } else if (d < 64) { 80 rlo = rhi<<64-d | rlo>>d | !!(rlo<<64-d); 81 rhi = rhi>>d; 82 } else { 83 rlo = 1; 84 rhi = 0; 85 } 86 } 87 } else { 88 zhi = 0; 89 d = -d; 90 if (d == 0) { 91 zlo = nz.m; 92 } else if (d < 64) { 93 zlo = nz.m>>d | !!(nz.m<<64-d); 94 } else { 95 zlo = 1; 96 } 97 } 98 99 /* add */ 100 int sign = nx.sign^ny.sign; 101 int samesign = !(sign^nz.sign); 102 int nonzero = 1; 103 if (samesign) { 104 /* r += z */ 105 rlo += zlo; 106 rhi += zhi + (rlo < zlo); 107 } else { 108 /* r -= z */ 109 uint64_t t = rlo; 110 rlo -= zlo; 111 rhi = rhi - zhi - (t < rlo); 112 if (rhi>>63) { 113 rlo = -rlo; 114 rhi = -rhi-!!rlo; 115 sign = !sign; 116 } 117 nonzero = !!rhi; 118 } 119 120 /* set rhi to top 63bit of the result (last bit is sticky) */ 121 if (nonzero) { 122 e += 64; 123 d = a_clz_64(rhi)-1; 124 /* note: d > 0 */ 125 rhi = rhi<<d | rlo>>64-d | !!(rlo<<d); 126 } else if (rlo) { 127 d = a_clz_64(rlo)-1; 128 if (d < 0) 129 rhi = rlo>>1 | (rlo&1); 130 else 131 rhi = rlo<<d; 132 } else { 133 /* exact +-0 */ 134 return x*y + z; 135 } 136 e -= d; 137 138 /* convert to double */ 139 int64_t i = rhi; /* i is in [1<<62,(1<<63)-1] */ 140 if (sign) 141 i = -i; 142 double r = i; /* |r| is in [0x1p62,0x1p63] */ 143 144 if (e < -1022-62) { 145 /* result is subnormal before rounding */ 146 if (e == -1022-63) { 147 double c = 0x1p63; 148 if (sign) 149 c = -c; 150 if (r == c) { 151 /* min normal after rounding, underflow depends 152 on arch behaviour which can be imitated by 153 a double to float conversion */ 154 float fltmin = 0x0.ffffff8p-63*FLT_MIN * r; 155 return DBL_MIN/FLT_MIN * fltmin; 156 } 157 /* one bit is lost when scaled, add another top bit to 158 only round once at conversion if it is inexact */ 159 if (rhi << 53) { 160 i = rhi>>1 | (rhi&1) | 1ull<<62; 161 if (sign) 162 i = -i; 163 r = i; 164 r = 2*r - c; /* remove top bit */ 165 166 /* raise underflow portably, such that it 167 cannot be optimized away */ 168 { 169 double_t tiny = DBL_MIN/FLT_MIN * r; 170 r += (double)(tiny*tiny) * (r-r); 171 } 172 } 173 } else { 174 /* only round once when scaled */ 175 d = 10; 176 i = ( rhi>>d | !!(rhi<<64-d) ) << d; 177 if (sign) 178 i = -i; 179 r = i; 180 } 181 } 182 return scalbn(r, e); 183 } 184