1*f504f610SAugustin Cavalier #include <math.h> 2*f504f610SAugustin Cavalier #include <stdint.h> 3*f504f610SAugustin Cavalier fmod(double x,double y)4*f504f610SAugustin Cavalierdouble fmod(double x, double y) 5*f504f610SAugustin Cavalier { 6*f504f610SAugustin Cavalier union {double f; uint64_t i;} ux = {x}, uy = {y}; 7*f504f610SAugustin Cavalier int ex = ux.i>>52 & 0x7ff; 8*f504f610SAugustin Cavalier int ey = uy.i>>52 & 0x7ff; 9*f504f610SAugustin Cavalier int sx = ux.i>>63; 10*f504f610SAugustin Cavalier uint64_t i; 11*f504f610SAugustin Cavalier 12*f504f610SAugustin Cavalier /* in the followings uxi should be ux.i, but then gcc wrongly adds */ 13*f504f610SAugustin Cavalier /* float load/store to inner loops ruining performance and code size */ 14*f504f610SAugustin Cavalier uint64_t uxi = ux.i; 15*f504f610SAugustin Cavalier 16*f504f610SAugustin Cavalier if (uy.i<<1 == 0 || isnan(y) || ex == 0x7ff) 17*f504f610SAugustin Cavalier return (x*y)/(x*y); 18*f504f610SAugustin Cavalier if (uxi<<1 <= uy.i<<1) { 19*f504f610SAugustin Cavalier if (uxi<<1 == uy.i<<1) 20*f504f610SAugustin Cavalier return 0*x; 21*f504f610SAugustin Cavalier return x; 22*f504f610SAugustin Cavalier } 23*f504f610SAugustin Cavalier 24*f504f610SAugustin Cavalier /* normalize x and y */ 25*f504f610SAugustin Cavalier if (!ex) { 26*f504f610SAugustin Cavalier for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1); 27*f504f610SAugustin Cavalier uxi <<= -ex + 1; 28*f504f610SAugustin Cavalier } else { 29*f504f610SAugustin Cavalier uxi &= -1ULL >> 12; 30*f504f610SAugustin Cavalier uxi |= 1ULL << 52; 31*f504f610SAugustin Cavalier } 32*f504f610SAugustin Cavalier if (!ey) { 33*f504f610SAugustin Cavalier for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1); 34*f504f610SAugustin Cavalier uy.i <<= -ey + 1; 35*f504f610SAugustin Cavalier } else { 36*f504f610SAugustin Cavalier uy.i &= -1ULL >> 12; 37*f504f610SAugustin Cavalier uy.i |= 1ULL << 52; 38*f504f610SAugustin Cavalier } 39*f504f610SAugustin Cavalier 40*f504f610SAugustin Cavalier /* x mod y */ 41*f504f610SAugustin Cavalier for (; ex > ey; ex--) { 42*f504f610SAugustin Cavalier i = uxi - uy.i; 43*f504f610SAugustin Cavalier if (i >> 63 == 0) { 44*f504f610SAugustin Cavalier if (i == 0) 45*f504f610SAugustin Cavalier return 0*x; 46*f504f610SAugustin Cavalier uxi = i; 47*f504f610SAugustin Cavalier } 48*f504f610SAugustin Cavalier uxi <<= 1; 49*f504f610SAugustin Cavalier } 50*f504f610SAugustin Cavalier i = uxi - uy.i; 51*f504f610SAugustin Cavalier if (i >> 63 == 0) { 52*f504f610SAugustin Cavalier if (i == 0) 53*f504f610SAugustin Cavalier return 0*x; 54*f504f610SAugustin Cavalier uxi = i; 55*f504f610SAugustin Cavalier } 56*f504f610SAugustin Cavalier for (; uxi>>52 == 0; uxi <<= 1, ex--); 57*f504f610SAugustin Cavalier 58*f504f610SAugustin Cavalier /* scale result */ 59*f504f610SAugustin Cavalier if (ex > 0) { 60*f504f610SAugustin Cavalier uxi -= 1ULL << 52; 61*f504f610SAugustin Cavalier uxi |= (uint64_t)ex << 52; 62*f504f610SAugustin Cavalier } else { 63*f504f610SAugustin Cavalier uxi >>= -ex + 1; 64*f504f610SAugustin Cavalier } 65*f504f610SAugustin Cavalier uxi |= (uint64_t)sx << 63; 66*f504f610SAugustin Cavalier ux.i = uxi; 67*f504f610SAugustin Cavalier return ux.f; 68*f504f610SAugustin Cavalier } 69