1 /* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ 2 /*- 3 * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <fenv.h> 29 #include <math.h> 30 #include <stdint.h> 31 32 /* 33 * Fused multiply-add: Compute x * y + z with a single rounding error. 34 * 35 * A double has more than twice as much precision than a float, so 36 * direct double-precision arithmetic suffices, except where double 37 * rounding occurs. 38 */ 39 float fmaf(float x, float y, float z) 40 { 41 #pragma STDC FENV_ACCESS ON 42 double xy, result; 43 union {double f; uint64_t i;} u; 44 int e; 45 46 xy = (double)x * y; 47 result = xy + z; 48 u.f = result; 49 e = u.i>>52 & 0x7ff; 50 /* Common case: The double precision result is fine. */ 51 if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */ 52 e == 0x7ff || /* NaN */ 53 (result - xy == z && result - z == xy) || /* exact */ 54 fegetround() != FE_TONEAREST) /* not round-to-nearest */ 55 { 56 /* 57 underflow may not be raised correctly, example: 58 fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) 59 */ 60 #if defined(FE_INEXACT) && defined(FE_UNDERFLOW) 61 if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT)) { 62 feclearexcept(FE_INEXACT); 63 /* TODO: gcc and clang bug workaround */ 64 volatile float vz = z; 65 result = xy + vz; 66 if (fetestexcept(FE_INEXACT)) 67 feraiseexcept(FE_UNDERFLOW); 68 else 69 feraiseexcept(FE_INEXACT); 70 } 71 #endif 72 z = result; 73 return z; 74 } 75 76 /* 77 * If result is inexact, and exactly halfway between two float values, 78 * we need to adjust the low-order bit in the direction of the error. 79 */ 80 #ifdef FE_TOWARDZERO 81 fesetround(FE_TOWARDZERO); 82 #endif 83 volatile double vxy = xy; /* XXX work around gcc CSE bug */ 84 double adjusted_result = vxy + z; 85 fesetround(FE_TONEAREST); 86 if (result == adjusted_result) { 87 u.f = adjusted_result; 88 u.i++; 89 adjusted_result = u.f; 90 } 91 z = adjusted_result; 92 return z; 93 } 94