From 865fedeac63f00c180c62b279f01eade7dee0761 Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 15:22:41 -0400 Subject: [PATCH] implement fmaf --- library/compiler-builtins/libm/src/lib.rs | 2 - .../compiler-builtins/libm/src/math/fmaf.rs | 94 +++++++++++++++++++ .../compiler-builtins/libm/src/math/mod.rs | 2 + .../libm/test-generator/src/main.rs | 2 +- 4 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/fmaf.rs diff --git a/library/compiler-builtins/libm/src/lib.rs b/library/compiler-builtins/libm/src/lib.rs index 45213a25f7ab..f0fdd464c644 100644 --- a/library/compiler-builtins/libm/src/lib.rs +++ b/library/compiler-builtins/libm/src/lib.rs @@ -52,7 +52,6 @@ pub trait F32Ext: private::Sealed { #[cfg(todo)] fn signum(self) -> Self; - #[cfg(todo)] fn mul_add(self, a: Self, b: Self) -> Self; #[cfg(todo)] @@ -161,7 +160,6 @@ impl F32Ext for f32 { fabsf(self) } - #[cfg(todo)] #[inline] fn mul_add(self, a: Self, b: Self) -> Self { fmaf(self, a, b) diff --git a/library/compiler-builtins/libm/src/math/fmaf.rs b/library/compiler-builtins/libm/src/math/fmaf.rs new file mode 100644 index 000000000000..70d2c54a27e3 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fmaf.rs @@ -0,0 +1,94 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ +/*- + * Copyright (c) 2005-2011 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +use core::f32; +use core::ptr::read_volatile; + +/* + * Fused multiply-add: Compute x * y + z with a single rounding error. + * + * A double has more than twice as much precision than a float, so + * direct double-precision arithmetic suffices, except where double + * rounding occurs. + */ +#[inline] +pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { + let xy: f64; + let mut result: f64; + let mut ui: u64; + let e: i32; + + xy = x as f64 * y as f64; + result = xy + z as f64; + ui = result.to_bits(); + e = (ui >> 52) as i32 & 0x7ff; + /* Common case: The double precision result is fine. */ + if ( + /* not a halfway case */ + ui & 0x1fffffff) != 0x10000000 || + /* NaN */ + e == 0x7ff || + /* exact */ + (result - xy == z as f64 && result - z as f64 == xy) || + /* not round-to-nearest */ + fegetround() != FE_TONEAREST + { + /* + underflow may not be raised correctly, example: + fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) + */ + if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) { + feclearexcept(FE_INEXACT); + /* TODO: gcc and clang bug workaround */ + let vz: f32 = unsafe { read_volatile(&z) }; + result = xy + vz as f64; + if fetestexcept(FE_INEXACT) { + feraiseexcept(FE_UNDERFLOW); + } else { + feraiseexcept(FE_INEXACT); + } + } + z = result as f32; + return z; + } + + /* + * If result is inexact, and exactly halfway between two float values, + * we need to adjust the low-order bit in the direction of the error. + */ + fesetround(FE_TOWARDZERO); + let vxy: f64 = unsafe { read_volatile(&xy) }; /* XXX work around gcc CSE bug */ + let mut adjusted_result: f64 = vxy + z as f64; + fesetround(FE_TONEAREST); + if result == adjusted_result { + ui = adjusted_result.to_bits(); + ui += 1; + adjusted_result = f64::from_bits(ui); + } + z = adjusted_result as f32; + z +} diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index eda6b5b72931..ad9aead7c386 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -33,6 +33,7 @@ mod fdimf; mod floor; mod floorf; mod fma; +mod fmaf; mod fmod; mod fmodf; mod hypot; @@ -89,6 +90,7 @@ pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fma::fma; +pub use self::fmaf::fmaf; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::hypot::hypot; diff --git a/library/compiler-builtins/libm/test-generator/src/main.rs b/library/compiler-builtins/libm/test-generator/src/main.rs index 7caada4ef3e9..a2a301d133cf 100644 --- a/library/compiler-builtins/libm/test-generator/src/main.rs +++ b/library/compiler-builtins/libm/test-generator/src/main.rs @@ -687,7 +687,7 @@ f32f32_f32! { // With signature `fn(f32, f32, f32) -> f32` f32f32f32_f32! { - // fmaf, + fmaf, } // With signature `fn(f32, i32) -> f32`