Merge rust-lang/libm#106

106: implement fmaf r=japaric a=erikdesjardins closes rust-lang/libm#20 Co-authored-by: Erik <erikdesjardins@users.noreply.github.com>
2018-07-15 18:09:03 +00:00 · 2018-07-15 18:09:03 +00:00 · d04494b693
commit d04494b693
parent 8d94ce8a9d 7084ce52b7
5 changed files with 137 additions and 3 deletions
--- a/library/compiler-builtins/libm/src/lib.rs
+++ b/library/compiler-builtins/libm/src/lib.rs
@ -52,7 +52,6 @@ pub trait F32Ext: private::Sealed {
    #[cfg(todo)]
    fn signum(self) -> Self;

-    #[cfg(todo)]
    fn mul_add(self, a: Self, b: Self) -> Self;

    #[cfg(todo)]
@ -161,7 +160,6 @@ impl F32Ext for f32 {
        fabsf(self)
    }

-    #[cfg(todo)]
    #[inline]
    fn mul_add(self, a: Self, b: Self) -> Self {
        fmaf(self, a, b)
--- a/library/compiler-builtins/libm/src/math/fenv.rs
+++ b/library/compiler-builtins/libm/src/math/fenv.rs
@ -0,0 +1,33 @@
+// src: musl/src/fenv/fenv.c
+/* Dummy functions for archs lacking fenv implementation */
+
+pub(crate) const FE_UNDERFLOW: i32 = 0; // placeholder value: the stub functions below track no flags
+pub(crate) const FE_INEXACT: i32 = 0; // placeholder value: identical to FE_UNDERFLOW
+
+pub(crate) const FE_TONEAREST: i32 = 0; // the value `fegetround` below always returns
+pub(crate) const FE_TOWARDZERO: i32 = 0; // identical to FE_TONEAREST, so the two cannot be told apart
+
+#[inline]
+pub(crate) fn feclearexcept(_mask: i32) -> i32 {
+    0 // no-op stub: `_mask` is ignored, nothing is cleared, and 0 is always returned
+}
+
+#[inline]
+pub(crate) fn feraiseexcept(_mask: i32) -> i32 {
+    0 // no-op stub: `_mask` is ignored, nothing is raised, and 0 is always returned
+}
+
+#[inline]
+pub(crate) fn fetestexcept(_mask: i32) -> i32 {
+    0 // stub: `_mask` is ignored and 0 is always returned, so a caller's `!= 0` test never succeeds
+}
+
+#[inline]
+pub(crate) fn fegetround() -> i32 {
+    FE_TONEAREST // stub: unconditionally reports FE_TONEAREST (defined as 0 in this file)
+}
+
+#[inline]
+pub(crate) fn fesetround(_r: i32) -> i32 {
+    0 // no-op stub: `_r` is ignored, no rounding mode is changed, and 0 is always returned
+}
--- a/library/compiler-builtins/libm/src/math/fmaf.rs
+++ b/library/compiler-builtins/libm/src/math/fmaf.rs
@ -0,0 +1,100 @@
+/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
+/*-
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+use core::f32;
+use core::ptr::read_volatile;
+
+use super::fenv::{
+    feclearexcept, fegetround, feraiseexcept, fesetround, fetestexcept, FE_INEXACT, FE_TONEAREST,
+    FE_TOWARDZERO, FE_UNDERFLOW,
+};
+
+/*
+ * Fused multiply-add: Compute x * y + z with a single rounding error.
+ *
+ * A double has more than twice as much precision as a float, so
+ * direct double-precision arithmetic suffices, except where double
+ * rounding occurs.
+ */
+#[inline]
+pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    let xy: f64 = x as f64 * y as f64;
+    let mut result: f64 = xy + z as f64;
+    let mut ui: u64 = result.to_bits();
+    // biased f64 exponent of the rounded sum
+    let e: i32 = (ui >> 52) as i32 & 0x7ff;
+
+    /* Common case: The double precision result is fine. */
+    if (ui & 0x1fffffff) != 0x10000000 /* not a halfway case */
+        || e == 0x7ff /* NaN */
+        || (result - xy == z as f64 && result - z as f64 == xy) /* exact */
+        || fegetround() != FE_TONEAREST /* not round-to-nearest */
+    {
+        /*
+            underflow may not be raised correctly, example:
+            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
+        */
+        if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 {
+            feclearexcept(FE_INEXACT);
+            // prevent `xy + vz` from being CSE'd with `xy + z` above
+            // SAFETY: `&z` points at a live, initialized local `f32`.
+            let vz: f32 = unsafe { read_volatile(&z) };
+            result = xy + vz as f64;
+            if fetestexcept(FE_INEXACT) != 0 {
+                feraiseexcept(FE_UNDERFLOW);
+            } else {
+                feraiseexcept(FE_INEXACT);
+            }
+        }
+        return result as f32;
+    }
+
+    /*
+     * If result is inexact, and exactly halfway between two float values,
+     * we need to adjust the low-order bit in the direction of the error.
+     *
+     * The error is recovered arithmetically instead of re-adding under
+     * FE_TOWARDZERO: with the no-op `fesetround` stub that re-add always
+     * equalled `result`, so the bit was bumped upward unconditionally.
+     * (`fesetround` and `FE_TOWARDZERO` are no longer used by this function.)
+     */
+    // sign bit of the rounded sum
+    let neg = (ui >> 63) != 0;
+    // subtract `result` from the larger-magnitude addend first
+    let err = if neg == (z as f64 > xy) {
+        xy - result + z as f64
+    } else {
+        z as f64 - result + xy
+    };
+    // step one f64 ulp away from zero if the true sum is larger in magnitude, else toward zero
+    if neg == (err < 0.0) {
+        ui += 1;
+    } else {
+        ui -= 1;
+    }
+    f64::from_bits(ui) as f32
+}
--- a/library/compiler-builtins/libm/src/math/mod.rs
+++ b/library/compiler-builtins/libm/src/math/mod.rs
@ -33,6 +33,7 @@ mod fdimf;
 mod floor;
 mod floorf;
 mod fma;
+mod fmaf;
 mod fmod;
 mod fmodf;
 mod hypot;
@ -90,6 +91,7 @@ pub use self::fdimf::fdimf;
 pub use self::floor::floor;
 pub use self::floorf::floorf;
 pub use self::fma::fma;
+pub use self::fmaf::fmaf;
 pub use self::fmod::fmod;
 pub use self::fmodf::fmodf;
 pub use self::hypot::hypot;
@ -122,6 +124,7 @@ pub use self::truncf::truncf;

 // Private modules
 mod expo2;
+mod fenv;
 mod k_cos;
 mod k_cosf;
 mod k_expo2f;
--- a/library/compiler-builtins/libm/test-generator/src/main.rs
+++ b/library/compiler-builtins/libm/test-generator/src/main.rs
@ -687,7 +687,7 @@ f32f32_f32! {

 // With signature `fn(f32, f32, f32) -> f32`
 f32f32f32_f32! {
-    // fmaf,
+    fmaf,
 }

 // With signature `fn(f32, i32) -> f32`