From 865fedeac63f00c180c62b279f01eade7dee0761 Mon Sep 17 00:00:00 2001
From: Erik <erikdesjardins@users.noreply.github.com>
Date: Sat, 14 Jul 2018 15:22:41 -0400
Subject: [PATCH] implement fmaf

---
 library/compiler-builtins/libm/src/lib.rs     |  2 -
 .../compiler-builtins/libm/src/math/fmaf.rs   | 94 +++++++++++++++++++
 .../compiler-builtins/libm/src/math/mod.rs    |  2 +
 .../libm/test-generator/src/main.rs           |  2 +-
 4 files changed, 97 insertions(+), 3 deletions(-)
 create mode 100644 library/compiler-builtins/libm/src/math/fmaf.rs

diff --git a/library/compiler-builtins/libm/src/lib.rs b/library/compiler-builtins/libm/src/lib.rs
index 45213a25f7ab..f0fdd464c644 100644
--- a/library/compiler-builtins/libm/src/lib.rs
+++ b/library/compiler-builtins/libm/src/lib.rs
@@ -52,7 +52,6 @@ pub trait F32Ext: private::Sealed {
     #[cfg(todo)]
     fn signum(self) -> Self;
 
-    #[cfg(todo)]
     fn mul_add(self, a: Self, b: Self) -> Self;
 
     #[cfg(todo)]
@@ -161,7 +160,6 @@ impl F32Ext for f32 {
         fabsf(self)
     }
 
-    #[cfg(todo)]
     #[inline]
     fn mul_add(self, a: Self, b: Self) -> Self {
         fmaf(self, a, b)
diff --git a/library/compiler-builtins/libm/src/math/fmaf.rs b/library/compiler-builtins/libm/src/math/fmaf.rs
new file mode 100644
index 000000000000..70d2c54a27e3
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/fmaf.rs
@@ -0,0 +1,94 @@
+/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */
+/*-
+ * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+use core::f32;
+use core::ptr::read_volatile;
+
+/*
+ * Fused multiply-add: Compute x * y + z with a single rounding error.
+ *
+ * A double has more than twice as much precision than a float, so
+ * direct double-precision arithmetic suffices, except where double
+ * rounding occurs.
+ */
+#[inline]
+pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 {
+    let xy: f64;
+    let mut result: f64;
+    let mut ui: u64;
+    let e: i32;
+
+    xy = x as f64 * y as f64;
+    result = xy + z as f64;
+    ui = result.to_bits();
+    e = (ui >> 52) as i32 & 0x7ff;
+    /* Common case: The double precision result is fine. */
+    if (
+        /* not a halfway case */
+        ui & 0x1fffffff) != 0x10000000 ||
+        /* NaN */
+        e == 0x7ff ||
+        /* exact */
+        (result - xy == z as f64 && result - z as f64 == xy) ||
+        /* not round-to-nearest */
+        fegetround() != FE_TONEAREST
+    {
+        /*
+            underflow may not be raised correctly, example:
+            fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
+        */
+        if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) {
+            feclearexcept(FE_INEXACT);
+            /* TODO: gcc and clang bug workaround */
+            let vz: f32 = unsafe { read_volatile(&z) };
+            result = xy + vz as f64;
+            if fetestexcept(FE_INEXACT) {
+                feraiseexcept(FE_UNDERFLOW);
+            } else {
+                feraiseexcept(FE_INEXACT);
+            }
+        }
+        z = result as f32;
+        return z;
+    }
+
+    /*
+     * If result is inexact, and exactly halfway between two float values,
+     * we need to adjust the low-order bit in the direction of the error.
+     */
+    fesetround(FE_TOWARDZERO);
+    let vxy: f64 = unsafe { read_volatile(&xy) }; /* XXX work around gcc CSE bug */
+    let mut adjusted_result: f64 = vxy + z as f64;
+    fesetround(FE_TONEAREST);
+    if result == adjusted_result {
+        ui = adjusted_result.to_bits();
+        ui += 1;
+        adjusted_result = f64::from_bits(ui);
+    }
+    z = adjusted_result as f32;
+    z
+}
diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs
index eda6b5b72931..ad9aead7c386 100644
--- a/library/compiler-builtins/libm/src/math/mod.rs
+++ b/library/compiler-builtins/libm/src/math/mod.rs
@@ -33,6 +33,7 @@ mod fdimf;
 mod floor;
 mod floorf;
 mod fma;
+mod fmaf;
 mod fmod;
 mod fmodf;
 mod hypot;
@@ -89,6 +90,7 @@ pub use self::fdimf::fdimf;
 pub use self::floor::floor;
 pub use self::floorf::floorf;
 pub use self::fma::fma;
+pub use self::fmaf::fmaf;
 pub use self::fmod::fmod;
 pub use self::fmodf::fmodf;
 pub use self::hypot::hypot;
diff --git a/library/compiler-builtins/libm/test-generator/src/main.rs b/library/compiler-builtins/libm/test-generator/src/main.rs
index 7caada4ef3e9..a2a301d133cf 100644
--- a/library/compiler-builtins/libm/test-generator/src/main.rs
+++ b/library/compiler-builtins/libm/test-generator/src/main.rs
@@ -687,7 +687,7 @@ f32f32_f32! {
 
 // With signature `fn(f32, f32, f32) -> f32`
 f32f32f32_f32! {
-    // fmaf,
+    fmaf,
 }
 
 // With signature `fn(f32, i32) -> f32`