From 65b49bb7a0fa37ff6fd9481844fc0af56e2a613f Mon Sep 17 00:00:00 2001 From: Erik Date: Sat, 14 Jul 2018 15:21:23 -0400 Subject: [PATCH] implement fma --- library/compiler-builtins/libm/src/lib.rs | 2 - .../compiler-builtins/libm/src/math/fma.rs | 201 ++++++++++++++++++ .../compiler-builtins/libm/src/math/mod.rs | 2 + .../libm/test-generator/src/main.rs | 2 +- 4 files changed, 204 insertions(+), 3 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/fma.rs diff --git a/library/compiler-builtins/libm/src/lib.rs b/library/compiler-builtins/libm/src/lib.rs index bef4667875a2..c3a20059330c 100644 --- a/library/compiler-builtins/libm/src/lib.rs +++ b/library/compiler-builtins/libm/src/lib.rs @@ -366,7 +366,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn signum(self) -> Self; - #[cfg(todo)] fn mul_add(self, a: Self, b: Self) -> Self; #[cfg(todo)] @@ -485,7 +484,6 @@ impl F64Ext for f64 { fabs(self) } - #[cfg(todo)] #[inline] fn mul_add(self, a: Self, b: Self) -> Self { fma(self, a, b) diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs new file mode 100644 index 000000000000..99a27164a81e --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fma.rs @@ -0,0 +1,201 @@ +use core::{f32, f64}; + +use super::scalbn; + +const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; + +struct Num { + m: u64, + e: i32, + sign: i32, +} + +#[inline] +fn normalize(x: f64) -> Num { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + + let mut ix: u64 = x.to_bits(); + let mut e: i32 = (ix >> 52) as i32; + let sign: i32 = e & 0x800; + e &= 0x7ff; + if e == 0 { + ix = (x * x1p63).to_bits(); + e = (ix >> 52) as i32 & 0x7ff; + e = if e != 0 { e - 63 } else { 0x800 }; + } + ix &= (1 << 52) - 1; + ix |= 1 << 52; + ix <<= 1; + e -= 0x3ff + 52 + 1; + Num { m: ix, e, sign } +} + +#[inline] +fn mul(x: u64, y: u64) -> (u64, u64) { + let t1: u64; + let t2: u64; + let t3: u64; + let xlo: u64 = x as u32 as u64; + let xhi: u64 = x >> 32; + let ylo: u64 = y as u32 as u64; + let yhi: u64 = y >> 32; + + t1 = xlo * ylo; + t2 = xlo * yhi + xhi * ylo; + t3 = xhi * yhi; + let lo = t1 + (t2 << 32); + let hi = t3 + (t2 >> 32) + (t1 > lo) as u64; + (hi, lo) +} + +#[inline] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 + + /* normalize so top 10bits and last bit are 0 */ + let nx = normalize(x); + let ny = normalize(y); + let nz = normalize(z); + + if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { + return x * y + z; + } + if nz.e >= ZEROINFNAN { + if nz.e > ZEROINFNAN { + /* z==0 */ + return x * y + z; + } + return z; + } + + /* mul: r = x*y */ + let zhi: u64; + let zlo: u64; + let (mut rhi, mut rlo) = mul(nx.m, ny.m); + /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ + + /* align exponents */ + let mut e: i32 = nx.e + ny.e; + let mut d: i32 = nz.e - e; + /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ + if d > 0 { + if d < 64 { + zlo = nz.m << d; + zhi = nz.m >> 64 - d; + } else { + zlo = 0; + zhi = nz.m; + e = nz.e - 64; + d -= 64; + if d == 0 { + } else if d < 64 { + rlo = rhi << 64 - d | rlo >> d | ((rlo << 64 - d) != 0) as u64; + rhi = rhi >> d; + } else { + rlo = 1; + rhi = 0; + } + } + } else { + zhi = 0; + d = -d; + if d == 0 { + zlo = nz.m; + } else if d < 64 { + zlo = nz.m >> d | ((nz.m << 64 - d) != 0) as u64; + } else { + zlo = 1; + } + } + + /* add */ + let mut sign: i32 = nx.sign ^ ny.sign; + let samesign: bool = (sign ^ nz.sign) == 0; + let mut nonzero: i32 = 1; + if samesign { + /* r += z */ + rlo += zlo; + rhi += zhi + (rlo < zlo) as u64; + } else { + /* r -= z */ + let t = rlo; + rlo -= zlo; + rhi = rhi - zhi - (t < rlo) as u64; + if (rhi >> 63) != 0 { + rlo = (-(rlo as i64)) as u64; + rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64; + sign = (sign == 0) as i32; + } + nonzero = (rhi != 0) as i32; + } + + /* set rhi to top 63bit of the result (last bit is sticky) */ + if nonzero != 0 { + e += 64; + d = rhi.leading_zeros() as i32 - 1; + /* note: d > 0 */ + rhi = rhi << d | rlo >> 64 - d | ((rlo << d) != 0) as u64; + } else if rlo != 0 { + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + rhi = rlo >> 1 | (rlo & 1); + } else { + rhi = rlo << d; + } + } else { + /* exact +-0 */ + return x * y + z; + } + e -= d; + + /* convert to double */ + let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */ + if sign != 0 { + i = -i; + } + let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */ + + if e < -1022 - 62 { + /* result is subnormal before rounding */ + if e == -1022 - 63 { + let mut c: f64 = x1p63; + if sign != 0 { + c = -c; + } + if r == c { + /* min normal after rounding, underflow depends + on arch behaviour which can be imitated by + a double to float conversion */ + let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; + return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; + } + /* one bit is lost when scaled, add another top bit to + only round once at conversion if it is inexact */ + if (rhi << 53) != 0 { + i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + r = 2. * r - c; /* remove top bit */ + + /* raise underflow portably, such that it + cannot be optimized away */ + { + let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; + r += (tiny * tiny) * (r - r); + } + } + } else { + /* only round once when scaled */ + d = 10; + i = ((rhi >> d | ((rhi << 64 - d) != 0) as u64) << d) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + } + } + scalbn(r, e) +} diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index 828e3cb1a7e3..c9a34fae42b7 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -22,6 +22,7 @@ mod fdim; mod fdimf; mod floor; mod floorf; +mod fma; mod fmod; mod fmodf; mod hypot; @@ -61,6 +62,7 @@ pub use self::fdim::fdim; pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; +pub use self::fma::fma; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::hypot::hypot; diff --git a/library/compiler-builtins/libm/test-generator/src/main.rs b/library/compiler-builtins/libm/test-generator/src/main.rs index e5b8fdc1d4f3..f4b7cd7cafb8 100644 --- a/library/compiler-builtins/libm/test-generator/src/main.rs +++ b/library/compiler-builtins/libm/test-generator/src/main.rs @@ -732,7 +732,7 @@ f64f64_f64! { // With signature `fn(f64, f64, f64) -> f64` f64f64f64_f64! { - // fma, + fma, } // With signature `fn(f64, i32) -> f64`