diff --git a/library/compiler-builtins/libm/src/lib.rs b/library/compiler-builtins/libm/src/lib.rs index bef4667875a2..27925e806c09 100644 --- a/library/compiler-builtins/libm/src/lib.rs +++ b/library/compiler-builtins/libm/src/lib.rs @@ -366,7 +366,6 @@ pub trait F64Ext: private::Sealed { #[cfg(todo)] fn signum(self) -> Self; - #[cfg(todo)] fn mul_add(self, a: Self, b: Self) -> Self; #[cfg(todo)] @@ -425,7 +424,6 @@ pub trait F64Ext: private::Sealed { (self.sin(), self.cos()) } - #[cfg(todo)] fn exp_m1(self) -> Self; fn ln_1p(self) -> Self; @@ -485,7 +483,6 @@ impl F64Ext for f64 { fabs(self) } - #[cfg(todo)] #[inline] fn mul_add(self, a: Self, b: Self) -> Self { fma(self, a, b) @@ -604,7 +601,6 @@ impl F64Ext for f64 { atan2(self, other) } - #[cfg(todo)] #[inline] fn exp_m1(self) -> Self { expm1(self) diff --git a/library/compiler-builtins/libm/src/math/expm1.rs b/library/compiler-builtins/libm/src/math/expm1.rs new file mode 100644 index 000000000000..48082cd745cf --- /dev/null +++ b/library/compiler-builtins/libm/src/math/expm1.rs @@ -0,0 +1,125 @@ +use core::f64; + +const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */ +const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */ +const Q1: f64 = -3.33333333333331316428e-02; /* BFA11111 111110F4 */ +const Q2: f64 = 1.58730158725481460165e-03; /* 3F5A01A0 19FE5585 */ +const Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */ +const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ +const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ + +#[allow(warnings)] +pub fn expm1(mut x: f64) -> f64 { + let hi: f64; + let lo: f64; + let k: i32; + let c: f64; + let mut t: f64; + let mut y: f64; + + let mut ui = x.to_bits(); + let hx = ((ui >> 32) & 0x7fffffff) as u32; + let sign = (ui >> 63) as i32; + + /* filter out huge and non-finite argument */ + if hx >= 0x4043687A { + /* if |x|>=56*ln2 */ + if x.is_nan() { + return x; + } + if sign != 0 { + return -1.0; + } + if x > O_THRESHOLD { + x *= f64::from_bits(0x7fe0000000000000); + return x; + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx < 0x3FF0A2B2 { + /* and |x| < 1.5 ln2 */ + if sign == 0 { + hi = x - LN2_HI; + lo = LN2_LO; + k = 1; + } else { + hi = x + LN2_HI; + lo = -LN2_LO; + k = -1; + } + } else { + k = (INVLN2 * x + if sign != 0 { -0.5 } else { 0.5 }) as i32; + t = k as f64; + hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ + lo = t * LN2_LO; + } + x = hi - lo; + c = (hi - x) - lo; + } else if hx < 0x3c900000 { + /* |x| < 2**-54, return x */ + if hx < 0x00100000 { + force_eval!(x); + } + return x; + } else { + c = 0.0; + k = 0; + } + + /* x is now in primary range */ + let hfx = 0.5 * x; + let hxs = x * hfx; + let r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); + t = 3.0 - r1 * hfx; + let mut e = hxs * ((r1 - t) / (6.0 - x * t)); + if k == 0 { + /* c is 0 */ + return x - (x * e - hxs); + } + e = x * (e - c) - c; + e -= hxs; + /* exp(x) ~ 2^k (x_reduced - e + 1) */ + if k == -1 { + return 0.5 * (x - e) - 0.5; + } + if k == 1 { + if x < -0.25 { + return -2.0 * (e - (x + 0.5)); + } + return 1.0 + 2.0 * (x - e); + } + ui = ((0x3ff + k) as u64) << 52; /* 2^k */ + let twopk = f64::from_bits(ui); + if k < 0 || k > 56 { + /* suffice to return exp(x)-1 */ + y = x - e + 1.0; + if k == 1024 { + y = y * 2.0 * f64::from_bits(0x7fe0000000000000); + } else { + y = y * twopk; + } + return y - 1.0; + } + ui = ((0x3ff - k) as u64) << 52; /* 2^-k */ + let uf = f64::from_bits(ui); + if k < 20 { + y = (x - e + (1.0 - uf)) * twopk; + } else { + y = (x - (e + uf) + 1.0) * twopk; + } + y +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::expm1(1.1), 2.0041660239464334); + } +} diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs new file mode 100644 index 000000000000..99a27164a81e --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fma.rs @@ -0,0 +1,201 @@ +use core::{f32, f64}; + +use super::scalbn; + +const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; + +struct Num { + m: u64, + e: i32, + sign: i32, +} + +#[inline] +fn normalize(x: f64) -> Num { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + + let mut ix: u64 = x.to_bits(); + let mut e: i32 = (ix >> 52) as i32; + let sign: i32 = e & 0x800; + e &= 0x7ff; + if e == 0 { + ix = (x * x1p63).to_bits(); + e = (ix >> 52) as i32 & 0x7ff; + e = if e != 0 { e - 63 } else { 0x800 }; + } + ix &= (1 << 52) - 1; + ix |= 1 << 52; + ix <<= 1; + e -= 0x3ff + 52 + 1; + Num { m: ix, e, sign } +} + +#[inline] +fn mul(x: u64, y: u64) -> (u64, u64) { + let t1: u64; + let t2: u64; + let t3: u64; + let xlo: u64 = x as u32 as u64; + let xhi: u64 = x >> 32; + let ylo: u64 = y as u32 as u64; + let yhi: u64 = y >> 32; + + t1 = xlo * ylo; + t2 = xlo * yhi + xhi * ylo; + t3 = xhi * yhi; + let lo = t1 + (t2 << 32); + let hi = t3 + (t2 >> 32) + (t1 > lo) as u64; + (hi, lo) +} + +#[inline] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 + let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 + + /* normalize so top 10bits and last bit are 0 */ + let nx = normalize(x); + let ny = normalize(y); + let nz = normalize(z); + + if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { + return x * y + z; + } + if nz.e >= ZEROINFNAN { + if nz.e > ZEROINFNAN { + /* z==0 */ + return x * y + z; + } + return z; + } + + /* mul: r = x*y */ + let zhi: u64; + let zlo: u64; + let (mut rhi, mut rlo) = mul(nx.m, ny.m); + /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ + + /* align exponents */ + let mut e: i32 = nx.e + ny.e; + let mut d: i32 = nz.e - e; + /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ + if d > 0 { + if d < 64 { + zlo = nz.m << d; + zhi = nz.m >> 64 - d; + } else { + zlo = 0; + zhi = nz.m; + e = nz.e - 64; + d -= 64; + if d == 0 { + } else if d < 64 { + rlo = rhi << 64 - d | rlo >> d | ((rlo << 64 - d) != 0) as u64; + rhi = rhi >> d; + } else { + rlo = 1; + rhi = 0; + } + } + } else { + zhi = 0; + d = -d; + if d == 0 { + zlo = nz.m; + } else if d < 64 { + zlo = nz.m >> d | ((nz.m << 64 - d) != 0) as u64; + } else { + zlo = 1; + } + } + + /* add */ + let mut sign: i32 = nx.sign ^ ny.sign; + let samesign: bool = (sign ^ nz.sign) == 0; + let mut nonzero: i32 = 1; + if samesign { + /* r += z */ + rlo += zlo; + rhi += zhi + (rlo < zlo) as u64; + } else { + /* r -= z */ + let t = rlo; + rlo -= zlo; + rhi = rhi - zhi - (t < rlo) as u64; + if (rhi >> 63) != 0 { + rlo = (-(rlo as i64)) as u64; + rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64; + sign = (sign == 0) as i32; + } + nonzero = (rhi != 0) as i32; + } + + /* set rhi to top 63bit of the result (last bit is sticky) */ + if nonzero != 0 { + e += 64; + d = rhi.leading_zeros() as i32 - 1; + /* note: d > 0 */ + rhi = rhi << d | rlo >> 64 - d | ((rlo << d) != 0) as u64; + } else if rlo != 0 { + d = rlo.leading_zeros() as i32 - 1; + if d < 0 { + rhi = rlo >> 1 | (rlo & 1); + } else { + rhi = rlo << d; + } + } else { + /* exact +-0 */ + return x * y + z; + } + e -= d; + + /* convert to double */ + let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */ + if sign != 0 { + i = -i; + } + let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */ + + if e < -1022 - 62 { + /* result is subnormal before rounding */ + if e == -1022 - 63 { + let mut c: f64 = x1p63; + if sign != 0 { + c = -c; + } + if r == c { + /* min normal after rounding, underflow depends + on arch behaviour which can be imitated by + a double to float conversion */ + let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; + return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; + } + /* one bit is lost when scaled, add another top bit to + only round once at conversion if it is inexact */ + if (rhi << 53) != 0 { + i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + r = 2. * r - c; /* remove top bit */ + + /* raise underflow portably, such that it + cannot be optimized away */ + { + let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; + r += (tiny * tiny) * (r - r); + } + } + } else { + /* only round once when scaled */ + d = 10; + i = ((rhi >> d | ((rhi << 64 - d) != 0) as u64) << d) as i64; + if sign != 0 { + i = -i; + } + r = i as f64; + } + } + scalbn(r, e) +} diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index 828e3cb1a7e3..c903d3787620 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -16,12 +16,14 @@ mod exp; mod exp2; mod exp2f; mod expf; +mod expm1; mod fabs; mod fabsf; mod fdim; mod fdimf; mod floor; mod floorf; +mod fma; mod fmod; mod fmodf; mod hypot; @@ -55,12 +57,14 @@ pub use self::exp::exp; pub use self::exp2::exp2; pub use self::exp2f::exp2f; pub use self::expf::expf; +pub use self::expm1::expm1; pub use self::fabs::fabs; pub use self::fabsf::fabsf; pub use self::fdim::fdim; pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; +pub use self::fma::fma; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::hypot::hypot; diff --git a/library/compiler-builtins/libm/test-generator/src/main.rs b/library/compiler-builtins/libm/test-generator/src/main.rs index e5b8fdc1d4f3..aa50f57cdb72 100644 --- a/library/compiler-builtins/libm/test-generator/src/main.rs +++ b/library/compiler-builtins/libm/test-generator/src/main.rs @@ -705,7 +705,7 @@ f64_f64! { // cosh, exp, exp2, - // expm1, + expm1, floor, log, log10, @@ -732,7 +732,7 @@ f64f64_f64! { // With signature `fn(f64, f64, f64) -> f64` f64f64f64_f64! { - // fma, + fma, } // With signature `fn(f64, i32) -> f64`