100: Implement expm1 r=japaric a=Veykril ~~Closes 13~~, closes rust-lang/libm#18 and ~~closes 14~~. ~~I wasn't sure where to put `__expo2(x: f64) -> f64` so I left it in `src/math/cosh.rs` for now.~~ Moved the function into it's own module. Edit: Didn't see that `exp` was already done in a pull request, I'll take it out once rust-lang/libm#90 lands then. 103: implement fma r=japaric a=erikdesjardins closes rust-lang/libm#19 Co-authored-by: Lukas Wirth <lukastw97@gmail.com> Co-authored-by: Erik <erikdesjardins@users.noreply.github.com>
This commit is contained in:
commit
f6bbea901b
5 changed files with 332 additions and 6 deletions
|
|
@ -366,7 +366,6 @@ pub trait F64Ext: private::Sealed {
|
|||
#[cfg(todo)]
|
||||
fn signum(self) -> Self;
|
||||
|
||||
#[cfg(todo)]
|
||||
fn mul_add(self, a: Self, b: Self) -> Self;
|
||||
|
||||
#[cfg(todo)]
|
||||
|
|
@ -425,7 +424,6 @@ pub trait F64Ext: private::Sealed {
|
|||
(self.sin(), self.cos())
|
||||
}
|
||||
|
||||
#[cfg(todo)]
|
||||
fn exp_m1(self) -> Self;
|
||||
|
||||
fn ln_1p(self) -> Self;
|
||||
|
|
@ -485,7 +483,6 @@ impl F64Ext for f64 {
|
|||
fabs(self)
|
||||
}
|
||||
|
||||
#[cfg(todo)]
|
||||
#[inline]
|
||||
fn mul_add(self, a: Self, b: Self) -> Self {
|
||||
fma(self, a, b)
|
||||
|
|
@ -604,7 +601,6 @@ impl F64Ext for f64 {
|
|||
atan2(self, other)
|
||||
}
|
||||
|
||||
#[cfg(todo)]
|
||||
#[inline]
|
||||
fn exp_m1(self) -> Self {
|
||||
expm1(self)
|
||||
|
|
|
|||
125
library/compiler-builtins/libm/src/math/expm1.rs
Normal file
125
library/compiler-builtins/libm/src/math/expm1.rs
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
use core::f64;
|
||||
|
||||
const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */
|
||||
const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */
|
||||
const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */
|
||||
const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */
|
||||
/* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */
|
||||
const Q1: f64 = -3.33333333333331316428e-02; /* BFA11111 111110F4 */
|
||||
const Q2: f64 = 1.58730158725481460165e-03; /* 3F5A01A0 19FE5585 */
|
||||
const Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */
|
||||
const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */
|
||||
const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */
|
||||
|
||||
#[allow(warnings)]
|
||||
pub fn expm1(mut x: f64) -> f64 {
|
||||
let hi: f64;
|
||||
let lo: f64;
|
||||
let k: i32;
|
||||
let c: f64;
|
||||
let mut t: f64;
|
||||
let mut y: f64;
|
||||
|
||||
let mut ui = x.to_bits();
|
||||
let hx = ((ui >> 32) & 0x7fffffff) as u32;
|
||||
let sign = (ui >> 63) as i32;
|
||||
|
||||
/* filter out huge and non-finite argument */
|
||||
if hx >= 0x4043687A {
|
||||
/* if |x|>=56*ln2 */
|
||||
if x.is_nan() {
|
||||
return x;
|
||||
}
|
||||
if sign != 0 {
|
||||
return -1.0;
|
||||
}
|
||||
if x > O_THRESHOLD {
|
||||
x *= f64::from_bits(0x7fe0000000000000);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
||||
/* argument reduction */
|
||||
if hx > 0x3fd62e42 {
|
||||
/* if |x| > 0.5 ln2 */
|
||||
if hx < 0x3FF0A2B2 {
|
||||
/* and |x| < 1.5 ln2 */
|
||||
if sign == 0 {
|
||||
hi = x - LN2_HI;
|
||||
lo = LN2_LO;
|
||||
k = 1;
|
||||
} else {
|
||||
hi = x + LN2_HI;
|
||||
lo = -LN2_LO;
|
||||
k = -1;
|
||||
}
|
||||
} else {
|
||||
k = (INVLN2 * x + if sign != 0 { -0.5 } else { 0.5 }) as i32;
|
||||
t = k as f64;
|
||||
hi = x - t * LN2_HI; /* t*ln2_hi is exact here */
|
||||
lo = t * LN2_LO;
|
||||
}
|
||||
x = hi - lo;
|
||||
c = (hi - x) - lo;
|
||||
} else if hx < 0x3c900000 {
|
||||
/* |x| < 2**-54, return x */
|
||||
if hx < 0x00100000 {
|
||||
force_eval!(x);
|
||||
}
|
||||
return x;
|
||||
} else {
|
||||
c = 0.0;
|
||||
k = 0;
|
||||
}
|
||||
|
||||
/* x is now in primary range */
|
||||
let hfx = 0.5 * x;
|
||||
let hxs = x * hfx;
|
||||
let r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
|
||||
t = 3.0 - r1 * hfx;
|
||||
let mut e = hxs * ((r1 - t) / (6.0 - x * t));
|
||||
if k == 0 {
|
||||
/* c is 0 */
|
||||
return x - (x * e - hxs);
|
||||
}
|
||||
e = x * (e - c) - c;
|
||||
e -= hxs;
|
||||
/* exp(x) ~ 2^k (x_reduced - e + 1) */
|
||||
if k == -1 {
|
||||
return 0.5 * (x - e) - 0.5;
|
||||
}
|
||||
if k == 1 {
|
||||
if x < -0.25 {
|
||||
return -2.0 * (e - (x + 0.5));
|
||||
}
|
||||
return 1.0 + 2.0 * (x - e);
|
||||
}
|
||||
ui = ((0x3ff + k) as u64) << 52; /* 2^k */
|
||||
let twopk = f64::from_bits(ui);
|
||||
if k < 0 || k > 56 {
|
||||
/* suffice to return exp(x)-1 */
|
||||
y = x - e + 1.0;
|
||||
if k == 1024 {
|
||||
y = y * 2.0 * f64::from_bits(0x7fe0000000000000);
|
||||
} else {
|
||||
y = y * twopk;
|
||||
}
|
||||
return y - 1.0;
|
||||
}
|
||||
ui = ((0x3ff - k) as u64) << 52; /* 2^-k */
|
||||
let uf = f64::from_bits(ui);
|
||||
if k < 20 {
|
||||
y = (x - e + (1.0 - uf)) * twopk;
|
||||
} else {
|
||||
y = (x - (e + uf) + 1.0) * twopk;
|
||||
}
|
||||
y
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
fn sanity_check() {
|
||||
assert_eq!(super::expm1(1.1), 2.0041660239464334);
|
||||
}
|
||||
}
|
||||
201
library/compiler-builtins/libm/src/math/fma.rs
Normal file
201
library/compiler-builtins/libm/src/math/fma.rs
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
use core::{f32, f64};
|
||||
|
||||
use super::scalbn;
|
||||
|
||||
const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1;
|
||||
|
||||
struct Num {
|
||||
m: u64,
|
||||
e: i32,
|
||||
sign: i32,
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn normalize(x: f64) -> Num {
|
||||
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
|
||||
|
||||
let mut ix: u64 = x.to_bits();
|
||||
let mut e: i32 = (ix >> 52) as i32;
|
||||
let sign: i32 = e & 0x800;
|
||||
e &= 0x7ff;
|
||||
if e == 0 {
|
||||
ix = (x * x1p63).to_bits();
|
||||
e = (ix >> 52) as i32 & 0x7ff;
|
||||
e = if e != 0 { e - 63 } else { 0x800 };
|
||||
}
|
||||
ix &= (1 << 52) - 1;
|
||||
ix |= 1 << 52;
|
||||
ix <<= 1;
|
||||
e -= 0x3ff + 52 + 1;
|
||||
Num { m: ix, e, sign }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn mul(x: u64, y: u64) -> (u64, u64) {
|
||||
let t1: u64;
|
||||
let t2: u64;
|
||||
let t3: u64;
|
||||
let xlo: u64 = x as u32 as u64;
|
||||
let xhi: u64 = x >> 32;
|
||||
let ylo: u64 = y as u32 as u64;
|
||||
let yhi: u64 = y >> 32;
|
||||
|
||||
t1 = xlo * ylo;
|
||||
t2 = xlo * yhi + xhi * ylo;
|
||||
t3 = xhi * yhi;
|
||||
let lo = t1 + (t2 << 32);
|
||||
let hi = t3 + (t2 >> 32) + (t1 > lo) as u64;
|
||||
(hi, lo)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
|
||||
let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63
|
||||
let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63
|
||||
|
||||
/* normalize so top 10bits and last bit are 0 */
|
||||
let nx = normalize(x);
|
||||
let ny = normalize(y);
|
||||
let nz = normalize(z);
|
||||
|
||||
if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN {
|
||||
return x * y + z;
|
||||
}
|
||||
if nz.e >= ZEROINFNAN {
|
||||
if nz.e > ZEROINFNAN {
|
||||
/* z==0 */
|
||||
return x * y + z;
|
||||
}
|
||||
return z;
|
||||
}
|
||||
|
||||
/* mul: r = x*y */
|
||||
let zhi: u64;
|
||||
let zlo: u64;
|
||||
let (mut rhi, mut rlo) = mul(nx.m, ny.m);
|
||||
/* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */
|
||||
|
||||
/* align exponents */
|
||||
let mut e: i32 = nx.e + ny.e;
|
||||
let mut d: i32 = nz.e - e;
|
||||
/* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */
|
||||
if d > 0 {
|
||||
if d < 64 {
|
||||
zlo = nz.m << d;
|
||||
zhi = nz.m >> 64 - d;
|
||||
} else {
|
||||
zlo = 0;
|
||||
zhi = nz.m;
|
||||
e = nz.e - 64;
|
||||
d -= 64;
|
||||
if d == 0 {
|
||||
} else if d < 64 {
|
||||
rlo = rhi << 64 - d | rlo >> d | ((rlo << 64 - d) != 0) as u64;
|
||||
rhi = rhi >> d;
|
||||
} else {
|
||||
rlo = 1;
|
||||
rhi = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
zhi = 0;
|
||||
d = -d;
|
||||
if d == 0 {
|
||||
zlo = nz.m;
|
||||
} else if d < 64 {
|
||||
zlo = nz.m >> d | ((nz.m << 64 - d) != 0) as u64;
|
||||
} else {
|
||||
zlo = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* add */
|
||||
let mut sign: i32 = nx.sign ^ ny.sign;
|
||||
let samesign: bool = (sign ^ nz.sign) == 0;
|
||||
let mut nonzero: i32 = 1;
|
||||
if samesign {
|
||||
/* r += z */
|
||||
rlo += zlo;
|
||||
rhi += zhi + (rlo < zlo) as u64;
|
||||
} else {
|
||||
/* r -= z */
|
||||
let t = rlo;
|
||||
rlo -= zlo;
|
||||
rhi = rhi - zhi - (t < rlo) as u64;
|
||||
if (rhi >> 63) != 0 {
|
||||
rlo = (-(rlo as i64)) as u64;
|
||||
rhi = (-(rhi as i64)) as u64 - (rlo != 0) as u64;
|
||||
sign = (sign == 0) as i32;
|
||||
}
|
||||
nonzero = (rhi != 0) as i32;
|
||||
}
|
||||
|
||||
/* set rhi to top 63bit of the result (last bit is sticky) */
|
||||
if nonzero != 0 {
|
||||
e += 64;
|
||||
d = rhi.leading_zeros() as i32 - 1;
|
||||
/* note: d > 0 */
|
||||
rhi = rhi << d | rlo >> 64 - d | ((rlo << d) != 0) as u64;
|
||||
} else if rlo != 0 {
|
||||
d = rlo.leading_zeros() as i32 - 1;
|
||||
if d < 0 {
|
||||
rhi = rlo >> 1 | (rlo & 1);
|
||||
} else {
|
||||
rhi = rlo << d;
|
||||
}
|
||||
} else {
|
||||
/* exact +-0 */
|
||||
return x * y + z;
|
||||
}
|
||||
e -= d;
|
||||
|
||||
/* convert to double */
|
||||
let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */
|
||||
if sign != 0 {
|
||||
i = -i;
|
||||
}
|
||||
let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */
|
||||
|
||||
if e < -1022 - 62 {
|
||||
/* result is subnormal before rounding */
|
||||
if e == -1022 - 63 {
|
||||
let mut c: f64 = x1p63;
|
||||
if sign != 0 {
|
||||
c = -c;
|
||||
}
|
||||
if r == c {
|
||||
/* min normal after rounding, underflow depends
|
||||
on arch behaviour which can be imitated by
|
||||
a double to float conversion */
|
||||
let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32;
|
||||
return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64;
|
||||
}
|
||||
/* one bit is lost when scaled, add another top bit to
|
||||
only round once at conversion if it is inexact */
|
||||
if (rhi << 53) != 0 {
|
||||
i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64;
|
||||
if sign != 0 {
|
||||
i = -i;
|
||||
}
|
||||
r = i as f64;
|
||||
r = 2. * r - c; /* remove top bit */
|
||||
|
||||
/* raise underflow portably, such that it
|
||||
cannot be optimized away */
|
||||
{
|
||||
let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r;
|
||||
r += (tiny * tiny) * (r - r);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* only round once when scaled */
|
||||
d = 10;
|
||||
i = ((rhi >> d | ((rhi << 64 - d) != 0) as u64) << d) as i64;
|
||||
if sign != 0 {
|
||||
i = -i;
|
||||
}
|
||||
r = i as f64;
|
||||
}
|
||||
}
|
||||
scalbn(r, e)
|
||||
}
|
||||
|
|
@ -16,12 +16,14 @@ mod exp;
|
|||
mod exp2;
|
||||
mod exp2f;
|
||||
mod expf;
|
||||
mod expm1;
|
||||
mod fabs;
|
||||
mod fabsf;
|
||||
mod fdim;
|
||||
mod fdimf;
|
||||
mod floor;
|
||||
mod floorf;
|
||||
mod fma;
|
||||
mod fmod;
|
||||
mod fmodf;
|
||||
mod hypot;
|
||||
|
|
@ -55,12 +57,14 @@ pub use self::exp::exp;
|
|||
pub use self::exp2::exp2;
|
||||
pub use self::exp2f::exp2f;
|
||||
pub use self::expf::expf;
|
||||
pub use self::expm1::expm1;
|
||||
pub use self::fabs::fabs;
|
||||
pub use self::fabsf::fabsf;
|
||||
pub use self::fdim::fdim;
|
||||
pub use self::fdimf::fdimf;
|
||||
pub use self::floor::floor;
|
||||
pub use self::floorf::floorf;
|
||||
pub use self::fma::fma;
|
||||
pub use self::fmod::fmod;
|
||||
pub use self::fmodf::fmodf;
|
||||
pub use self::hypot::hypot;
|
||||
|
|
|
|||
|
|
@ -705,7 +705,7 @@ f64_f64! {
|
|||
// cosh,
|
||||
exp,
|
||||
exp2,
|
||||
// expm1,
|
||||
expm1,
|
||||
floor,
|
||||
log,
|
||||
log10,
|
||||
|
|
@ -732,7 +732,7 @@ f64f64_f64! {
|
|||
|
||||
// With signature `fn(f64, f64, f64) -> f64`
|
||||
f64f64f64_f64! {
|
||||
// fma,
|
||||
fma,
|
||||
}
|
||||
|
||||
// With signature `fn(f64, i32) -> f64`
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue