105: acosf asinf atanf expm1f sinf tanf r=japaric a=japaric

rebased version of rust-lang/libm#97
closes rust-lang/libm#97
cc @burrbull

Co-authored-by: Andrey Zgarbul <zgarbul.andrey@gmail.com>
This commit is contained in:
bors[bot] 2018-07-14 21:19:37 +00:00
commit e51fef4c66
10 changed files with 515 additions and 18 deletions

View file

@ -84,21 +84,16 @@ pub trait F32Ext: private::Sealed {
fn hypot(self, other: Self) -> Self;
#[cfg(todo)]
fn sin(self) -> Self;
fn cos(self) -> Self;
#[cfg(todo)]
fn tan(self) -> Self;
#[cfg(todo)]
fn asin(self) -> Self;
#[cfg(todo)]
fn acos(self) -> Self;
#[cfg(todo)]
fn atan(self) -> Self;
#[cfg(todo)]
@ -110,7 +105,6 @@ pub trait F32Ext: private::Sealed {
(self.sin(), self.cos())
}
#[cfg(todo)]
fn exp_m1(self) -> Self;
fn ln_1p(self) -> Self;
@ -248,7 +242,6 @@ impl F32Ext for f32 {
hypotf(self, other)
}
#[cfg(todo)]
#[inline]
fn sin(self) -> Self {
sinf(self)
@ -259,25 +252,21 @@ impl F32Ext for f32 {
cosf(self)
}
#[cfg(todo)]
#[inline]
fn tan(self) -> Self {
tanf(self)
}
#[cfg(todo)]
#[inline]
fn asin(self) -> Self {
asinf(self)
}
#[cfg(todo)]
#[inline]
fn acos(self) -> Self {
acosf(self)
}
#[cfg(todo)]
#[inline]
fn atan(self) -> Self {
atanf(self)
@ -289,7 +278,6 @@ impl F32Ext for f32 {
atan2f(self, other)
}
#[cfg(todo)]
#[inline]
fn exp_m1(self) -> Self {
expm1f(self)

View file

@ -0,0 +1,59 @@
use super::sqrtf::sqrtf;
const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */
const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */
const P_S0: f32 = 1.6666586697e-01;
const P_S1: f32 = -4.2743422091e-02;
const P_S2: f32 = -8.6563630030e-03;
const Q_S1: f32 = -7.0662963390e-01;
fn r(z: f32) -> f32 {
let p = z * (P_S0 + z * (P_S1 + z * P_S2));
let q = 1. + z * Q_S1;
p / q
}
#[inline]
pub fn acosf(x: f32) -> f32 {
let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120)
let z: f32;
let w: f32;
let s: f32;
let mut hx = x.to_bits();
let ix = hx & 0x7fffffff;
/* |x| >= 1 or nan */
if ix >= 0x3f800000 {
if ix == 0x3f800000 {
if (hx >> 31) != 0 {
return 2. * PIO2_HI + x1p_120;
}
return 0.;
}
return 0. / (x - x);
}
/* |x| < 0.5 */
if ix < 0x3f000000 {
if ix <= 0x32800000 {
/* |x| < 2**-26 */
return PIO2_HI + x1p_120;
}
return PIO2_HI - (x - (PIO2_LO - x * r(x * x)));
}
/* x < -0.5 */
if (hx >> 31) != 0 {
z = (1. + x) * 0.5;
s = sqrtf(z);
w = r(z) * s - PIO2_LO;
return 2. * (PIO2_HI - (s + w));
}
/* x > 0.5 */
z = (1. - x) * 0.5;
s = sqrtf(z);
hx = s.to_bits();
let df = f32::from_bits(hx & 0xfffff000);
let c = (z - df * df) / (s + df);
w = r(z) * s + c;
2. * (df + w)
}

View file

@ -0,0 +1,52 @@
use super::fabsf::fabsf;
use super::sqrt::sqrt;
const PIO2: f64 = 1.570796326794896558e+00;
/* coefficients for R(x^2) */
const P_S0: f32 = 1.6666586697e-01;
const P_S1: f32 = -4.2743422091e-02;
const P_S2: f32 = -8.6563630030e-03;
const Q_S1: f32 = -7.0662963390e-01;
fn r(z: f32) -> f32 {
let p = z * (P_S0 + z * (P_S1 + z * P_S2));
let q = 1. + z * Q_S1;
p / q
}
#[inline]
pub fn asinf(mut x: f32) -> f32 {
let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120)
let hx = x.to_bits();
let ix = hx & 0x7fffffff;
if ix >= 0x3f800000 {
/* |x| >= 1 */
if ix == 0x3f800000 {
/* |x| == 1 */
return ((x as f64) * PIO2 + x1p_120) as f32; /* asin(+-1) = +-pi/2 with inexact */
}
return 0. / (x - x); /* asin(|x|>1) is NaN */
}
if ix < 0x3f000000 {
/* |x| < 0.5 */
/* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
if (ix < 0x39800000) && (ix >= 0x00800000) {
return x;
}
return x + x * r(x * x);
}
/* 1 > |x| >= 0.5 */
let z = (1. - fabsf(x)) * 0.5;
let s = sqrt(z as f64);
x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32;
if (hx >> 31) != 0 {
-x
} else {
x
}
}

View file

@ -0,0 +1,95 @@
use super::fabsf;
const ATAN_HI: [f32; 4] = [
4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */
7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */
9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */
1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */
];
const ATAN_LO: [f32; 4] = [
5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */
3.7748947079e-08, /* atan(1.0)lo 0x33222168 */
3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */
7.5497894159e-08, /* atan(inf)lo 0x33a22168 */
];
const A_T: [f32; 5] = [
3.3333328366e-01,
-1.9999158382e-01,
1.4253635705e-01,
-1.0648017377e-01,
6.1687607318e-02,
];
#[inline]
pub fn atanf(mut x: f32) -> f32 {
let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120)
let z: f32;
let mut ix = x.to_bits();
let sign = (ix >> 31) != 0;
ix &= 0x7fffffff;
if ix >= 0x4c800000 {
/* if |x| >= 2**26 */
if x.is_nan() {
return x;
}
z = ATAN_HI[3] + x1p_120;
return if sign { -z } else { z };
}
let id = if ix < 0x3ee00000 {
/* |x| < 0.4375 */
if ix < 0x39800000 {
/* |x| < 2**-12 */
if ix < 0x00800000 {
/* raise underflow for subnormal x */
force_eval!(x * x);
}
return x;
}
-1
} else {
x = fabsf(x);
if ix < 0x3f980000 {
/* |x| < 1.1875 */
if ix < 0x3f300000 {
/* 7/16 <= |x| < 11/16 */
x = (2. * x - 1.) / (2. + x);
0
} else {
/* 11/16 <= |x| < 19/16 */
x = (x - 1.) / (x + 1.);
1
}
} else {
if ix < 0x401c0000 {
/* |x| < 2.4375 */
x = (x - 1.5) / (1. + 1.5 * x);
2
} else {
/* 2.4375 <= |x| < 2**26 */
x = -1. / x;
3
}
}
};
/* end of argument reduction */
z = x * x;
let w = z * z;
/* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
let s1 = z * (A_T[0] + w * (A_T[2] + w * A_T[4]));
let s2 = w * (A_T[1] + w * A_T[3]);
if id < 0 {
return x - x * (s1 + s2);
}
let id = id as usize;
let z = ATAN_HI[id] - ((x * (s1 + s2) - ATAN_LO[id]) - x);
if sign {
-z
} else {
z
}
}

View file

@ -0,0 +1,112 @@
const O_THRESHOLD: f32 = 8.8721679688e+01; /* 0x42b17180 */
const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */
const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */
const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */
/*
* Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]:
* |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04
* Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c):
*/
const Q1: f32 = -3.3333212137e-2; /* -0x888868.0p-28 */
const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */
#[inline]
pub fn expm1f(mut x: f32) -> f32 {
let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
let mut hx = x.to_bits();
let sign = (hx >> 31) != 0;
hx &= 0x7fffffff;
/* filter out huge and non-finite argument */
if hx >= 0x4195b844 {
/* if |x|>=27*ln2 */
if hx > 0x7f800000 {
/* NaN */
return x;
}
if sign {
return -1.;
}
if x > O_THRESHOLD {
x *= x1p127;
return x;
}
}
let k: i32;
let hi: f32;
let lo: f32;
let mut c = 0f32;
/* argument reduction */
if hx > 0x3eb17218 {
/* if |x| > 0.5 ln2 */
if hx < 0x3F851592 {
/* and |x| < 1.5 ln2 */
if !sign {
hi = x - LN2_HI;
lo = LN2_LO;
k = 1;
} else {
hi = x + LN2_HI;
lo = -LN2_LO;
k = -1;
}
} else {
k = (INV_LN2 * x + (if sign { -0.5 } else { 0.5 })) as i32;
let t = k as f32;
hi = x - t * LN2_HI; /* t*ln2_hi is exact here */
lo = t * LN2_LO;
}
x = hi - lo;
c = (hi - x) - lo;
} else if hx < 0x33000000 {
/* when |x|<2**-25, return x */
if hx < 0x00800000 {
force_eval!(x * x);
}
return x;
} else {
k = 0;
}
/* x is now in primary range */
let hfx = 0.5 * x;
let hxs = x * hfx;
let r1 = 1. + hxs * (Q1 + hxs * Q2);
let t = 3. - r1 * hfx;
let mut e = hxs * ((r1 - t) / (6. - x * t));
if k == 0 {
/* c is 0 */
return x - (x * e - hxs);
}
e = x * (e - c) - c;
e -= hxs;
/* exp(x) ~ 2^k (x_reduced - e + 1) */
if k == -1 {
return 0.5 * (x - e) - 0.5;
}
if k == 1 {
if x < -0.25 {
return -2. * (e - (x + 0.5));
}
return 1. + 2. * (x - e);
}
let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */
if (k < 0) || (k > 56) {
/* suffice to return exp(x)-1 */
let mut y = x - e + 1.;
if k == 128 {
y = y * 2. * x1p127;
} else {
y = y * twopk;
}
return y - 1.;
}
let uf = f32::from_bits(((0x7f - k) << 23) as u32); /* 2^-k */
if k < 23 {
(x - e + (1. - uf)) * twopk
} else {
(x - (e + uf) + 1.) * twopk
}
}

View file

@ -0,0 +1,35 @@
/* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */
const T: [f64; 6] = [
0.333331395030791399758, /* 0x15554d3418c99f.0p-54 */
0.133392002712976742718, /* 0x1112fd38999f72.0p-55 */
0.0533812378445670393523, /* 0x1b54c91d865afe.0p-57 */
0.0245283181166547278873, /* 0x191df3908c33ce.0p-58 */
0.00297435743359967304927, /* 0x185dadfcecf44e.0p-61 */
0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */
];
#[inline]
pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 {
let z = x * x;
/*
* Split up the polynomial into small independent terms to give
* opportunities for parallel evaluation. The chosen splitting is
* micro-optimized for Athlons (XP, X64). It costs 2 multiplications
* relative to Horner's method on sequential machines.
*
* We add the small terms from lowest degree up for efficiency on
* non-sequential machines (the lowest degree terms tend to be ready
* earlier). Apart from this, we don't care about order of
* operations, and don't need to to care since we have precision to
* spare. However, the chosen splitting is good for accuracy too,
* and would give results as accurate as Horner's method if the
* small terms were added from highest degree down.
*/
let mut r = T[4] + z * T[5];
let t = T[2] + z * T[3];
let w = z * z;
let s = z * x;
let u = T[0] + z * T[1];
r = (x + s * u) + (s * w) * (t + w * r);
(if odd { -1. / r } else { r }) as f32
}

View file

@ -7,6 +7,9 @@ macro_rules! force_eval {
}
mod acos;
mod acosf;
mod asinf;
mod atanf;
mod cbrt;
mod cbrtf;
mod ceil;
@ -17,6 +20,7 @@ mod exp2;
mod exp2f;
mod expf;
mod expm1;
mod expm1f;
mod fabs;
mod fabsf;
mod fdim;
@ -41,13 +45,18 @@ mod round;
mod roundf;
mod scalbn;
mod scalbnf;
mod sinf;
mod sqrt;
mod sqrtf;
mod tanf;
mod trunc;
mod truncf;
// Use separated imports instead of {}-grouped imports for easier merging.
pub use self::acos::acos;
pub use self::acosf::acosf;
pub use self::asinf::asinf;
pub use self::atanf::atanf;
pub use self::cbrt::cbrt;
pub use self::cbrtf::cbrtf;
pub use self::ceil::ceil;
@ -58,6 +67,7 @@ pub use self::exp2::exp2;
pub use self::exp2f::exp2f;
pub use self::expf::expf;
pub use self::expm1::expm1;
pub use self::expm1f::expm1f;
pub use self::fabs::fabs;
pub use self::fabsf::fabsf;
pub use self::fdim::fdim;
@ -82,14 +92,20 @@ pub use self::round::round;
pub use self::roundf::roundf;
pub use self::scalbn::scalbn;
pub use self::scalbnf::scalbnf;
pub use self::sinf::sinf;
pub use self::sqrt::sqrt;
pub use self::sqrtf::sqrtf;
pub use self::tanf::tanf;
pub use self::trunc::trunc;
pub use self::truncf::truncf;
mod k_cosf;
mod k_sinf;
mod k_tanf;
mod rem_pio2_large;
mod rem_pio2f;
use self::{k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f};
use self::{
k_cosf::k_cosf, k_sinf::k_sinf, k_tanf::k_tanf, rem_pio2_large::rem_pio2_large,
rem_pio2f::rem_pio2f,
};

View file

@ -0,0 +1,77 @@
use super::{k_cosf, k_sinf, rem_pio2f};
use core::f64::consts::FRAC_PI_2;
/* Small multiples of pi/2 rounded to double precision. */
const S1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */
const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */
const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */
const S4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
#[inline]
pub fn sinf(x: f32) -> f32 {
let x64 = x as f64;
let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut ix = x.to_bits();
let sign = (ix >> 31) != 0;
ix &= 0x7fffffff;
if ix <= 0x3f490fda {
/* |x| ~<= pi/4 */
if ix < 0x39800000 {
/* |x| < 2**-12 */
/* raise inexact if x!=0 and underflow if subnormal */
force_eval!(if ix < 0x00800000 {
x / x1p120
} else {
x + x1p120
});
return x;
}
return k_sinf(x64);
}
if ix <= 0x407b53d1 {
/* |x| ~<= 5*pi/4 */
if ix <= 0x4016cbe3 {
/* |x| ~<= 3pi/4 */
if sign {
return -k_cosf(x64 + S1_PIO2);
} else {
return k_cosf(x64 - S1_PIO2);
}
}
return k_sinf(if sign {
-(x64 + S2_PIO2)
} else {
-(x64 - S2_PIO2)
});
}
if ix <= 0x40e231d5 {
/* |x| ~<= 9*pi/4 */
if ix <= 0x40afeddf {
/* |x| ~<= 7*pi/4 */
if sign {
return k_cosf(x64 + S3_PIO2);
} else {
return -k_cosf(x64 - S3_PIO2);
}
}
return k_sinf(if sign { x64 + S4_PIO2 } else { x64 - S4_PIO2 });
}
/* sin(Inf or NaN) is NaN */
if ix >= 0x7f800000 {
return x - x;
}
/* general argument reduction needed */
let (n, y) = rem_pio2f(x);
match n & 3 {
0 => k_sinf(y),
1 => k_cosf(y),
2 => return k_sinf(-y),
_ => -k_cosf(y),
}
}

View file

@ -0,0 +1,62 @@
use super::{k_tanf, rem_pio2f};
use core::f64::consts::FRAC_PI_2;
/* Small multiples of pi/2 rounded to double precision. */
const T1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */
const T2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */
const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */
const T4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */
#[inline]
pub fn tanf(x: f32) -> f32 {
let x64 = x as f64;
let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut ix = x.to_bits();
let sign = (ix >> 31) != 0;
ix &= 0x7fffffff;
if ix <= 0x3f490fda {
/* |x| ~<= pi/4 */
if ix < 0x39800000 {
/* |x| < 2**-12 */
/* raise inexact if x!=0 and underflow if subnormal */
force_eval!(if ix < 0x00800000 {
x / x1p120
} else {
x + x1p120
});
return x;
}
return k_tanf(x64, false);
}
if ix <= 0x407b53d1 {
/* |x| ~<= 5*pi/4 */
if ix <= 0x4016cbe3 {
/* |x| ~<= 3pi/4 */
return k_tanf(if sign { x64 + T1_PIO2 } else { x64 - T1_PIO2 }, true);
} else {
return k_tanf(if sign { x64 + T2_PIO2 } else { x64 - T2_PIO2 }, false);
}
}
if ix <= 0x40e231d5 {
/* |x| ~<= 9*pi/4 */
if ix <= 0x40afeddf {
/* |x| ~<= 7*pi/4 */
return k_tanf(if sign { x64 + T3_PIO2 } else { x64 - T3_PIO2 }, true);
} else {
return k_tanf(if sign { x64 + T4_PIO2 } else { x64 - T4_PIO2 }, false);
}
}
/* tan(Inf or NaN) is NaN */
if ix >= 0x7f800000 {
return x - x;
}
/* argument reduction */
let (n, y) = rem_pio2f(x);
k_tanf(y, n & 1 != 0)
}

View file

@ -651,25 +651,26 @@ fn main() -> Result<(), Box<Error>> {
// With signature `fn(f32) -> f32`
f32_f32! {
// acosf,
acosf,
floorf,
truncf,
// asinf,
// atanf,
asinf,
atanf,
cbrtf,
cosf,
ceilf,
// coshf,
exp2f,
expf,
expm1f,
log10f,
log1pf,
log2f,
logf,
roundf,
// sinf,
sinf,
// sinhf,
// tanf,
tanf,
// tanhf,
fabsf,
sqrtf,