diff --git a/library/compiler-builtins/libm/.travis.yml b/library/compiler-builtins/libm/.travis.yml index 2bbb60fc64d1..223b586ea6da 100644 --- a/library/compiler-builtins/libm/.travis.yml +++ b/library/compiler-builtins/libm/.travis.yml @@ -35,6 +35,5 @@ before_cache: branches: only: - - master - staging - trying diff --git a/library/compiler-builtins/libm/src/lib.rs b/library/compiler-builtins/libm/src/lib.rs index 9b3a6457f4ab..f40f6fb9cb8a 100644 --- a/library/compiler-builtins/libm/src/lib.rs +++ b/library/compiler-builtins/libm/src/lib.rs @@ -42,6 +42,8 @@ pub trait F32Ext: private::Sealed { fn trunc(self) -> Self; + fn fdim(self, rhs: Self) -> Self; + #[cfg(todo)] fn fract(self) -> Self; @@ -87,7 +89,6 @@ pub trait F32Ext: private::Sealed { #[cfg(todo)] fn sin(self) -> Self; - #[cfg(todo)] fn cos(self) -> Self; #[cfg(todo)] @@ -156,6 +157,11 @@ impl F32Ext for f32 { truncf(self) } + #[inline] + fn fdim(self, rhs: Self) -> Self { + fdimf(self, rhs) + } + #[cfg(todo)] #[inline] fn fract(self) -> Self { @@ -252,7 +258,6 @@ impl F32Ext for f32 { sinf(self) } - #[cfg(todo)] #[inline] fn cos(self) -> Self { cosf(self) @@ -349,13 +354,14 @@ impl F32Ext for f32 { pub trait F64Ext: private::Sealed { fn floor(self) -> Self; - #[cfg(todo)] fn ceil(self) -> Self; fn round(self) -> Self; fn trunc(self) -> Self; + fn fdim(self, rhs: Self) -> Self; + #[cfg(todo)] fn fract(self) -> Self; @@ -381,7 +387,6 @@ pub trait F64Ext: private::Sealed { fn sqrt(self) -> Self; - #[cfg(todo)] fn exp(self) -> Self; #[cfg(todo)] @@ -456,7 +461,6 @@ impl F64Ext for f64 { floor(self) } - #[cfg(todo)] #[inline] fn ceil(self) -> Self { ceil(self) @@ -472,6 +476,10 @@ impl F64Ext for f64 { trunc(self) } + #[inline] + fn fdim(self, rhs: Self) -> Self { + fdim(self, rhs) + } #[cfg(todo)] #[inline] fn fract(self) -> Self { @@ -521,7 +529,6 @@ impl F64Ext for f64 { sqrt(self) } - #[cfg(todo)] #[inline] fn exp(self) -> Self { exp(self) diff --git a/library/compiler-builtins/libm/src/math/ceil.rs b/library/compiler-builtins/libm/src/math/ceil.rs new file mode 100644 index 000000000000..4db2ca840368 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/ceil.rs @@ -0,0 +1,39 @@ +use core::f64; + +const TOINT: f64 = 1. / f64::EPSILON; + +#[inline] +pub fn ceil(x: f64) -> f64 { + let u: u64 = x.to_bits(); + let e: i64 = (u >> 52 & 0x7ff) as i64; + let y: f64; + + if e >= 0x3ff + 52 || x == 0. { + return x; + } + // y = int(x) - x, where int(x) is an integer neighbor of x + y = if (u >> 63) != 0 { + x - TOINT + TOINT - x + } else { + x + TOINT - TOINT - x + }; + // special case because of non-nearest rounding modes + if e <= 0x3ff - 1 { + force_eval!(y); + return if (u >> 63) != 0 { -0. } else { 1. }; + } + if y < 0. { + x + y + 1. + } else { + x + y + } +} + +#[cfg(test)] +mod tests { + #[test] + fn sanity_check() { + assert_eq!(super::ceil(1.1), 2.0); + assert_eq!(super::ceil(2.9), 3.0); + } +} diff --git a/library/compiler-builtins/libm/src/math/cosf.rs b/library/compiler-builtins/libm/src/math/cosf.rs new file mode 100644 index 000000000000..79df97e354a3 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/cosf.rs @@ -0,0 +1,71 @@ +use super::{k_cosf, k_sinf, rem_pio2f}; + +use core::f64::consts::FRAC_PI_2; + +/* Small multiples of pi/2 rounded to double precision. */ +const C1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ +const C2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ +const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ +const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ + +#[inline] +pub fn cosf(x: f32) -> f32 { + let x64 = x as f64; + + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 + + let mut ix = x.to_bits(); + let sign = (ix >> 31) != 0; + ix &= 0x7fffffff; + + if ix <= 0x3f490fda { + /* |x| ~<= pi/4 */ + if ix < 0x39800000 { + /* |x| < 2**-12 */ + /* raise inexact if x != 0 */ + force_eval!(x + x1p120); + return 1.; + } + return k_cosf(x64); + } + if ix <= 0x407b53d1 { + /* |x| ~<= 5*pi/4 */ + if ix > 0x4016cbe3 { + /* |x| ~> 3*pi/4 */ + return -k_cosf(if sign { x64 + C2_PIO2 } else { x64 - C2_PIO2 }); + } else { + if sign { + return k_sinf(x64 + C1_PIO2); + } else { + return k_sinf(C1_PIO2 - x64); + } + } + } + if ix <= 0x40e231d5 { + /* |x| ~<= 9*pi/4 */ + if ix > 0x40afeddf { + /* |x| ~> 7*pi/4 */ + return k_cosf(if sign { x64 + C4_PIO2 } else { x64 - C4_PIO2 }); + } else { + if sign { + return k_sinf(-x64 - C3_PIO2); + } else { + return k_sinf(x64 - C3_PIO2); + } + } + } + + /* cos(Inf or NaN) is NaN */ + if ix >= 0x7f800000 { + return x - x; + } + + /* general argument reduction needed */ + let (n, y) = rem_pio2f(x); + match n & 3 { + 0 => k_cosf(y), + 1 => k_sinf(-y), + 2 => -k_cosf(y), + _ => k_sinf(y), + } +} diff --git a/library/compiler-builtins/libm/src/math/exp.rs b/library/compiler-builtins/libm/src/math/exp.rs new file mode 100644 index 000000000000..cd63b8fb3f15 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/exp.rs @@ -0,0 +1,150 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */ +/* + * ==================================================== + * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. + * + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* exp(x) + * Returns the exponential of x. + * + * Method + * 1. Argument reduction: + * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. + * Given x, find r and integer k such that + * + * x = k*ln2 + r, |r| <= 0.5*ln2. + * + * Here r will be represented as r = hi-lo for better + * accuracy. + * + * 2. Approximation of exp(r) by a special rational function on + * the interval [0,0.34658]: + * Write + * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... + * We use a special Remez algorithm on [0,0.34658] to generate + * a polynomial of degree 5 to approximate R. The maximum error + * of this polynomial approximation is bounded by 2**-59. In + * other words, + * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 + * (where z=r*r, and the values of P1 to P5 are listed below) + * and + * | 5 | -59 + * | 2.0+P1*z+...+P5*z - R(z) | <= 2 + * | | + * The computation of exp(r) thus becomes + * 2*r + * exp(r) = 1 + ---------- + * R(r) - r + * r*c(r) + * = 1 + r + ----------- (for better accuracy) + * 2 - c(r) + * where + * 2 4 10 + * c(r) = r - (P1*r + P2*r + ... + P5*r ). + * + * 3. Scale back to obtain exp(x): + * From step 1, we have + * exp(x) = 2^k * exp(r) + * + * Special cases: + * exp(INF) is INF, exp(NaN) is NaN; + * exp(-INF) is 0, and + * for finite argument, only exp(0)=1 is exact. + * + * Accuracy: + * according to an error analysis, the error is always less than + * 1 ulp (unit in the last place). + * + * Misc. info. + * For IEEE double + * if x > 709.782712893383973096 then exp(x) overflows + * if x < -745.133219101941108420 then exp(x) underflows + */ + +use super::scalbn; + +const HALF: [f64; 2] = [0.5, -0.5]; +const LN2HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ +const LN2LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ +const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ +const P1: f64 = 1.66666666666666019037e-01; /* 0x3FC55555, 0x5555553E */ +const P2: f64 = -2.77777777770155933842e-03; /* 0xBF66C16C, 0x16BEBD93 */ +const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ +const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ +const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ + +#[inline] +pub fn exp(mut x: f64) -> f64 { + let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 + let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 + + let hi: f64; + let lo: f64; + let c: f64; + let xx: f64; + let y: f64; + let k: i32; + let sign: i32; + let mut hx: u32; + + hx = (x.to_bits() >> 32) as u32; + sign = (hx >> 31) as i32; + hx &= 0x7fffffff; /* high word of |x| */ + + /* special cases */ + if hx >= 0x4086232b { + /* if |x| >= 708.39... */ + if x.is_nan() { + return x; + } + if x > 709.782712893383973096 { + /* overflow if x!=inf */ + x *= x1p1023; + return x; + } + if x < -708.39641853226410622 { + /* underflow if x!=-inf */ + force_eval!((-x1p_149 / x) as f32); + if x < -745.13321910194110842 { + return 0.; + } + } + } + + /* argument reduction */ + if hx > 0x3fd62e42 { + /* if |x| > 0.5 ln2 */ + if hx >= 0x3ff0a2b2 { + /* if |x| >= 1.5 ln2 */ + k = (INVLN2 * x + HALF[sign as usize]) as i32; + } else { + k = 1 - sign - sign; + } + hi = x - k as f64 * LN2HI; /* k*ln2hi is exact here */ + lo = k as f64 * LN2LO; + x = hi - lo; + } else if hx > 0x3e300000 { + /* if |x| > 2**-28 */ + k = 0; + hi = x; + lo = 0.; + } else { + /* inexact if x!=0 */ + force_eval!(x1p1023 + x); + return 1. + x; + } + + /* x is now in primary range */ + xx = x * x; + c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5)))); + y = 1. + (x * c / (2. - c) - lo + hi); + if k == 0 { + y + } else { + scalbn(y, k) + } +} diff --git a/library/compiler-builtins/libm/src/math/expf.rs b/library/compiler-builtins/libm/src/math/expf.rs index cffb557719f0..8ecc3b6abd61 100644 --- a/library/compiler-builtins/libm/src/math/expf.rs +++ b/library/compiler-builtins/libm/src/math/expf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::scalbnf; const HALF: [f32; 2] = [0.5, -0.5]; diff --git a/library/compiler-builtins/libm/src/math/fdim.rs b/library/compiler-builtins/libm/src/math/fdim.rs new file mode 100644 index 000000000000..2b277eab03e1 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fdim.rs @@ -0,0 +1,15 @@ +use core::f64; + +pub fn fdim(x: f64, y: f64) -> f64 { + if x.is_nan() { + x + } else if y.is_nan() { + y + } else { + if x > y { + x - y + } else { + 0.0 + } + } +} diff --git a/library/compiler-builtins/libm/src/math/fdimf.rs b/library/compiler-builtins/libm/src/math/fdimf.rs new file mode 100644 index 000000000000..44bf2d6806ef --- /dev/null +++ b/library/compiler-builtins/libm/src/math/fdimf.rs @@ -0,0 +1,15 @@ +use core::f32; + +pub fn fdimf(x: f32, y: f32) -> f32 { + if x.is_nan() { + x + } else if y.is_nan() { + y + } else { + if x > y { + x - y + } else { + 0.0 + } + } +} diff --git a/library/compiler-builtins/libm/src/math/fmodf.rs b/library/compiler-builtins/libm/src/math/fmodf.rs index 909775249085..d84cfeb01b83 100644 --- a/library/compiler-builtins/libm/src/math/fmodf.rs +++ b/library/compiler-builtins/libm/src/math/fmodf.rs @@ -1,7 +1,6 @@ +use core::f32; use core::u32; -use super::isnanf; - #[inline] pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); @@ -11,7 +10,7 @@ pub fn fmodf(x: f32, y: f32) -> f32 { let sx = uxi & 0x80000000; let mut i; - if uyi << 1 == 0 || isnanf(y) || ex == 0xff { + if uyi << 1 == 0 || y.is_nan() || ex == 0xff { return (x * y) / (x * y); } diff --git a/library/compiler-builtins/libm/src/math/k_cosf.rs b/library/compiler-builtins/libm/src/math/k_cosf.rs new file mode 100644 index 000000000000..83d13b2e998a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_cosf.rs @@ -0,0 +1,13 @@ +/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ +const C0: f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ +const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ +const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ +const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ + +#[inline] +pub(crate) fn k_cosf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = C2 + z * C3; + (((1.0 + z * C0) + w * C1) + (w * z) * r) as f32 +} diff --git a/library/compiler-builtins/libm/src/math/k_sinf.rs b/library/compiler-builtins/libm/src/math/k_sinf.rs new file mode 100644 index 000000000000..bb2183afc7ca --- /dev/null +++ b/library/compiler-builtins/libm/src/math/k_sinf.rs @@ -0,0 +1,14 @@ +/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */ +const S1: f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ +const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ +const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ +const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ + +#[inline] +pub(crate) fn k_sinf(x: f64) -> f32 { + let z = x * x; + let w = z * z; + let r = S3 + z * S4; + let s = z * x; + ((x + s * (S1 + z * S2)) + s * w * r) as f32 +} diff --git a/library/compiler-builtins/libm/src/math/log10.rs b/library/compiler-builtins/libm/src/math/log10.rs index 137d8966f521..7c7afefa3456 100644 --- a/library/compiler-builtins/libm/src/math/log10.rs +++ b/library/compiler-builtins/libm/src/math/log10.rs @@ -1,3 +1,22 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 10 logarithm of x. See log.c for most comments. + * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2) + */ + use core::f64; const IVLN10HI: f64 = 4.34294481878168880939e-01; /* 0x3fdbcb7b, 0x15200000 */ diff --git a/library/compiler-builtins/libm/src/math/log10f.rs b/library/compiler-builtins/libm/src/math/log10f.rs index 58db093441f3..82b87c044b25 100644 --- a/library/compiler-builtins/libm/src/math/log10f.rs +++ b/library/compiler-builtins/libm/src/math/log10f.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log10.c. + */ + use core::f32; const IVLN10HI: f32 = 4.3432617188e-01; /* 0x3ede6000 */ diff --git a/library/compiler-builtins/libm/src/math/log2.rs b/library/compiler-builtins/libm/src/math/log2.rs index c0d3263e3b46..f6640d296c30 100644 --- a/library/compiler-builtins/libm/src/math/log2.rs +++ b/library/compiler-builtins/libm/src/math/log2.rs @@ -1,3 +1,22 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * Return the base 2 logarithm of x. See log.c for most comments. + * + * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 + * as in log.c, then combine and scale in extra precision: + * log2(x) = (f - f*f/2 + r)/log(2) + k + */ + use core::f64; const IVLN2HI: f64 = 1.44269504072144627571e+00; /* 0x3ff71547, 0x65200000 */ diff --git a/library/compiler-builtins/libm/src/math/log2f.rs b/library/compiler-builtins/libm/src/math/log2f.rs index 47a917bdb8d2..c007ff9b097a 100644 --- a/library/compiler-builtins/libm/src/math/log2f.rs +++ b/library/compiler-builtins/libm/src/math/log2f.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* + * See comments in log2.c. + */ + use core::f32; const IVLN2HI: f32 = 1.4428710938e+00; /* 0x3fb8b000 */ diff --git a/library/compiler-builtins/libm/src/math/logf.rs b/library/compiler-builtins/libm/src/math/logf.rs index 78c5e94ad92a..09519104174b 100644 --- a/library/compiler-builtins/libm/src/math/logf.rs +++ b/library/compiler-builtins/libm/src/math/logf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index f70555e9cc2a..f21cecef3b3c 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -7,10 +7,15 @@ macro_rules! force_eval { } mod acos; +mod ceil; mod ceilf; +mod cosf; +mod exp; mod expf; mod fabs; mod fabsf; +mod fdim; +mod fdimf; mod floor; mod floorf; mod fmodf; @@ -34,16 +39,43 @@ mod sqrtf; mod trunc; mod truncf; -//mod service; +// Use separated imports instead of {}-grouped imports for easier merging. +pub use self::acos::acos; +pub use self::ceil::ceil; +pub use self::ceilf::ceilf; +pub use self::cosf::cosf; +pub use self::exp::exp; +pub use self::expf::expf; +pub use self::fabs::fabs; +pub use self::fabsf::fabsf; +pub use self::fdim::fdim; +pub use self::fdimf::fdimf; +pub use self::floor::floor; +pub use self::floorf::floorf; +pub use self::fmodf::fmodf; +pub use self::hypot::hypot; +pub use self::hypotf::hypotf; +pub use self::log::log; +pub use self::log10::log10; +pub use self::log10f::log10f; +pub use self::log1p::log1p; +pub use self::log1pf::log1pf; +pub use self::log2::log2; +pub use self::log2f::log2f; +pub use self::logf::logf; +pub use self::powf::powf; +pub use self::round::round; +pub use self::roundf::roundf; +pub use self::scalbn::scalbn; +pub use self::scalbnf::scalbnf; +pub use self::sqrt::sqrt; +pub use self::sqrtf::sqrtf; +pub use self::trunc::trunc; +pub use self::truncf::truncf; -pub use self::{ - acos::acos, ceilf::ceilf, expf::expf, fabs::fabs, fabsf::fabsf, floor::floor, floorf::floorf, - fmodf::fmodf, hypot::hypot, hypotf::hypotf, log::log, log10::log10, log10f::log10f, - log1p::log1p, log1pf::log1pf, log2::log2, log2f::log2f, logf::logf, powf::powf, round::round, - roundf::roundf, scalbn::scalbn, scalbnf::scalbnf, sqrt::sqrt, sqrtf::sqrtf, trunc::trunc, - truncf::truncf, -}; +mod k_cosf; +mod k_sinf; +mod rem_pio2_large; +mod rem_pio2f; -fn isnanf(x: f32) -> bool { - x.to_bits() & 0x7fffffff > 0x7f800000 -} +use self::{k_cosf::k_cosf, k_sinf::k_sinf, rem_pio2_large::rem_pio2_large, rem_pio2f::rem_pio2f}; diff --git a/library/compiler-builtins/libm/src/math/powf.rs b/library/compiler-builtins/libm/src/math/powf.rs index f1dc3a5b8678..8d0afe6693cf 100644 --- a/library/compiler-builtins/libm/src/math/powf.rs +++ b/library/compiler-builtins/libm/src/math/powf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + use super::{fabsf, scalbnf, sqrtf}; const BP: [f32; 2] = [1.0, 1.5]; diff --git a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs new file mode 100644 index 000000000000..52b47279cd91 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs @@ -0,0 +1,450 @@ +use super::floor; +use super::scalbn; + +// initial value for jk +const INIT_JK: [usize; 4] = [3, 4, 4, 6]; + +// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi +// +// integer array, contains the (24*i)-th to (24*i+23)-th +// bit of 2/pi after binary point. The corresponding +// floating value is +// +// ipio2[i] * 2^(-24(i+1)). +// +// NB: This table must have at least (e0-3)/24 + jk terms. +// For quad precision (e0 <= 16360, jk = 6), this is 686. +#[cfg(target_pointer_width = "32")] +const IPIO2: [i32; 66] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, +]; + +#[cfg(target_pointer_width = "64")] +const IPIO2: [i32; 690] = [ + 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, + 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, + 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, + 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, + 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, + 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, + 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, + 0x73A8C9, 0x60E27B, 0xC08C6B, 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, + 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26, + 0xEF6B1E, 0x5EF89F, 0x3A1F35, 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, + 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, 0x467D86, 0x2D71E3, 0x9AC69B, + 0x006233, 0x7CD2B4, 0x97A7B4, 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, + 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, 0xCB2324, 0x778AD6, 0x23545A, + 0xB91F00, 0x1B0AF1, 0xDFCE19, 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, + 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, 0xDE3B58, 0x929BDE, 0x2822D2, + 0xE88628, 0x4D58E2, 0x32CAC6, 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, + 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, 0xD36710, 0xD8DDAA, 0x425FAE, + 0xCE616A, 0xA4280A, 0xB499D3, 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, + 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, 0x36D9CA, 0xD2A828, 0x8D61C2, + 0x77C912, 0x142604, 0x9B4612, 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, + 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, 0xC3E7B3, 0x28F8C7, 0x940593, + 0x3E71C1, 0xB3092E, 0xF3450B, 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, + 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, 0x9794E8, 0x84E6E2, 0x973199, + 0x6BED88, 0x365F5F, 0x0EFDBB, 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, + 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, 0x90AA47, 0x02E774, 0x24D6BD, + 0xA67DF7, 0x72486E, 0xEF169F, 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, + 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, 0x10D86D, 0x324832, 0x754C5B, + 0xD4714E, 0x6E5445, 0xC1090B, 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, + 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, 0x6AE290, 0x89D988, 0x50722C, + 0xBEA404, 0x940777, 0x7030F3, 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, + 0xFD9443, 0x8C860D, 0xDE4131, 0x9D3992, 0x8C70DD, 0xE7B717, 0x3BDF08, 0x2B3715, 0xA0805C, + 0x93805A, 0x921110, 0xD8E80F, 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, + 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, 0xAA140A, 0x2F2689, 0x768364, + 0x333B09, 0x1A940E, 0xAA3A51, 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, + 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, 0x5BC3D8, 0xC492F5, 0x4BADC6, + 0xA5CA4E, 0xCD37A7, 0x36A9E6, 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, + 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, 0x306529, 0xBF5657, 0x3AFF47, + 0xB9F96A, 0xF3BE75, 0xDF9328, 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, + 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, 0xA8654F, 0xA5C1D2, 0x0F3F0B, + 0xCD785B, 0x76F923, 0x048B7B, 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, + 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, 0xDA4886, 0xA05DF7, 0xF480C6, + 0x2FF0AC, 0x9AECDD, 0xBC5C3F, 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, + 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, 0x2A1216, 0x2DB7DC, 0xFDE5FA, + 0xFEDB89, 0xFDBE89, 0x6C76E4, 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, + 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, 0x48D784, 0x16DF30, 0x432DC7, + 0x356125, 0xCE70C9, 0xB8CB30, 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, + 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, 0xC4F133, 0x5F6E13, 0xE4305D, + 0xA92E85, 0xC3B21D, 0x3632A1, 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, + 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, 0xCBDA11, 0xD0BE7D, 0xC1DB9B, + 0xBD17AB, 0x81A2CA, 0x5C6A08, 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, + 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, 0x4F6A68, 0xA82A4A, 0x5AC44F, + 0xBCF82D, 0x985AD7, 0x95C7F4, 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, + 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, 0xD0C0B2, 0x485551, 0x0EFB1E, + 0xC37295, 0x3B06A3, 0x3540C0, 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, + 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, 0x3C3ABA, 0x461846, 0x5F7555, + 0xF5BDD2, 0xC6926E, 0x5D2EAC, 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, + 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, 0x745D7C, 0xB2AD6B, 0x9D6ECD, + 0x7B723E, 0x6A11C6, 0xA9CFF7, 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, + 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, 0xBEFDFD, 0xEF4556, 0x367ED9, + 0x13D9EC, 0xB9BA8B, 0xFC97C4, 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, + 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, 0x9C2A3E, 0xCC5F11, 0x4A0BFD, + 0xFBF4E1, 0x6D3B8E, 0x2C86E2, 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, + 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, 0xCC2254, 0xDC552A, 0xD6C6C0, + 0x96190B, 0xB8701A, 0x649569, 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, + 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, 0x9B5861, 0xBC57E1, 0xC68351, + 0x103ED8, 0x4871DD, 0xDD1C2D, 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, + 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, 0x382682, 0x9BE7CA, 0xA40D51, + 0xB13399, 0x0ED7A9, 0x480569, 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, + 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, 0x5FD45E, 0xA4677B, 0x7AACBA, + 0xA2F655, 0x23882B, 0x55BA41, 0x086E59, 0x862A21, 0x834739, 0xE6E389, 0xD49EE5, 0x40FB49, + 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, 0xAE5ADB, 0x86C547, 0x624385, + 0x3B8621, 0x94792C, 0x876110, 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, + 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, 0xB1933D, 0x0B7CBD, 0xDC51A4, + 0x63DD27, 0xDDE169, 0x19949A, 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, + 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5, + 0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, + 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, +]; + +const PIO2: [f64; 8] = [ + 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ + 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ + 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ + 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ + 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ + 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ + 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ + 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ +]; + +// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 +// +// Input parameters: +// x[] The input value (must be positive) is broken into nx +// pieces of 24-bit integers in double precision format. +// x[i] will be the i-th 24 bit of x. The scaled exponent +// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +// match x's up to 24 bits. +// +// Example of breaking a double positive z into x[0]+x[1]+x[2]: +// e0 = ilogb(z)-23 +// z = scalbn(z,-e0) +// for i = 0,1,2 +// x[i] = floor(z) +// z = (z-x[i])*2**24 +// +// y[] ouput result in an array of double precision numbers. +// The dimension of y[] is: +// 24-bit precision 1 +// 53-bit precision 2 +// 64-bit precision 2 +// 113-bit precision 3 +// The actual value is the sum of them. Thus for 113-bit +// precison, one may have to do something like: +// +// long double t,w,r_head, r_tail; +// t = (long double)y[2] + (long double)y[1]; +// w = (long double)y[0]; +// r_head = t+w; +// r_tail = w - (r_head - t); +// +// e0 The exponent of x[0]. Must be <= 16360 or you need to +// expand the ipio2 table. +// +// prec an integer indicating the precision: +// 0 24 bits (single) +// 1 53 bits (double) +// 2 64 bits (extended) +// 3 113 bits (quad) +// +// Here is the description of some local variables: +// +// jk jk+1 is the initial number of terms of ipio2[] needed +// in the computation. The minimum and recommended value +// for jk is 3,4,4,6 for single, double, extended, and quad. +// jk+1 must be 2 larger than you might expect so that our +// recomputation test works. (Up to 24 bits in the integer +// part (the 24 bits of it that we compute) and 23 bits in +// the fraction part may be lost to cancelation before we +// recompute.) +// +// jz local integer variable indicating the number of +// terms of ipio2[] used. +// +// jx nx - 1 +// +// jv index for pointing to the suitable ipio2[] for the +// computation. In general, we want +// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +// is an integer. Thus +// e0-3-24*jv >= 0 or (e0-3)/24 >= jv +// Hence jv = max(0,(e0-3)/24). +// +// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. +// +// q[] double array with integral value, representing the +// 24-bits chunk of the product of x and 2/pi. +// +// q0 the corresponding exponent of q[0]. Note that the +// exponent for q[i] would be q0-24*i. +// +// PIo2[] double precision array, obtained by cutting pi/2 +// into 24 bits chunks. +// +// f[] ipio2[] in floating point +// +// iq[] integer array by breaking up q[] in 24-bits chunk. +// +// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +// +// ih integer. If >0 it indicates q[] is >= 0.5, hence +// it also indicates the *sign* of the result. + +/// Return the last three digits of N with y = x - N*pi/2 +/// so that |y| < pi/2. +/// +/// The method is to compute the integer (mod 8) and fraction parts of +/// (2/pi)*x without doing the full multiplication. In general we +/// skip the part of the product that are known to be a huge integer ( +/// more accurately, = 0 mod 8 ). Thus the number of operations are +/// independent of the exponent of the input. +#[inline] +pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { + let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 + let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) + + #[cfg(target_pointer_width = "64")] + assert!(e0 <= 16360); + + let nx = x.len(); + + let mut fw: f64; + let mut n: i32; + let mut ih: i32; + let mut z: f64; + let mut f: [f64; 20] = [0.; 20]; + let mut fq: [f64; 20] = [0.; 20]; + let mut q: [f64; 20] = [0.; 20]; + let mut iq: [i32; 20] = [0; 20]; + + /* initialize jk*/ + let jk = INIT_JK[prec]; + let jp = jk; + + /* determine jx,jv,q0, note that 3>q0 */ + let jx = nx - 1; + let mut jv = (e0 - 3) / 24; + if jv < 0 { + jv = 0; + } + let mut q0 = e0 - 24 * (jv + 1); + let jv = jv as usize; + + /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ + let mut j = (jv - jx) as i32; + let m = jx + jk; + for i in 0..=m { + f[i] = if j < 0 { 0. } else { IPIO2[j as usize] as f64 }; + j += 1 + } + + /* compute q[0],q[1],...q[jk] */ + for i in 0..=jk { + fw = 0f64; + for j in 0..=jx { + fw += x[j] * f[jx + i - j]; + } + q[i] = fw; + } + + let mut jz = jk; + + 'recompute: loop { + /* distill q[] into iq[] reversingly */ + let mut i = 0i32; + z = q[jz]; + for j in (1..=jz).rev() { + fw = (x1p_24 * z) as i32 as f64; + iq[i as usize] = (z - x1p24 * fw) as i32; + z = q[j - 1] + fw; + i += 1; + } + + /* compute n */ + z = scalbn(z, q0); /* actual value of z */ + z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ + n = z as i32; + z -= n as f64; + ih = 0; + if q0 > 0 { + /* need iq[jz-1] to determine n */ + i = iq[jz - 1] >> (24 - q0); + n += i; + iq[jz - 1] -= i << (24 - q0); + ih = iq[jz - 1] >> (23 - q0); + } else if q0 == 0 { + ih = iq[jz - 1] >> 23; + } else if z >= 0.5 { + ih = 2; + } + + if ih > 0 { + /* q > 0.5 */ + n += 1; + let mut carry = 0i32; + for i in 0..jz { + /* compute 1-q */ + let j = iq[i]; + if carry == 0 { + if j != 0 { + carry = 1; + iq[i] = 0x1000000 - j; + } + } else { + iq[i] = 0xffffff - j; + } + } + if q0 > 0 { + /* rare case: chance is 1 in 12 */ + match q0 { + 1 => { + iq[jz - 1] &= 0x7fffff; + } + 2 => { + iq[jz - 1] &= 0x3fffff; + } + _ => {} + } + } + if ih == 2 { + z = 1. - z; + if carry != 0 { + z -= scalbn(1., q0); + } + } + } + + /* check if recomputation is needed */ + if z == 0. { + let mut j = 0; + for i in (jk..=jz - 1).rev() { + j |= iq[i]; + } + if j == 0 { + /* need recomputation */ + let mut k = 1; + while iq[jk - k] == 0 { + k += 1; /* k = no. of terms needed */ + } + + for i in (jz + 1)..=(jz + k) { + /* add q[jz+1] to q[jz+k] */ + f[jx + i] = IPIO2[jv + i] as f64; + fw = 0f64; + for j in 0..=jx { + fw += x[j] * f[jx + i - j]; + } + q[i] = fw; + } + jz += k; + continue 'recompute; + } + } + + break; + } + + /* chop off zero terms */ + if z == 0. { + jz -= 1; + q0 -= 24; + while iq[jz] == 0 { + jz -= 1; + q0 -= 24; + } + } else { + /* break z into 24-bit if necessary */ + z = scalbn(z, -q0); + if z >= x1p24 { + fw = (x1p_24 * z) as i32 as f64; + iq[jz] = (z - x1p24 * fw) as i32; + jz += 1; + q0 += 24; + iq[jz] = fw as i32; + } else { + iq[jz] = z as i32; + } + } + + /* convert integer "bit" chunk to floating-point value */ + fw = scalbn(1., q0); + for i in (0..=jz).rev() { + q[i] = fw * (iq[i] as f64); + fw *= x1p_24; + } + + /* compute PIo2[0,...,jp]*q[jz,...,0] */ + for i in (0..=jz).rev() { + fw = 0f64; + let mut k = 0; + while (k <= jp) && (k <= jz - i) { + fw += PIO2[k] * q[i + k]; + k += 1; + } + fq[jz - i] = fw; + } + + /* compress fq[] into y[] */ + match prec { + 0 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += fq[i]; + } + y[0] = if ih == 0 { fw } else { -fw }; + } + 1 | 2 => { + fw = 0f64; + for i in (0..=jz).rev() { + fw += fq[i]; + } + // TODO: drop excess precision here once double_t is used + fw = fw as f64; + y[0] = if ih == 0 { fw } else { -fw }; + fw = fq[0] - fw; + for i in 1..=jz { + fw += fq[i]; + } + y[1] = if ih == 0 { fw } else { -fw }; + } + 3 => { + /* painful */ + for i in (1..=jz).rev() { + fw = fq[i - 1] + fq[i]; + fq[i] += fq[i - 1] - fw; + fq[i - 1] = fw; + } + for i in (2..=jz).rev() { + fw = fq[i - 1] + fq[i]; + fq[i] += fq[i - 1] - fw; + fq[i - 1] = fw; + } + fw = 0f64; + for i in (2..=jz).rev() { + fw += fq[i]; + } + if ih == 0 { + y[0] = fq[0]; + y[1] = fq[1]; + y[2] = fw; + } else { + y[0] = -fq[0]; + y[1] = -fq[1]; + y[2] = -fw; + } + } + _ => unreachable!(), + } + n & 7 +} diff --git a/library/compiler-builtins/libm/src/math/rem_pio2f.rs b/library/compiler-builtins/libm/src/math/rem_pio2f.rs new file mode 100644 index 000000000000..73ec3775d3dc --- /dev/null +++ b/library/compiler-builtins/libm/src/math/rem_pio2f.rs @@ -0,0 +1,46 @@ +use super::rem_pio2_large; + +use core::f64; + +const TOINT: f64 = 1.5 / f64::EPSILON; + +/// 53 bits of 2/pi +const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ +/// first 25 bits of pi/2 +const PIO2_1: f64 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ +/// pi/2 - pio2_1 +const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ + +/// Return the remainder of x rem pi/2 in *y +/// +/// use double precision for everything except passing x +/// use __rem_pio2_large() for large x +#[inline] +pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { + let x64 = x as f64; + + let mut tx: [f64; 1] = [0.]; + let mut ty: [f64; 1] = [0.]; + + let ix = x.to_bits() & 0x7fffffff; + /* 25+53 bit pi is good enough for medium size */ + if ix < 0x4dc90fdb { + /* |x| ~< 2^28*(pi/2), medium size */ + /* Use a specialized rint() to get fn. Assume round-to-nearest. */ + let f_n = x64 * INV_PIO2 + TOINT - TOINT; + return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); + } + if ix >= 0x7f800000 { + /* x is inf or NaN */ + return (0, x64 - x64); + } + /* scale x into [2^23, 2^24-1] */ + let sign = (x.to_bits() >> 31) != 0; + let e0 = ((ix >> 23) - (0x7f + 23)) as i32; /* e0 = ilogb(|x|)-23, positive */ + tx[0] = f32::from_bits(ix - (e0 << 23) as u32) as f64; + let n = rem_pio2_large(&tx, &mut ty, e0, 0); + if sign { + return (-n, -ty[0]); + } + (n, ty[0]) +} diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs index 17de5a2e0308..cbadb49bba03 100644 --- a/library/compiler-builtins/libm/src/math/sqrt.rs +++ b/library/compiler-builtins/libm/src/math/sqrt.rs @@ -1,3 +1,81 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* sqrt(x) + * Return correctly rounded sqrt. + * ------------------------------------------ + * | Use the hardware sqrt if you have one | + * ------------------------------------------ + * Method: + * Bit by bit method using integer arithmetic. (Slow, but portable) + * 1. Normalization + * Scale x to y in [1,4) with even powers of 2: + * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then + * sqrt(x) = 2^k * sqrt(y) + * 2. Bit by bit computation + * Let q = sqrt(y) truncated to i bit after binary point (q = 1), + * i 0 + * i+1 2 + * s = 2*q , and y = 2 * ( y - q ). (1) + * i i i i + * + * To compute q from q , one checks whether + * i+1 i + * + * -(i+1) 2 + * (q + 2 ) <= y. (2) + * i + * -(i+1) + * If (2) is false, then q = q ; otherwise q = q + 2 . + * i+1 i i+1 i + * + * With some algebric manipulation, it is not difficult to see + * that (2) is equivalent to + * -(i+1) + * s + 2 <= y (3) + * i i + * + * The advantage of (3) is that s and y can be computed by + * i i + * the following recurrence formula: + * if (3) is false + * + * s = s , y = y ; (4) + * i+1 i i+1 i + * + * otherwise, + * -i -(i+1) + * s = s + 2 , y = y - s - 2 (5) + * i+1 i i+1 i i + * + * One may easily use induction to prove (4) and (5). + * Note. Since the left hand side of (3) contain only i+2 bits, + * it does not necessary to do a full (53-bit) comparison + * in (3). + * 3. Final rounding + * After generating the 53 bits result, we compute one more bit. + * Together with the remainder, we can decide whether the + * result is exact, bigger than 1/2ulp, or less than 1/2ulp + * (it will never equal to 1/2ulp). + * The rounding mode can be detected by checking whether + * huge + tiny is equal to huge, and whether huge - tiny is + * equal to huge for some floating point number "huge" and "tiny". + * + * Special cases: + * sqrt(+-0) = +-0 ... exact + * sqrt(inf) = inf + * sqrt(-ve) = NaN ... with invalid signal + * sqrt(NaN) = NaN ... with invalid signal for signaling NaN + */ + use core::f64; const TINY: f64 = 1.0e-300; diff --git a/library/compiler-builtins/libm/src/math/sqrtf.rs b/library/compiler-builtins/libm/src/math/sqrtf.rs index a265bef48eb2..49984689efc2 100644 --- a/library/compiler-builtins/libm/src/math/sqrtf.rs +++ b/library/compiler-builtins/libm/src/math/sqrtf.rs @@ -1,3 +1,18 @@ +/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + const TINY: f32 = 1.0e-30; #[inline] diff --git a/library/compiler-builtins/libm/test-generator/src/main.rs b/library/compiler-builtins/libm/test-generator/src/main.rs index d48851a9e630..a83923ab4aef 100644 --- a/library/compiler-builtins/libm/test-generator/src/main.rs +++ b/library/compiler-builtins/libm/test-generator/src/main.rs @@ -657,12 +657,11 @@ f32_f32! { // asinf, // atanf, // cbrtf, + cosf, ceilf, - // cosf, // coshf, // exp2f, expf, - // fdimf, log10f, log1pf, log2f, @@ -679,6 +678,7 @@ f32_f32! { // With signature `fn(f32, f32) -> f32` f32f32_f32! { // atan2f, + fdimf, hypotf, fmodf, powf, @@ -700,10 +700,10 @@ f64_f64! { // asin, // atan, // cbrt, - // ceil, + ceil, // cos, // cosh, - // exp, + exp, // exp2, // expm1, floor, @@ -724,7 +724,7 @@ f64_f64! { // With signature `fn(f64, f64) -> f64` f64f64_f64! { // atan2, - // fdim, + fdim, // fmod, hypot, // pow,