Port the most recent version of Musl's sqrt as a generic algorithm
Musl commit 97e9b73d59 ("math: new software sqrt") adds a new algorithm
using Goldschmidt division. Port this algorithm to Rust and make it
generic, which shows a notable performance improvement over the existing
algorithm.
This also allows adding square root routines for `f16` and `f128`.
This commit is contained in:
parent
8927014e91
commit
573ded2ee8
8 changed files with 450 additions and 393 deletions
|
|
@ -704,6 +704,7 @@
|
|||
"src/libm_helper.rs",
|
||||
"src/math/arch/i686.rs",
|
||||
"src/math/arch/wasm32.rs",
|
||||
"src/math/generic/sqrt.rs",
|
||||
"src/math/sqrt.rs"
|
||||
],
|
||||
"type": "f64"
|
||||
|
|
@ -712,6 +713,7 @@
|
|||
"sources": [
|
||||
"src/math/arch/i686.rs",
|
||||
"src/math/arch/wasm32.rs",
|
||||
"src/math/generic/sqrt.rs",
|
||||
"src/math/sqrtf.rs"
|
||||
],
|
||||
"type": "f32"
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
mod copysign;
|
||||
mod fabs;
|
||||
mod fdim;
|
||||
mod sqrt;
|
||||
mod trunc;
|
||||
|
||||
pub use copysign::copysign;
|
||||
pub use fabs::fabs;
|
||||
pub use fdim::fdim;
|
||||
pub use sqrt::sqrt;
|
||||
pub use trunc::trunc;
|
||||
|
|
|
|||
419
library/compiler-builtins/libm/src/math/generic/sqrt.rs
Normal file
419
library/compiler-builtins/libm/src/math/generic/sqrt.rs
Normal file
|
|
@ -0,0 +1,419 @@
|
|||
/* SPDX-License-Identifier: MIT */
|
||||
/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */
|
||||
|
||||
//! Generic square root algorithm.
|
||||
//!
|
||||
//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa
|
||||
//! within the range [1, 4). A table lookup provides an initial estimate, then goldschmidt
|
||||
//! iterations at various widths are used to approach the real values.
|
||||
//!
|
||||
//! For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number
|
||||
//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4).
|
||||
//!
|
||||
//! With Newton-Raphson iterations, this would be:
|
||||
//!
|
||||
//! - `w = r * r w ~ 1 / m`
|
||||
//! - `u = 3 - m * w u ~ 3 - m * w = 3 - m / m = 2`
|
||||
//! - `r = r * u / 2 r ~ r`
|
||||
//!
|
||||
//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r),
|
||||
//! since the value of performing one iteration is in reducing the error representable by `~`).
|
||||
//!
|
||||
//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate
|
||||
//! `s = m * r`:
|
||||
//!
|
||||
//! - `s = m * r s ~ m / sqrt(m)`
|
||||
//! - `u = 3 - s * r u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2`
|
||||
//! - `r = r * u / 2 r ~ r`
|
||||
//! - `s = s * u / 2 s ~ s`
|
||||
//!
|
||||
//! The above is precise because it uses the original value `m`. There is also a faster version
|
||||
//! that performs fewer steps but does not use `m`:
|
||||
//!
|
||||
//! - `u = 3 - s * r u ~ 3 - 1`
|
||||
//! - `r = r * u / 2 r ~ r`
|
||||
//! - `s = s * u / 2 s ~ s`
|
||||
//!
|
||||
//! Rounding errors accumulate faster with the second version, so it is only used for subsequent
|
||||
//! iterations within the same width integer. The first version is always used for the first
|
||||
//! iteration at a new width in order to avoid this accumulation.
|
||||
//!
|
||||
//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are
|
||||
//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it.
|
||||
|
||||
use super::super::support::{IntTy, cold_path, raise_invalid};
|
||||
use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
|
||||
|
||||
pub fn sqrt<F>(x: F) -> F
|
||||
where
|
||||
F: Float + SqrtHelper,
|
||||
F::Int: HInt,
|
||||
F::Int: From<u8>,
|
||||
F::Int: From<F::ISet2>,
|
||||
F::Int: CastInto<F::ISet1>,
|
||||
F::Int: CastInto<F::ISet2>,
|
||||
u32: CastInto<F::Int>,
|
||||
{
|
||||
let zero = IntTy::<F>::ZERO;
|
||||
let one = IntTy::<F>::ONE;
|
||||
|
||||
let mut ix = x.to_bits();
|
||||
|
||||
// Top is the exponent and sign, which may or may not be shifted. If the float fits into a
|
||||
// `u32`, we can get by without paying shifting costs.
|
||||
let noshift = F::BITS <= u32::BITS;
|
||||
let (mut top, special_case) = if noshift {
|
||||
let exp_lsb = one << F::SIG_BITS;
|
||||
let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb;
|
||||
(Exp::NoShift(()), special_case)
|
||||
} else {
|
||||
let top = u32::cast_from(ix >> F::SIG_BITS);
|
||||
let special_case = top.wrapping_sub(1) >= F::EXP_MAX - 1;
|
||||
(Exp::Shifted(top), special_case)
|
||||
};
|
||||
|
||||
// Handle NaN, zero, and out of domain (<= 0)
|
||||
if special_case {
|
||||
cold_path();
|
||||
|
||||
// +/-0
|
||||
if ix << 1 == zero {
|
||||
return x;
|
||||
}
|
||||
|
||||
// Positive infinity
|
||||
if ix == F::EXP_MASK {
|
||||
return x;
|
||||
}
|
||||
|
||||
// NaN or negative
|
||||
if ix > F::EXP_MASK {
|
||||
return raise_invalid(x);
|
||||
}
|
||||
|
||||
// Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles).
|
||||
let scaled = x * F::from_parts(false, (F::SIG_BITS + F::EXP_BIAS) as i32, zero);
|
||||
ix = scaled.to_bits();
|
||||
match top {
|
||||
Exp::Shifted(ref mut v) => {
|
||||
*v = scaled.exp().unsigned();
|
||||
*v = (*v).wrapping_sub(F::SIG_BITS);
|
||||
}
|
||||
Exp::NoShift(()) => {
|
||||
ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reduce arguments such that `x = 4^e * m`:
|
||||
//
|
||||
// - m_u2 ∈ [1, 4), a fixed point U2.BITS number
|
||||
// - 2^e is the exponent part of the result
|
||||
let (m_u2, exp) = match top {
|
||||
Exp::Shifted(top) => {
|
||||
// We now know `x` is positive, so `top` is just its (biased) exponent
|
||||
let mut e = top;
|
||||
// Construct a fixed point representation of the mantissa.
|
||||
let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS;
|
||||
let even = (e & 1) != 0;
|
||||
if even {
|
||||
m_u2 >>= 1;
|
||||
}
|
||||
e = (e.wrapping_add(F::EXP_MAX >> 1)) >> 1;
|
||||
(m_u2, Exp::Shifted(e))
|
||||
}
|
||||
Exp::NoShift(()) => {
|
||||
let even = ix & (one << F::SIG_BITS) != zero;
|
||||
|
||||
// Exponent part of the return value
|
||||
let mut e_noshift = ix >> 1;
|
||||
// ey &= (F::EXP_MASK << 2) >> 2; // clear the top exponent bit (result = 1.0)
|
||||
e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1;
|
||||
e_noshift &= F::EXP_MASK;
|
||||
|
||||
let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK;
|
||||
let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK;
|
||||
let m_u2 = if even { m0 } else { m1 };
|
||||
|
||||
(m_u2, Exp::NoShift(e_noshift))
|
||||
}
|
||||
};
|
||||
|
||||
// Extract the top 6 bits of the significand with the lowest bit of the exponent.
|
||||
let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111;
|
||||
|
||||
// Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an
|
||||
// initial value for `s = sqrt(m)`. See the module documentation for details.
|
||||
let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16);
|
||||
let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast();
|
||||
|
||||
// Perform iterations, if any, at quarter width (used for `f128`).
|
||||
let (r1_u0, _s1_u2) = goldschmidt::<F, F::ISet1>(r1_u0, s1_u2, F::SET1_ROUNDS, false);
|
||||
|
||||
// Widen values and perform iterations at half width (used for `f64` and `f128`).
|
||||
let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS);
|
||||
let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast();
|
||||
let (r2_u0, _s2_u2) = goldschmidt::<F, F::ISet2>(r2_u0, s2_u2, F::SET2_ROUNDS, false);
|
||||
|
||||
// Perform final iterations at full width (used for all float types).
|
||||
let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS);
|
||||
let s_u2: F::Int = m_u2;
|
||||
let (_r_u0, s_u2) = goldschmidt::<F, F::Int>(r_u0, s_u2, F::FINAL_ROUNDS, true);
|
||||
|
||||
// Shift back to mantissa position.
|
||||
let mut m = s_u2 >> (F::EXP_BITS - 2);
|
||||
|
||||
// The musl source includes the following comment (with literals replaced):
|
||||
//
|
||||
// > s < sqrt(m) < s + 0x1.09p-SIG_BITS
|
||||
// > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or
|
||||
// > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m.
|
||||
//
|
||||
// Expanding this with , with `SIG_BITS = p` and adjusting based on the operations done to
|
||||
// `d0` and `d1`:
|
||||
//
|
||||
// - `2^(2p)m ≟ ((2^p)m + 0.5)^2`
|
||||
// - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25`
|
||||
// - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25`
|
||||
// - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?)
|
||||
//
|
||||
// I do not follow how the rounding bit is extracted from this comparison with the below
|
||||
// operations. In any case, the algorithm is well tested.
|
||||
|
||||
// The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`,
|
||||
// `F::BITS - 2` accounts for the offset that `m_u2` already has.
|
||||
let shift = 2 * F::SIG_BITS - (F::BITS - 2);
|
||||
|
||||
// `2^(2p)m - m^2`
|
||||
let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m));
|
||||
// `m - 2^(2p)m + m^2`
|
||||
let d1 = m.wrapping_sub(d0);
|
||||
m += d1 >> (F::BITS - 1);
|
||||
m &= F::SIG_MASK;
|
||||
|
||||
match exp {
|
||||
Exp::Shifted(e) => m |= IntTy::<F>::cast_from(e) << F::SIG_BITS,
|
||||
Exp::NoShift(e) => m |= e,
|
||||
};
|
||||
|
||||
let mut y = F::from_bits(m);
|
||||
|
||||
// FIXME(f16): the fenv math does not work for `f16`
|
||||
if F::BITS > 16 {
|
||||
// Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add
|
||||
// a tiny value to cause fenv effects.
|
||||
let d2 = d1.wrapping_add(m).wrapping_add(one);
|
||||
let mut tiny = if d2 == zero {
|
||||
cold_path();
|
||||
zero
|
||||
} else {
|
||||
F::IMPLICIT_BIT
|
||||
};
|
||||
|
||||
tiny |= (d1 ^ d2) & F::SIGN_MASK;
|
||||
let t = F::from_bits(tiny);
|
||||
y = y + t;
|
||||
}
|
||||
|
||||
y
|
||||
}
|
||||
|
||||
/// Multiply at the wider integer size, returning the high half.
|
||||
fn wmulh<I: HInt>(a: I, b: I) -> I {
|
||||
a.widen_mul(b).hi()
|
||||
}
|
||||
|
||||
/// Perform `count` goldschmidt iterations, returning `(r_u0, s_u?)`.
|
||||
///
|
||||
/// - `r_u0` is the reciprocal `r ~ 1 / sqrt(m)`, as U0.
|
||||
/// - `s_u2` is the square root, `s ~ sqrt(m)`, as U2.
|
||||
/// - `count` is the number of iterations to perform.
|
||||
/// - `final_set` should be true if this is the last round (same-sized integer). If so, the
|
||||
/// returned `s` will be U3, for later shifting. Otherwise, the returned `s` is U2.
|
||||
///
|
||||
/// Note that performance relies on the optimizer being able to unroll these loops (reasonably
|
||||
/// trivial, `count` is a constant when called).
|
||||
#[inline]
|
||||
fn goldschmidt<F, I>(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I)
|
||||
where
|
||||
F: SqrtHelper,
|
||||
I: HInt + From<u8>,
|
||||
{
|
||||
let three_u2 = I::from(0b11u8) << (I::BITS - 2);
|
||||
let mut u_u0 = r_u0;
|
||||
|
||||
for i in 0..count {
|
||||
// First iteration: `s = m*r` (`u_u0 = r_u0` set above)
|
||||
// Subsequent iterations: `s=s*u/2`
|
||||
s_u2 = wmulh(s_u2, u_u0);
|
||||
|
||||
// Perform `s /= 2` if:
|
||||
//
|
||||
// 1. This is not the first iteration (the first iteration is `s = m*r`)...
|
||||
// 2. ... and this is not the last set of iterations
|
||||
// 3. ... or, if this is the last set, it is not the last iteration
|
||||
//
|
||||
// This step is not performed for the final iteration because the shift is combined with
|
||||
// a later shift (moving `s` into the mantissa).
|
||||
if i > 0 && (!final_set || i + 1 < count) {
|
||||
s_u2 <<= 1;
|
||||
}
|
||||
|
||||
// u = 3 - s*r
|
||||
let d_u2 = wmulh(s_u2, r_u0);
|
||||
u_u0 = three_u2.wrapping_sub(d_u2);
|
||||
|
||||
// r = r*u/2
|
||||
r_u0 = wmulh(r_u0, u_u0) << 1;
|
||||
}
|
||||
|
||||
(r_u0, s_u2)
|
||||
}
|
||||
|
||||
/// Representation of whether we shift the exponent into a `u32`, or modify it in place to save
|
||||
/// the shift operations.
|
||||
enum Exp<T> {
|
||||
/// The exponent has been shifted to a `u32` and is LSB-aligned.
|
||||
Shifted(u32),
|
||||
/// The exponent is in its natural position in integer repr.
|
||||
NoShift(T),
|
||||
}
|
||||
|
||||
/// Size-specific constants related to the square root routine.
|
||||
pub trait SqrtHelper: Float {
|
||||
/// Integer for the first set of rounds. If unused, set to the same type as the next set.
|
||||
type ISet1: HInt + Into<Self::ISet2> + CastFrom<Self::Int> + From<u8>;
|
||||
/// Integer for the second set of rounds. If unused, set to the same type as the next set.
|
||||
type ISet2: HInt + From<Self::ISet1> + From<u8>;
|
||||
|
||||
/// Number of rounds at `ISet1`.
|
||||
const SET1_ROUNDS: u32 = 0;
|
||||
/// Number of rounds at `ISet2`.
|
||||
const SET2_ROUNDS: u32 = 0;
|
||||
/// Number of rounds at `Self::Int`.
|
||||
const FINAL_ROUNDS: u32;
|
||||
}
|
||||
|
||||
impl SqrtHelper for f32 {
|
||||
type ISet1 = u32; // unused
|
||||
type ISet2 = u32; // unused
|
||||
|
||||
const FINAL_ROUNDS: u32 = 3;
|
||||
}
|
||||
|
||||
impl SqrtHelper for f64 {
|
||||
type ISet1 = u32; // unused
|
||||
type ISet2 = u32;
|
||||
|
||||
const SET2_ROUNDS: u32 = 2;
|
||||
const FINAL_ROUNDS: u32 = 2;
|
||||
}
|
||||
|
||||
/// A U0.16 representation of `1/sqrt(x)`.
|
||||
///
|
||||
// / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
|
||||
#[rustfmt::skip]
|
||||
static RSQRT_TAB: [u16; 128] = [
|
||||
0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
|
||||
0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
|
||||
0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
|
||||
0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
|
||||
0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
|
||||
0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
|
||||
0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
|
||||
0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
|
||||
0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
|
||||
0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
|
||||
0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
|
||||
0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
|
||||
0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
|
||||
0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
|
||||
0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
|
||||
0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
|
||||
];
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Test against edge cases from https://en.cppreference.com/w/cpp/numeric/math/sqrt
|
||||
fn spec_test<F>()
|
||||
where
|
||||
F: Float + SqrtHelper,
|
||||
F::Int: HInt,
|
||||
F::Int: From<u8>,
|
||||
F::Int: From<F::ISet2>,
|
||||
F::Int: CastInto<F::ISet1>,
|
||||
F::Int: CastInto<F::ISet2>,
|
||||
u32: CastInto<F::Int>,
|
||||
{
|
||||
// Not Asserted: FE_INVALID exception is raised if argument is negative.
|
||||
assert!(sqrt(F::NEG_ONE).is_nan());
|
||||
assert!(sqrt(F::NAN).is_nan());
|
||||
for f in [F::ZERO, F::NEG_ZERO, F::INFINITY].iter().copied() {
|
||||
assert_biteq!(sqrt(f), f);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sanity_check_f32() {
|
||||
assert_biteq!(sqrt(100.0f32), 10.0);
|
||||
assert_biteq!(sqrt(4.0f32), 2.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_tests_f32() {
|
||||
spec_test::<f32>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::approx_constant)]
|
||||
fn conformance_tests_f32() {
|
||||
let cases = [
|
||||
(f32::PI, 0x3fe2dfc5_u32),
|
||||
(10000.0f32, 0x42c80000_u32),
|
||||
(f32::from_bits(0x0000000f), 0x1b2f456f_u32),
|
||||
(f32::INFINITY, f32::INFINITY.to_bits()),
|
||||
];
|
||||
|
||||
for (input, output) in cases {
|
||||
assert_biteq!(
|
||||
sqrt(input),
|
||||
f32::from_bits(output),
|
||||
"input: {input:?} ({:#018x})",
|
||||
input.to_bits()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sanity_check_f64() {
|
||||
assert_biteq!(sqrt(100.0f64), 10.0);
|
||||
assert_biteq!(sqrt(4.0f64), 2.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spec_tests_f64() {
|
||||
spec_test::<f64>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::approx_constant)]
|
||||
fn conformance_tests_f64() {
|
||||
let cases = [
|
||||
(f64::PI, 0x3ffc5bf891b4ef6a_u64),
|
||||
(10000.0, 0x4059000000000000_u64),
|
||||
(f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64),
|
||||
(f64::INFINITY, f64::INFINITY.to_bits()),
|
||||
];
|
||||
|
||||
for (input, output) in cases {
|
||||
assert_biteq!(
|
||||
sqrt(input),
|
||||
f64::from_bits(output),
|
||||
"input: {input:?} ({:#018x})",
|
||||
input.to_bits()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,83 +1,3 @@
|
|||
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunSoft, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
/* sqrt(x)
|
||||
* Return correctly rounded sqrt.
|
||||
* ------------------------------------------
|
||||
* | Use the hardware sqrt if you have one |
|
||||
* ------------------------------------------
|
||||
* Method:
|
||||
* Bit by bit method using integer arithmetic. (Slow, but portable)
|
||||
* 1. Normalization
|
||||
* Scale x to y in [1,4) with even powers of 2:
|
||||
* find an integer k such that 1 <= (y=x*2^(2k)) < 4, then
|
||||
* sqrt(x) = 2^k * sqrt(y)
|
||||
* 2. Bit by bit computation
|
||||
* Let q = sqrt(y) truncated to i bit after binary point (q = 1),
|
||||
* i 0
|
||||
* i+1 2
|
||||
* s = 2*q , and y = 2 * ( y - q ). (1)
|
||||
* i i i i
|
||||
*
|
||||
* To compute q from q , one checks whether
|
||||
* i+1 i
|
||||
*
|
||||
* -(i+1) 2
|
||||
* (q + 2 ) <= y. (2)
|
||||
* i
|
||||
* -(i+1)
|
||||
* If (2) is false, then q = q ; otherwise q = q + 2 .
|
||||
* i+1 i i+1 i
|
||||
*
|
||||
* With some algebraic manipulation, it is not difficult to see
|
||||
* that (2) is equivalent to
|
||||
* -(i+1)
|
||||
* s + 2 <= y (3)
|
||||
* i i
|
||||
*
|
||||
* The advantage of (3) is that s and y can be computed by
|
||||
* i i
|
||||
* the following recurrence formula:
|
||||
* if (3) is false
|
||||
*
|
||||
* s = s , y = y ; (4)
|
||||
* i+1 i i+1 i
|
||||
*
|
||||
* otherwise,
|
||||
* -i -(i+1)
|
||||
* s = s + 2 , y = y - s - 2 (5)
|
||||
* i+1 i i+1 i i
|
||||
*
|
||||
* One may easily use induction to prove (4) and (5).
|
||||
* Note. Since the left hand side of (3) contain only i+2 bits,
|
||||
* it does not necessary to do a full (53-bit) comparison
|
||||
* in (3).
|
||||
* 3. Final rounding
|
||||
* After generating the 53 bits result, we compute one more bit.
|
||||
* Together with the remainder, we can decide whether the
|
||||
* result is exact, bigger than 1/2ulp, or less than 1/2ulp
|
||||
* (it will never equal to 1/2ulp).
|
||||
* The rounding mode can be detected by checking whether
|
||||
* huge + tiny is equal to huge, and whether huge - tiny is
|
||||
* equal to huge for some floating point number "huge" and "tiny".
|
||||
*
|
||||
* Special cases:
|
||||
* sqrt(+-0) = +-0 ... exact
|
||||
* sqrt(inf) = inf
|
||||
* sqrt(-ve) = NaN ... with invalid signal
|
||||
* sqrt(NaN) = NaN ... with invalid signal for signaling NaN
|
||||
*/
|
||||
|
||||
use core::f64;
|
||||
|
||||
/// The square root of `x` (f64).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn sqrt(x: f64) -> f64 {
|
||||
|
|
@ -90,175 +10,5 @@ pub fn sqrt(x: f64) -> f64 {
|
|||
args: x,
|
||||
}
|
||||
|
||||
use core::num::Wrapping;
|
||||
|
||||
const TINY: f64 = 1.0e-300;
|
||||
|
||||
let mut z: f64;
|
||||
let sign: Wrapping<u32> = Wrapping(0x80000000);
|
||||
let mut ix0: i32;
|
||||
let mut s0: i32;
|
||||
let mut q: i32;
|
||||
let mut m: i32;
|
||||
let mut t: i32;
|
||||
let mut i: i32;
|
||||
let mut r: Wrapping<u32>;
|
||||
let mut t1: Wrapping<u32>;
|
||||
let mut s1: Wrapping<u32>;
|
||||
let mut ix1: Wrapping<u32>;
|
||||
let mut q1: Wrapping<u32>;
|
||||
|
||||
ix0 = (x.to_bits() >> 32) as i32;
|
||||
ix1 = Wrapping(x.to_bits() as u32);
|
||||
|
||||
/* take care of Inf and NaN */
|
||||
if (ix0 & 0x7ff00000) == 0x7ff00000 {
|
||||
return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
|
||||
}
|
||||
/* take care of zero */
|
||||
if ix0 <= 0 {
|
||||
if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 {
|
||||
return x; /* sqrt(+-0) = +-0 */
|
||||
}
|
||||
if ix0 < 0 {
|
||||
return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
|
||||
}
|
||||
}
|
||||
/* normalize x */
|
||||
m = ix0 >> 20;
|
||||
if m == 0 {
|
||||
/* subnormal x */
|
||||
while ix0 == 0 {
|
||||
m -= 21;
|
||||
ix0 |= (ix1 >> 11).0 as i32;
|
||||
ix1 <<= 21;
|
||||
}
|
||||
i = 0;
|
||||
while (ix0 & 0x00100000) == 0 {
|
||||
i += 1;
|
||||
ix0 <<= 1;
|
||||
}
|
||||
m -= i - 1;
|
||||
ix0 |= (ix1 >> (32 - i) as usize).0 as i32;
|
||||
ix1 = ix1 << i as usize;
|
||||
}
|
||||
m -= 1023; /* unbias exponent */
|
||||
ix0 = (ix0 & 0x000fffff) | 0x00100000;
|
||||
if (m & 1) == 1 {
|
||||
/* odd m, double x to make it even */
|
||||
ix0 *= 2;
|
||||
ix0 += ((ix1 & sign) >> 31).0 as i32;
|
||||
ix1 += ix1;
|
||||
}
|
||||
m >>= 1; /* m = [m/2] */
|
||||
|
||||
/* generate sqrt(x) bit by bit */
|
||||
ix0 *= 2;
|
||||
ix0 += ((ix1 & sign) >> 31).0 as i32;
|
||||
ix1 += ix1;
|
||||
q = 0; /* [q,q1] = sqrt(x) */
|
||||
q1 = Wrapping(0);
|
||||
s0 = 0;
|
||||
s1 = Wrapping(0);
|
||||
r = Wrapping(0x00200000); /* r = moving bit from right to left */
|
||||
|
||||
while r != Wrapping(0) {
|
||||
t = s0 + r.0 as i32;
|
||||
if t <= ix0 {
|
||||
s0 = t + r.0 as i32;
|
||||
ix0 -= t;
|
||||
q += r.0 as i32;
|
||||
}
|
||||
ix0 *= 2;
|
||||
ix0 += ((ix1 & sign) >> 31).0 as i32;
|
||||
ix1 += ix1;
|
||||
r >>= 1;
|
||||
}
|
||||
|
||||
r = sign;
|
||||
while r != Wrapping(0) {
|
||||
t1 = s1 + r;
|
||||
t = s0;
|
||||
if t < ix0 || (t == ix0 && t1 <= ix1) {
|
||||
s1 = t1 + r;
|
||||
if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) {
|
||||
s0 += 1;
|
||||
}
|
||||
ix0 -= t;
|
||||
if ix1 < t1 {
|
||||
ix0 -= 1;
|
||||
}
|
||||
ix1 -= t1;
|
||||
q1 += r;
|
||||
}
|
||||
ix0 *= 2;
|
||||
ix0 += ((ix1 & sign) >> 31).0 as i32;
|
||||
ix1 += ix1;
|
||||
r >>= 1;
|
||||
}
|
||||
|
||||
/* use floating add to find out rounding direction */
|
||||
if (ix0 as u32 | ix1.0) != 0 {
|
||||
z = 1.0 - TINY; /* raise inexact flag */
|
||||
if z >= 1.0 {
|
||||
z = 1.0 + TINY;
|
||||
if q1.0 == 0xffffffff {
|
||||
q1 = Wrapping(0);
|
||||
q += 1;
|
||||
} else if z > 1.0 {
|
||||
if q1.0 == 0xfffffffe {
|
||||
q += 1;
|
||||
}
|
||||
q1 += Wrapping(2);
|
||||
} else {
|
||||
q1 += q1 & Wrapping(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
ix0 = (q >> 1) + 0x3fe00000;
|
||||
ix1 = q1 >> 1;
|
||||
if (q & 1) == 1 {
|
||||
ix1 |= sign;
|
||||
}
|
||||
ix0 += m << 20;
|
||||
f64::from_bits(((ix0 as u64) << 32) | ix1.0 as u64)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn sanity_check() {
|
||||
assert_eq!(sqrt(100.0), 10.0);
|
||||
assert_eq!(sqrt(4.0), 2.0);
|
||||
}
|
||||
|
||||
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
|
||||
#[test]
|
||||
fn spec_tests() {
|
||||
// Not Asserted: FE_INVALID exception is raised if argument is negative.
|
||||
assert!(sqrt(-1.0).is_nan());
|
||||
assert!(sqrt(f64::NAN).is_nan());
|
||||
for f in [0.0, -0.0, f64::INFINITY].iter().copied() {
|
||||
assert_eq!(sqrt(f), f);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::approx_constant)]
|
||||
fn conformance_tests() {
|
||||
let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), f64::INFINITY];
|
||||
let results = [
|
||||
4610661241675116657u64,
|
||||
4636737291354636288u64,
|
||||
2197470602079456986u64,
|
||||
9218868437227405312u64,
|
||||
];
|
||||
|
||||
for i in 0..values.len() {
|
||||
let bits = f64::to_bits(sqrt(values[i]));
|
||||
assert_eq!(results[i], bits);
|
||||
}
|
||||
}
|
||||
super::generic::sqrt(x)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,3 @@
|
|||
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
|
||||
/*
|
||||
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
|
||||
*/
|
||||
/*
|
||||
* ====================================================
|
||||
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
|
||||
*
|
||||
* Developed at SunPro, a Sun Microsystems, Inc. business.
|
||||
* Permission to use, copy, modify, and distribute this
|
||||
* software is freely granted, provided that this notice
|
||||
* is preserved.
|
||||
* ====================================================
|
||||
*/
|
||||
|
||||
/// The square root of `x` (f32).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn sqrtf(x: f32) -> f32 {
|
||||
|
|
@ -25,121 +10,5 @@ pub fn sqrtf(x: f32) -> f32 {
|
|||
args: x,
|
||||
}
|
||||
|
||||
const TINY: f32 = 1.0e-30;
|
||||
|
||||
let mut z: f32;
|
||||
let sign: i32 = 0x80000000u32 as i32;
|
||||
let mut ix: i32;
|
||||
let mut s: i32;
|
||||
let mut q: i32;
|
||||
let mut m: i32;
|
||||
let mut t: i32;
|
||||
let mut i: i32;
|
||||
let mut r: u32;
|
||||
|
||||
ix = x.to_bits() as i32;
|
||||
|
||||
/* take care of Inf and NaN */
|
||||
if (ix as u32 & 0x7f800000) == 0x7f800000 {
|
||||
return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
|
||||
}
|
||||
|
||||
/* take care of zero */
|
||||
if ix <= 0 {
|
||||
if (ix & !sign) == 0 {
|
||||
return x; /* sqrt(+-0) = +-0 */
|
||||
}
|
||||
if ix < 0 {
|
||||
return (x - x) / (x - x); /* sqrt(-ve) = sNaN */
|
||||
}
|
||||
}
|
||||
|
||||
/* normalize x */
|
||||
m = ix >> 23;
|
||||
if m == 0 {
|
||||
/* subnormal x */
|
||||
i = 0;
|
||||
while ix & 0x00800000 == 0 {
|
||||
ix <<= 1;
|
||||
i = i + 1;
|
||||
}
|
||||
m -= i - 1;
|
||||
}
|
||||
m -= 127; /* unbias exponent */
|
||||
ix = (ix & 0x007fffff) | 0x00800000;
|
||||
if m & 1 == 1 {
|
||||
/* odd m, double x to make it even */
|
||||
ix += ix;
|
||||
}
|
||||
m >>= 1; /* m = [m/2] */
|
||||
|
||||
/* generate sqrt(x) bit by bit */
|
||||
ix += ix;
|
||||
q = 0;
|
||||
s = 0;
|
||||
r = 0x01000000; /* r = moving bit from right to left */
|
||||
|
||||
while r != 0 {
|
||||
t = s + r as i32;
|
||||
if t <= ix {
|
||||
s = t + r as i32;
|
||||
ix -= t;
|
||||
q += r as i32;
|
||||
}
|
||||
ix += ix;
|
||||
r >>= 1;
|
||||
}
|
||||
|
||||
/* use floating add to find out rounding direction */
|
||||
if ix != 0 {
|
||||
z = 1.0 - TINY; /* raise inexact flag */
|
||||
if z >= 1.0 {
|
||||
z = 1.0 + TINY;
|
||||
if z > 1.0 {
|
||||
q += 2;
|
||||
} else {
|
||||
q += q & 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ix = (q >> 1) + 0x3f000000;
|
||||
ix += m << 23;
|
||||
f32::from_bits(ix as u32)
|
||||
}
|
||||
|
||||
// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
|
||||
#[cfg(not(target_arch = "powerpc64"))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn sanity_check() {
|
||||
assert_eq!(sqrtf(100.0), 10.0);
|
||||
assert_eq!(sqrtf(4.0), 2.0);
|
||||
}
|
||||
|
||||
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt
|
||||
#[test]
|
||||
fn spec_tests() {
|
||||
// Not Asserted: FE_INVALID exception is raised if argument is negative.
|
||||
assert!(sqrtf(-1.0).is_nan());
|
||||
assert!(sqrtf(f32::NAN).is_nan());
|
||||
for f in [0.0, -0.0, f32::INFINITY].iter().copied() {
|
||||
assert_eq!(sqrtf(f), f);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(clippy::approx_constant)]
|
||||
fn conformance_tests() {
|
||||
let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), f32::INFINITY];
|
||||
let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32];
|
||||
|
||||
for i in 0..values.len() {
|
||||
let bits = f32::to_bits(sqrtf(values[i]));
|
||||
assert_eq!(results[i], bits);
|
||||
}
|
||||
}
|
||||
super::generic::sqrt(x)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,10 +55,12 @@ pub trait Int:
|
|||
+ ops::BitAnd<Output = Self>
|
||||
+ cmp::Ord
|
||||
+ CastFrom<i32>
|
||||
+ CastFrom<u16>
|
||||
+ CastFrom<u32>
|
||||
+ CastFrom<u8>
|
||||
+ CastFrom<usize>
|
||||
+ CastInto<i32>
|
||||
+ CastInto<u16>
|
||||
+ CastInto<u32>
|
||||
+ CastInto<u8>
|
||||
+ CastInto<usize>
|
||||
|
|
|
|||
|
|
@ -110,19 +110,21 @@ macro_rules! hf64 {
|
|||
/// Assert `F::biteq` with better messages.
|
||||
#[cfg(test)]
|
||||
macro_rules! assert_biteq {
|
||||
($left:expr, $right:expr, $($arg:tt)*) => {{
|
||||
let bits = ($left.to_bits() * 0).leading_zeros(); // hack to get the width from the value
|
||||
($left:expr, $right:expr, $($tt:tt)*) => {{
|
||||
let l = $left;
|
||||
let r = $right;
|
||||
let bits = (l.to_bits() - l.to_bits()).leading_zeros(); // hack to get the width from the value
|
||||
assert!(
|
||||
$left.biteq($right),
|
||||
"\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
|
||||
l = $left,
|
||||
lb = $left.to_bits(),
|
||||
r = $right,
|
||||
rb = $right.to_bits(),
|
||||
width = ((bits / 4) + 2) as usize
|
||||
l.biteq(r),
|
||||
"{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})",
|
||||
format_args!($($tt)*),
|
||||
lb = l.to_bits(),
|
||||
rb = r.to_bits(),
|
||||
width = ((bits / 4) + 2) as usize,
|
||||
|
||||
);
|
||||
}};
|
||||
($left:expr, $right:expr $(,)?) => {
|
||||
assert_biteq!($left, $right,)
|
||||
assert_biteq!($left, $right, "")
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,3 +10,14 @@ pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
|
|||
#[allow(unused_imports)]
|
||||
pub use hex_float::{hf32, hf64};
|
||||
pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
|
||||
|
||||
/// Hint to the compiler that the current path is cold.
|
||||
pub fn cold_path() {
|
||||
#[cfg(intrinsics_enabled)]
|
||||
core::intrinsics::cold_path();
|
||||
}
|
||||
|
||||
/// Return `x`, first raising `FE_INVALID`.
|
||||
pub fn raise_invalid<F: Float>(x: F) -> F {
|
||||
(x - x) / (x - x)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue