From 9c98c461473a26354c519702d79a689dda7f9645 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 11:34:17 +0000 Subject: [PATCH 1/5] Don't set `opt_level` in the musl build script `cc` automatically reads this from Cargo's `OPT_LEVEL` variable so we don't need to set it explicitly. Remove this so running in a debugger makes more sense. --- library/compiler-builtins/libm/crates/musl-math-sys/build.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/library/compiler-builtins/libm/crates/musl-math-sys/build.rs b/library/compiler-builtins/libm/crates/musl-math-sys/build.rs index 03deb4ff0ac0..d75748159cad 100644 --- a/library/compiler-builtins/libm/crates/musl-math-sys/build.rs +++ b/library/compiler-builtins/libm/crates/musl-math-sys/build.rs @@ -151,7 +151,6 @@ fn build_musl_math(cfg: &Config) { .flag_if_supported("-ffreestanding") .flag_if_supported("-nostdinc") .define("_ALL_SOURCE", "1") - .opt_level(3) .define( "ROOT_INCLUDE_FEATURES", Some(musl_dir.join("include/features.h").to_str().unwrap()), From 8927014e919b789ef957d82ed13eaebe33922e29 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 05:29:36 +0000 Subject: [PATCH 2/5] Enable `force-soft-floats` for extensive tests Any architecture-specific float operations are likely to consist of only a few instructions, but the softfloat implementations are much more complex. Ensure this is what gets tested. --- library/compiler-builtins/libm/.github/workflows/main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/compiler-builtins/libm/.github/workflows/main.yaml b/library/compiler-builtins/libm/.github/workflows/main.yaml index 7693de6559a1..89c5facef44d 100644 --- a/library/compiler-builtins/libm/.github/workflows/main.yaml +++ b/library/compiler-builtins/libm/.github/workflows/main.yaml @@ -270,7 +270,7 @@ jobs: fi LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \ - --features build-mpfr,unstable \ + --features build-mpfr,unstable,force-soft-floats \ --profile release-checked \ -- extensive - name: Print test logs if available From 573ded2ee8a59d3f3bb2a56477fc939b0922cf44 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 12 Jan 2025 11:45:40 +0000 Subject: [PATCH 3/5] Port the most recent version of Musl's `sqrt` as a generic algorithm Musl commit 97e9b73d59 ("math: new software sqrt") adds a new algorithm using Goldschmidt division. Port this algorithm to Rust and make it generic, which shows a notable performance improvement over the existing algorithm. This also allows adding square root routines for `f16` and `f128`. --- .../libm/etc/function-definitions.json | 2 + .../libm/src/math/generic/mod.rs | 2 + .../libm/src/math/generic/sqrt.rs | 419 ++++++++++++++++++ .../compiler-builtins/libm/src/math/sqrt.rs | 252 +---------- .../compiler-builtins/libm/src/math/sqrtf.rs | 133 +----- .../libm/src/math/support/int_traits.rs | 2 + .../libm/src/math/support/macros.rs | 22 +- .../libm/src/math/support/mod.rs | 11 + 8 files changed, 450 insertions(+), 393 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/generic/sqrt.rs diff --git a/library/compiler-builtins/libm/etc/function-definitions.json b/library/compiler-builtins/libm/etc/function-definitions.json index dbaac931c769..9f7c8ab25a03 100644 --- a/library/compiler-builtins/libm/etc/function-definitions.json +++ b/library/compiler-builtins/libm/etc/function-definitions.json @@ -704,6 +704,7 @@ "src/libm_helper.rs", "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", + "src/math/generic/sqrt.rs", "src/math/sqrt.rs" ], "type": "f64" @@ -712,6 +713,7 @@ "sources": [ "src/math/arch/i686.rs", "src/math/arch/wasm32.rs", + "src/math/generic/sqrt.rs", "src/math/sqrtf.rs" ], "type": "f32" diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs index 2b068d6c54d3..3b5a2c3effd7 100644 --- a/library/compiler-builtins/libm/src/math/generic/mod.rs +++ b/library/compiler-builtins/libm/src/math/generic/mod.rs @@ -1,9 +1,11 @@ mod copysign; mod fabs; mod fdim; +mod sqrt; mod trunc; pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; +pub use sqrt::sqrt; pub use trunc::trunc; diff --git a/library/compiler-builtins/libm/src/math/generic/sqrt.rs b/library/compiler-builtins/libm/src/math/generic/sqrt.rs new file mode 100644 index 000000000000..a2e054f3cf2b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/sqrt.rs @@ -0,0 +1,419 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/sqrt.c. Ported to generic Rust algorithm in 2025, TG. */ + +//! Generic square root algorithm. +//! +//! This routine operates around `m_u2`, a U.2 (fixed point with two integral bits) mantissa +//! within the range [1, 4). A table lookup provides an initial estimate, then goldschmidt +//! iterations at various widths are used to approach the real values. +//! +//! For the iterations, `r` is a U0 number that approaches `1/sqrt(m_u2)`, and `s` is a U2 number +//! that approaches `sqrt(m_u2)`. Recall that m_u2 ∈ [1, 4). +//! +//! With Newton-Raphson iterations, this would be: +//! +//! - `w = r * r w ~ 1 / m` +//! - `u = 3 - m * w u ~ 3 - m * w = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! +//! (Note that the righthand column does not show anything analytically meaningful (i.e. r ~ r), +//! since the value of performing one iteration is in reducing the error representable by `~`). +//! +//! Instead of Newton-Raphson iterations, Goldschmidt iterations are used to calculate +//! `s = m * r`: +//! +//! - `s = m * r s ~ m / sqrt(m)` +//! - `u = 3 - s * r u ~ 3 - (m / sqrt(m)) * (1 / sqrt(m)) = 3 - m / m = 2` +//! - `r = r * u / 2 r ~ r` +//! - `s = s * u / 2 s ~ s` +//! +//! The above is precise because it uses the original value `m`. There is also a faster version +//! that performs fewer steps but does not use `m`: +//! +//! - `u = 3 - s * r u ~ 3 - 1` +//! - `r = r * u / 2 r ~ r` +//! - `s = s * u / 2 s ~ s` +//! +//! Rounding errors accumulate faster with the second version, so it is only used for subsequent +//! iterations within the same width integer. The first version is always used for the first +//! iteration at a new width in order to avoid this accumulation. +//! +//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are +//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it. + +use super::super::support::{IntTy, cold_path, raise_invalid}; +use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt}; + +pub fn sqrt(x: F) -> F +where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, +{ + let zero = IntTy::::ZERO; + let one = IntTy::::ONE; + + let mut ix = x.to_bits(); + + // Top is the exponent and sign, which may or may not be shifted. If the float fits into a + // `u32`, we can get by without paying shifting costs. + let noshift = F::BITS <= u32::BITS; + let (mut top, special_case) = if noshift { + let exp_lsb = one << F::SIG_BITS; + let special_case = ix.wrapping_sub(exp_lsb) >= F::EXP_MASK - exp_lsb; + (Exp::NoShift(()), special_case) + } else { + let top = u32::cast_from(ix >> F::SIG_BITS); + let special_case = top.wrapping_sub(1) >= F::EXP_MAX - 1; + (Exp::Shifted(top), special_case) + }; + + // Handle NaN, zero, and out of domain (<= 0) + if special_case { + cold_path(); + + // +/-0 + if ix << 1 == zero { + return x; + } + + // Positive infinity + if ix == F::EXP_MASK { + return x; + } + + // NaN or negative + if ix > F::EXP_MASK { + return raise_invalid(x); + } + + // Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles). + let scaled = x * F::from_parts(false, (F::SIG_BITS + F::EXP_BIAS) as i32, zero); + ix = scaled.to_bits(); + match top { + Exp::Shifted(ref mut v) => { + *v = scaled.exp().unsigned(); + *v = (*v).wrapping_sub(F::SIG_BITS); + } + Exp::NoShift(()) => { + ix = ix.wrapping_sub((F::SIG_BITS << F::SIG_BITS).cast()); + } + } + } + + // Reduce arguments such that `x = 4^e * m`: + // + // - m_u2 ∈ [1, 4), a fixed point U2.BITS number + // - 2^e is the exponent part of the result + let (m_u2, exp) = match top { + Exp::Shifted(top) => { + // We now know `x` is positive, so `top` is just its (biased) exponent + let mut e = top; + // Construct a fixed point representation of the mantissa. + let mut m_u2 = (ix | F::IMPLICIT_BIT) << F::EXP_BITS; + let even = (e & 1) != 0; + if even { + m_u2 >>= 1; + } + e = (e.wrapping_add(F::EXP_MAX >> 1)) >> 1; + (m_u2, Exp::Shifted(e)) + } + Exp::NoShift(()) => { + let even = ix & (one << F::SIG_BITS) != zero; + + // Exponent part of the return value + let mut e_noshift = ix >> 1; + // ey &= (F::EXP_MASK << 2) >> 2; // clear the top exponent bit (result = 1.0) + e_noshift += (F::EXP_MASK ^ (F::SIGN_MASK >> 1)) >> 1; + e_noshift &= F::EXP_MASK; + + let m1 = (ix << F::EXP_BITS) | F::SIGN_MASK; + let m0 = (ix << (F::EXP_BITS - 1)) & !F::SIGN_MASK; + let m_u2 = if even { m0 } else { m1 }; + + (m_u2, Exp::NoShift(e_noshift)) + } + }; + + // Extract the top 6 bits of the significand with the lowest bit of the exponent. + let i = usize::cast_from(ix >> (F::SIG_BITS - 6)) & 0b1111111; + + // Start with an initial guess for `r = 1 / sqrt(m)` from the table, and shift `m` as an + // initial value for `s = sqrt(m)`. See the module documentation for details. + let r1_u0: F::ISet1 = F::ISet1::cast_from(RSQRT_TAB[i]) << (F::ISet1::BITS - 16); + let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast(); + + // Perform iterations, if any, at quarter width (used for `f128`). + let (r1_u0, _s1_u2) = goldschmidt::(r1_u0, s1_u2, F::SET1_ROUNDS, false); + + // Widen values and perform iterations at half width (used for `f64` and `f128`). + let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS); + let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast(); + let (r2_u0, _s2_u2) = goldschmidt::(r2_u0, s2_u2, F::SET2_ROUNDS, false); + + // Perform final iterations at full width (used for all float types). + let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS); + let s_u2: F::Int = m_u2; + let (_r_u0, s_u2) = goldschmidt::(r_u0, s_u2, F::FINAL_ROUNDS, true); + + // Shift back to mantissa position. + let mut m = s_u2 >> (F::EXP_BITS - 2); + + // The musl source includes the following comment (with literals replaced): + // + // > s < sqrt(m) < s + 0x1.09p-SIG_BITS + // > compute nearest rounded result: the nearest result to SIG_BITS bits is either s or + // > s+0x1p-SIG_BITS, we can decide by comparing (2^SIG_BITS s + 0.5)^2 to 2^(2*SIG_BITS) m. + // + // Expanding this with , with `SIG_BITS = p` and adjusting based on the operations done to + // `d0` and `d1`: + // + // - `2^(2p)m ≟ ((2^p)m + 0.5)^2` + // - `2^(2p)m ≟ 2^(2p)m^2 + (2^p)m + 0.25` + // - `2^(2p)m - m^2 ≟ (2^(2p) - 1)m^2 + (2^p)m + 0.25` + // - `(1 - 2^(2p))m + m^2 ≟ (1 - 2^(2p))m^2 + (1 - 2^p)m + 0.25` (?) + // + // I do not follow how the rounding bit is extracted from this comparison with the below + // operations. In any case, the algorithm is well tested. + + // The value needed to shift `m_u2` by to create `m*2^(2p)`. `2p = 2 * F::SIG_BITS`, + // `F::BITS - 2` accounts for the offset that `m_u2` already has. + let shift = 2 * F::SIG_BITS - (F::BITS - 2); + + // `2^(2p)m - m^2` + let d0 = (m_u2 << shift).wrapping_sub(m.wrapping_mul(m)); + // `m - 2^(2p)m + m^2` + let d1 = m.wrapping_sub(d0); + m += d1 >> (F::BITS - 1); + m &= F::SIG_MASK; + + match exp { + Exp::Shifted(e) => m |= IntTy::::cast_from(e) << F::SIG_BITS, + Exp::NoShift(e) => m |= e, + }; + + let mut y = F::from_bits(m); + + // FIXME(f16): the fenv math does not work for `f16` + if F::BITS > 16 { + // Handle rounding and inexact. `(m + 1)^2 == 2^shift m` is exact; for all other cases, add + // a tiny value to cause fenv effects. + let d2 = d1.wrapping_add(m).wrapping_add(one); + let mut tiny = if d2 == zero { + cold_path(); + zero + } else { + F::IMPLICIT_BIT + }; + + tiny |= (d1 ^ d2) & F::SIGN_MASK; + let t = F::from_bits(tiny); + y = y + t; + } + + y +} + +/// Multiply at the wider integer size, returning the high half. +fn wmulh(a: I, b: I) -> I { + a.widen_mul(b).hi() +} + +/// Perform `count` goldschmidt iterations, returning `(r_u0, s_u?)`. +/// +/// - `r_u0` is the reciprocal `r ~ 1 / sqrt(m)`, as U0. +/// - `s_u2` is the square root, `s ~ sqrt(m)`, as U2. +/// - `count` is the number of iterations to perform. +/// - `final_set` should be true if this is the last round (same-sized integer). If so, the +/// returned `s` will be U3, for later shifting. Otherwise, the returned `s` is U2. +/// +/// Note that performance relies on the optimizer being able to unroll these loops (reasonably +/// trivial, `count` is a constant when called). +#[inline] +fn goldschmidt(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I) +where + F: SqrtHelper, + I: HInt + From, +{ + let three_u2 = I::from(0b11u8) << (I::BITS - 2); + let mut u_u0 = r_u0; + + for i in 0..count { + // First iteration: `s = m*r` (`u_u0 = r_u0` set above) + // Subsequent iterations: `s=s*u/2` + s_u2 = wmulh(s_u2, u_u0); + + // Perform `s /= 2` if: + // + // 1. This is not the first iteration (the first iteration is `s = m*r`)... + // 2. ... and this is not the last set of iterations + // 3. ... or, if this is the last set, it is not the last iteration + // + // This step is not performed for the final iteration because the shift is combined with + // a later shift (moving `s` into the mantissa). + if i > 0 && (!final_set || i + 1 < count) { + s_u2 <<= 1; + } + + // u = 3 - s*r + let d_u2 = wmulh(s_u2, r_u0); + u_u0 = three_u2.wrapping_sub(d_u2); + + // r = r*u/2 + r_u0 = wmulh(r_u0, u_u0) << 1; + } + + (r_u0, s_u2) +} + +/// Representation of whether we shift the exponent into a `u32`, or modify it in place to save +/// the shift operations. +enum Exp { + /// The exponent has been shifted to a `u32` and is LSB-aligned. + Shifted(u32), + /// The exponent is in its natural position in integer repr. + NoShift(T), +} + +/// Size-specific constants related to the square root routine. +pub trait SqrtHelper: Float { + /// Integer for the first set of rounds. If unused, set to the same type as the next set. + type ISet1: HInt + Into + CastFrom + From; + /// Integer for the second set of rounds. If unused, set to the same type as the next set. + type ISet2: HInt + From + From; + + /// Number of rounds at `ISet1`. + const SET1_ROUNDS: u32 = 0; + /// Number of rounds at `ISet2`. + const SET2_ROUNDS: u32 = 0; + /// Number of rounds at `Self::Int`. + const FINAL_ROUNDS: u32; +} + +impl SqrtHelper for f32 { + type ISet1 = u32; // unused + type ISet2 = u32; // unused + + const FINAL_ROUNDS: u32 = 3; +} + +impl SqrtHelper for f64 { + type ISet1 = u32; // unused + type ISet2 = u32; + + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + +/// A U0.16 representation of `1/sqrt(x)`. +/// +// / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. +#[rustfmt::skip] +static RSQRT_TAB: [u16; 128] = [ + 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, + 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b, + 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1, + 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, + 0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59, + 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925, + 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, + 0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040, + 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234, + 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, + 0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, + 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192, + 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, + 0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, + 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59, + 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560, +]; + +#[cfg(test)] +mod tests { + use super::*; + + /// Test against edge cases from https://en.cppreference.com/w/cpp/numeric/math/sqrt + fn spec_test() + where + F: Float + SqrtHelper, + F::Int: HInt, + F::Int: From, + F::Int: From, + F::Int: CastInto, + F::Int: CastInto, + u32: CastInto, + { + // Not Asserted: FE_INVALID exception is raised if argument is negative. + assert!(sqrt(F::NEG_ONE).is_nan()); + assert!(sqrt(F::NAN).is_nan()); + for f in [F::ZERO, F::NEG_ZERO, F::INFINITY].iter().copied() { + assert_biteq!(sqrt(f), f); + } + } + + #[test] + fn sanity_check_f32() { + assert_biteq!(sqrt(100.0f32), 10.0); + assert_biteq!(sqrt(4.0f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f32() { + let cases = [ + (f32::PI, 0x3fe2dfc5_u32), + (10000.0f32, 0x42c80000_u32), + (f32::from_bits(0x0000000f), 0x1b2f456f_u32), + (f32::INFINITY, f32::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f32::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + + #[test] + fn sanity_check_f64() { + assert_biteq!(sqrt(100.0f64), 10.0); + assert_biteq!(sqrt(4.0f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[allow(clippy::approx_constant)] + fn conformance_tests_f64() { + let cases = [ + (f64::PI, 0x3ffc5bf891b4ef6a_u64), + (10000.0, 0x4059000000000000_u64), + (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64), + (f64::INFINITY, f64::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f64::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } +} diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs index 2fd7070b1101..0e1d0cd2c1c9 100644 --- a/library/compiler-builtins/libm/src/math/sqrt.rs +++ b/library/compiler-builtins/libm/src/math/sqrt.rs @@ -1,83 +1,3 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunSoft, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* sqrt(x) - * Return correctly rounded sqrt. - * ------------------------------------------ - * | Use the hardware sqrt if you have one | - * ------------------------------------------ - * Method: - * Bit by bit method using integer arithmetic. (Slow, but portable) - * 1. Normalization - * Scale x to y in [1,4) with even powers of 2: - * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then - * sqrt(x) = 2^k * sqrt(y) - * 2. Bit by bit computation - * Let q = sqrt(y) truncated to i bit after binary point (q = 1), - * i 0 - * i+1 2 - * s = 2*q , and y = 2 * ( y - q ). (1) - * i i i i - * - * To compute q from q , one checks whether - * i+1 i - * - * -(i+1) 2 - * (q + 2 ) <= y. (2) - * i - * -(i+1) - * If (2) is false, then q = q ; otherwise q = q + 2 . - * i+1 i i+1 i - * - * With some algebraic manipulation, it is not difficult to see - * that (2) is equivalent to - * -(i+1) - * s + 2 <= y (3) - * i i - * - * The advantage of (3) is that s and y can be computed by - * i i - * the following recurrence formula: - * if (3) is false - * - * s = s , y = y ; (4) - * i+1 i i+1 i - * - * otherwise, - * -i -(i+1) - * s = s + 2 , y = y - s - 2 (5) - * i+1 i i+1 i i - * - * One may easily use induction to prove (4) and (5). - * Note. Since the left hand side of (3) contain only i+2 bits, - * it does not necessary to do a full (53-bit) comparison - * in (3). - * 3. Final rounding - * After generating the 53 bits result, we compute one more bit. - * Together with the remainder, we can decide whether the - * result is exact, bigger than 1/2ulp, or less than 1/2ulp - * (it will never equal to 1/2ulp). - * The rounding mode can be detected by checking whether - * huge + tiny is equal to huge, and whether huge - tiny is - * equal to huge for some floating point number "huge" and "tiny". - * - * Special cases: - * sqrt(+-0) = +-0 ... exact - * sqrt(inf) = inf - * sqrt(-ve) = NaN ... with invalid signal - * sqrt(NaN) = NaN ... with invalid signal for signaling NaN - */ - -use core::f64; - /// The square root of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { @@ -90,175 +10,5 @@ pub fn sqrt(x: f64) -> f64 { args: x, } - use core::num::Wrapping; - - const TINY: f64 = 1.0e-300; - - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; - - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); - - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 *= 2; - ix0 += ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix0 *= 2; - ix0 += ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ - - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 *= 2; - ix0 += ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 *= 2; - ix0 += ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits(((ix0 as u64) << 32) | ix1.0 as u64) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(sqrt(100.0), 10.0); - assert_eq!(sqrt(4.0), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt - #[test] - fn spec_tests() { - // Not Asserted: FE_INVALID exception is raised if argument is negative. - assert!(sqrt(-1.0).is_nan()); - assert!(sqrt(f64::NAN).is_nan()); - for f in [0.0, -0.0, f64::INFINITY].iter().copied() { - assert_eq!(sqrt(f), f); - } - } - - #[test] - #[allow(clippy::approx_constant)] - fn conformance_tests() { - let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), f64::INFINITY]; - let results = [ - 4610661241675116657u64, - 4636737291354636288u64, - 2197470602079456986u64, - 9218868437227405312u64, - ]; - - for i in 0..values.len() { - let bits = f64::to_bits(sqrt(values[i])); - assert_eq!(results[i], bits); - } - } + super::generic::sqrt(x) } diff --git a/library/compiler-builtins/libm/src/math/sqrtf.rs b/library/compiler-builtins/libm/src/math/sqrtf.rs index 319335163ae8..2e69a4b66942 100644 --- a/library/compiler-builtins/libm/src/math/sqrtf.rs +++ b/library/compiler-builtins/libm/src/math/sqrtf.rs @@ -1,18 +1,3 @@ -/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - /// The square root of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { @@ -25,121 +10,5 @@ pub fn sqrtf(x: f32) -> f32 { args: x, } - const TINY: f32 = 1.0e-30; - - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; - - ix = x.to_bits() as i32; - - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ - } - - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ - - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } - - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(sqrtf(100.0), 10.0); - assert_eq!(sqrtf(4.0), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt - #[test] - fn spec_tests() { - // Not Asserted: FE_INVALID exception is raised if argument is negative. - assert!(sqrtf(-1.0).is_nan()); - assert!(sqrtf(f32::NAN).is_nan()); - for f in [0.0, -0.0, f32::INFINITY].iter().copied() { - assert_eq!(sqrtf(f), f); - } - } - - #[test] - #[allow(clippy::approx_constant)] - fn conformance_tests() { - let values = [3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), f32::INFINITY]; - let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; - - for i in 0..values.len() { - let bits = f32::to_bits(sqrtf(values[i])); - assert_eq!(results[i], bits); - } - } + super::generic::sqrt(x) } diff --git a/library/compiler-builtins/libm/src/math/support/int_traits.rs b/library/compiler-builtins/libm/src/math/support/int_traits.rs index db799c030c28..cf19762e8169 100644 --- a/library/compiler-builtins/libm/src/math/support/int_traits.rs +++ b/library/compiler-builtins/libm/src/math/support/int_traits.rs @@ -55,10 +55,12 @@ pub trait Int: + ops::BitAnd + cmp::Ord + CastFrom + + CastFrom + CastFrom + CastFrom + CastFrom + CastInto + + CastInto + CastInto + CastInto + CastInto diff --git a/library/compiler-builtins/libm/src/math/support/macros.rs b/library/compiler-builtins/libm/src/math/support/macros.rs index 076fdf1f775e..c9a36c0db1a7 100644 --- a/library/compiler-builtins/libm/src/math/support/macros.rs +++ b/library/compiler-builtins/libm/src/math/support/macros.rs @@ -110,19 +110,21 @@ macro_rules! hf64 { /// Assert `F::biteq` with better messages. #[cfg(test)] macro_rules! assert_biteq { - ($left:expr, $right:expr, $($arg:tt)*) => {{ - let bits = ($left.to_bits() * 0).leading_zeros(); // hack to get the width from the value + ($left:expr, $right:expr, $($tt:tt)*) => {{ + let l = $left; + let r = $right; + let bits = (l.to_bits() - l.to_bits()).leading_zeros(); // hack to get the width from the value assert!( - $left.biteq($right), - "\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", - l = $left, - lb = $left.to_bits(), - r = $right, - rb = $right.to_bits(), - width = ((bits / 4) + 2) as usize + l.biteq(r), + "{}\nl: {l:?} ({lb:#0width$x})\nr: {r:?} ({rb:#0width$x})", + format_args!($($tt)*), + lb = l.to_bits(), + rb = r.to_bits(), + width = ((bits / 4) + 2) as usize, + ); }}; ($left:expr, $right:expr $(,)?) => { - assert_biteq!($left, $right,) + assert_biteq!($left, $right, "") }; } diff --git a/library/compiler-builtins/libm/src/math/support/mod.rs b/library/compiler-builtins/libm/src/math/support/mod.rs index e2f4e0e981d6..b4a57a34ed42 100644 --- a/library/compiler-builtins/libm/src/math/support/mod.rs +++ b/library/compiler-builtins/libm/src/math/support/mod.rs @@ -10,3 +10,14 @@ pub(crate) use float_traits::{f32_from_bits, f64_from_bits}; #[allow(unused_imports)] pub use hex_float::{hf32, hf64}; pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt}; + +/// Hint to the compiler that the current path is cold. +pub fn cold_path() { + #[cfg(intrinsics_enabled)] + core::intrinsics::cold_path(); +} + +/// Return `x`, first raising `FE_INVALID`. +pub fn raise_invalid(x: F) -> F { + (x - x) / (x - x) +} From 03041a0371565f1a32583d770e15f4ffb60232b5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 15 Jan 2025 11:49:28 +0000 Subject: [PATCH 4/5] Copy the u256 implementation from compiler_builtins --- .../libm/src/math/support/big.rs | 302 ++++++++++++++++++ .../libm/src/math/support/big/tests.rs | 110 +++++++ .../libm/src/math/support/mod.rs | 1 + 3 files changed, 413 insertions(+) create mode 100644 library/compiler-builtins/libm/src/math/support/big.rs create mode 100644 library/compiler-builtins/libm/src/math/support/big/tests.rs diff --git a/library/compiler-builtins/libm/src/math/support/big.rs b/library/compiler-builtins/libm/src/math/support/big.rs new file mode 100644 index 000000000000..e0f5e52634ef --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/big.rs @@ -0,0 +1,302 @@ +//! Integers used for wide operations, larger than `u128`. + +#![allow(unused)] + +#[cfg(test)] +mod tests; + +use core::{fmt, ops}; + +use super::{DInt, HInt, Int, MinInt}; + +const WORD_LO_MASK: u64 = 0x00000000ffffffff; +const WORD_HI_MASK: u64 = 0xffffffff00000000; +const WORD_FULL_MASK: u64 = 0xffffffffffffffff; +const U128_LO_MASK: u128 = u64::MAX as u128; +const U128_HI_MASK: u128 = (u64::MAX as u128) << 64; + +/// A 256-bit unsigned integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct u256(pub [u64; 4]); + +impl u256 { + pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]); + + /// Reinterpret as a signed integer + pub fn signed(self) -> i256 { + i256(self.0) + } +} + +/// A 256-bit signed integer represented as 4 64-bit limbs. +/// +/// Each limb is a native-endian number, but the array is little-limb-endian. +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct i256(pub [u64; 4]); + +impl i256 { + /// Reinterpret as an unsigned integer + pub fn unsigned(self) -> u256 { + u256(self.0) + } +} + +impl MinInt for u256 { + type OtherSign = i256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0u64; 4]); + const MAX: Self = Self([u64::MAX; 4]); +} + +impl MinInt for i256 { + type OtherSign = u256; + + type Unsigned = u256; + + const SIGNED: bool = false; + const BITS: u32 = 256; + const ZERO: Self = Self([0u64; 4]); + const ONE: Self = Self([1, 0, 0, 0]); + const MIN: Self = Self([0, 0, 0, 1 << 63]); + const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]); +} + +macro_rules! impl_common { + ($ty:ty) => { + impl ops::BitOr for $ty { + type Output = Self; + + fn bitor(mut self, rhs: Self) -> Self::Output { + self.0[0] |= rhs.0[0]; + self.0[1] |= rhs.0[1]; + self.0[2] |= rhs.0[2]; + self.0[3] |= rhs.0[3]; + self + } + } + + impl ops::Not for $ty { + type Output = Self; + + fn not(self) -> Self::Output { + Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]]) + } + } + + impl ops::Shl for $ty { + type Output = Self; + + fn shl(self, rhs: u32) -> Self::Output { + unimplemented!("only used to meet trait bounds") + } + } + }; +} + +impl_common!(i256); +impl_common!(u256); + +impl ops::Shr for u256 { + type Output = Self; + + fn shr(self, rhs: u32) -> Self::Output { + assert!(rhs < Self::BITS, "attempted to shift right with overflow"); + + if rhs == 0 { + return self; + } + + let mut ret = self; + let byte_shift = rhs / 64; + let bit_shift = rhs % 64; + + for idx in 0..4 { + let base_idx = idx + byte_shift as usize; + + // FIXME(msrv): could be let...else. + let base = match ret.0.get(base_idx) { + Some(v) => v, + None => { + ret.0[idx] = 0; + continue; + } + }; + + let mut new_val = base >> bit_shift; + + if let Some(new) = ret.0.get(base_idx + 1) { + new_val |= new.overflowing_shl(64 - bit_shift).0; + } + + ret.0[idx] = new_val; + } + + ret + } +} + +macro_rules! word { + (1, $val:expr) => { + (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64 + }; + (2, $val:expr) => { + (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64 + }; + (3, $val:expr) => { + (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64 + }; + (4, $val:expr) => { + (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64 + }; +} + +impl HInt for u128 { + type D = u256; + + fn widen(self) -> Self::D { + let w0 = self & u128::from(u64::MAX); + let w1 = (self >> u64::BITS) & u128::from(u64::MAX); + u256([w0 as u64, w1 as u64, 0, 0]) + } + + fn zero_widen(self) -> Self::D { + self.widen() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + let product11: u64 = word!(1, self) * word!(1, rhs); + let product12: u64 = word!(1, self) * word!(2, rhs); + let product13: u64 = word!(1, self) * word!(3, rhs); + let product14: u64 = word!(1, self) * word!(4, rhs); + let product21: u64 = word!(2, self) * word!(1, rhs); + let product22: u64 = word!(2, self) * word!(2, rhs); + let product23: u64 = word!(2, self) * word!(3, rhs); + let product24: u64 = word!(2, self) * word!(4, rhs); + let product31: u64 = word!(3, self) * word!(1, rhs); + let product32: u64 = word!(3, self) * word!(2, rhs); + let product33: u64 = word!(3, self) * word!(3, rhs); + let product34: u64 = word!(3, self) * word!(4, rhs); + let product41: u64 = word!(4, self) * word!(1, rhs); + let product42: u64 = word!(4, self) * word!(2, rhs); + let product43: u64 = word!(4, self) * word!(3, rhs); + let product44: u64 = word!(4, self) * word!(4, rhs); + + let sum0: u128 = u128::from(product44); + let sum1: u128 = u128::from(product34) + u128::from(product43); + let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42); + let sum3: u128 = u128::from(product14) + + u128::from(product23) + + u128::from(product32) + + u128::from(product41); + let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31); + let sum5: u128 = u128::from(product12) + u128::from(product21); + let sum6: u128 = u128::from(product11); + + let r0: u128 = + (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32); + let r1: u128 = (sum0 >> 64) + + ((sum1 >> 32) & u128::from(WORD_FULL_MASK)) + + (sum2 & u128::from(WORD_FULL_MASK)) + + ((sum3 << 32) & u128::from(WORD_HI_MASK)); + + let (lo, carry) = r0.overflowing_add(r1 << 64); + let hi = (r1 >> 64) + + (sum1 >> 96) + + (sum2 >> 64) + + (sum3 >> 32) + + sum4 + + (sum5 << 32) + + (sum6 << 64) + + u128::from(carry); + + u256([ + (lo & U128_LO_MASK) as u64, + ((lo >> 64) & U128_LO_MASK) as u64, + (hi & U128_LO_MASK) as u64, + ((hi >> 64) & U128_LO_MASK) as u64, + ]) + } + + fn widen_mul(self, rhs: Self) -> Self::D { + self.zero_widen_mul(rhs) + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl HInt for i128 { + type D = i256; + + fn widen(self) -> Self::D { + let mut ret = self.unsigned().zero_widen().signed(); + if self.is_negative() { + ret.0[2] = u64::MAX; + ret.0[3] = u64::MAX; + } + ret + } + + fn zero_widen(self) -> Self::D { + self.unsigned().zero_widen().signed() + } + + fn zero_widen_mul(self, rhs: Self) -> Self::D { + self.unsigned().zero_widen_mul(rhs.unsigned()).signed() + } + + fn widen_mul(self, rhs: Self) -> Self::D { + unimplemented!("signed i128 widening multiply is not used") + } + + fn widen_hi(self) -> Self::D { + self.widen() << ::BITS + } +} + +impl DInt for u256 { + type H = u128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + u128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + u128::from_le_bytes(tmp) + } +} + +impl DInt for i256 { + type H = i128; + + fn lo(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[0].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[1].to_le_bytes()); + i128::from_le_bytes(tmp) + } + + fn hi(self) -> Self::H { + let mut tmp = [0u8; 16]; + tmp[..8].copy_from_slice(&self.0[2].to_le_bytes()); + tmp[8..].copy_from_slice(&self.0[3].to_le_bytes()); + i128::from_le_bytes(tmp) + } +} diff --git a/library/compiler-builtins/libm/src/math/support/big/tests.rs b/library/compiler-builtins/libm/src/math/support/big/tests.rs new file mode 100644 index 000000000000..f95f82973161 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/big/tests.rs @@ -0,0 +1,110 @@ +extern crate std; +use std::string::String; +use std::vec::Vec; +use std::{eprintln, format}; + +use super::{HInt, MinInt, i256, u256}; + +const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; + +/// Print a `u256` as hex since we can't add format implementations +fn hexu(v: u256) -> String { + format!("0x{:016x}{:016x}{:016x}{:016x}", v.0[3], v.0[2], v.0[1], v.0[0]) +} + +#[test] +fn widen_u128() { + assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0])); + assert_eq!(LOHI_SPLIT.widen(), u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])); +} + +#[test] +fn widen_i128() { + assert_eq!((-1i128).widen(), u256::MAX.signed()); + assert_eq!( + (LOHI_SPLIT as i128).widen(), + i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX]) + ); + assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); +} + +#[test] +fn widen_mul_u128() { + let tests = [ + (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])), + (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])), + (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])), + (u128::MIN, u128::MIN, u256::ZERO), + (1234, 0, u256::ZERO), + (0, 1234, u256::ZERO), + ]; + + let mut errors = Vec::new(); + for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + errors.push((i, a, b, exp, res)); + } + } + + for (i, a, b, exp, res) in &errors { + eprintln!("FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", hexu(*exp), hexu(*res)); + } + assert!(errors.is_empty()); +} + +#[test] +fn not_u128() { + assert_eq!(!u256::ZERO, u256::MAX); +} + +#[test] +fn shr_u128() { + let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX]; + + let mut errors = Vec::new(); + + for a in only_low { + for perturb in 0..10 { + let a = a.saturating_add(perturb); + for shift in 0..128 { + let res = a.widen() >> shift; + let expected = (a >> shift).widen(); + if res != expected { + errors.push((a.widen(), shift, res, expected)); + } + } + } + } + + let check = [ + (u256::MAX, 1, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1])), + (u256::MAX, 5, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5])), + (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])), + (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])), + (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])), + (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])), + (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])), + (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])), + (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 254, u256([0b11, 0, 0, 0])), + (u256::MAX, 255, u256([1, 0, 0, 0])), + ]; + + for (input, shift, expected) in check { + let res = input >> shift; + if res != expected { + errors.push((input, shift, res, expected)); + } + } + + for (a, b, res, expected) in &errors { + eprintln!("FAILURE: {} >> {b} = {} got {}", hexu(*a), hexu(*expected), hexu(*res),); + } + assert!(errors.is_empty()); +} diff --git a/library/compiler-builtins/libm/src/math/support/mod.rs b/library/compiler-builtins/libm/src/math/support/mod.rs index b4a57a34ed42..ddfc2e3e0717 100644 --- a/library/compiler-builtins/libm/src/math/support/mod.rs +++ b/library/compiler-builtins/libm/src/math/support/mod.rs @@ -1,5 +1,6 @@ #[macro_use] pub mod macros; +mod big; mod float_traits; mod hex_float; mod int_traits; From 186eac9227e83b9ba0d771ac9326632d42e25086 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 21 Jan 2025 23:59:07 +0000 Subject: [PATCH 5/5] Add `sqrtf16` and `sqrtf128` Use the generic algorithms to provide implementations for these routines. --- .../libm/crates/libm-macros/src/shared.rs | 4 +- .../libm/crates/libm-test/benches/icount.rs | 2 + .../libm/crates/libm-test/benches/random.rs | 2 + .../libm-test/tests/compare_built_musl.rs | 2 + .../libm/crates/util/src/main.rs | 2 + .../libm/etc/function-definitions.json | 14 +++ .../libm/etc/function-list.txt | 2 + .../libm/src/math/generic/sqrt.rs | 92 +++++++++++++++++++ .../compiler-builtins/libm/src/math/mod.rs | 4 + .../libm/src/math/sqrtf128.rs | 5 + .../libm/src/math/sqrtf16.rs | 5 + 11 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/sqrtf128.rs create mode 100644 library/compiler-builtins/libm/src/math/sqrtf16.rs diff --git a/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs b/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs index 60838196236f..d17bc6ffccb7 100644 --- a/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs +++ b/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["fabsf16", "truncf16"], + &["fabsf16", "sqrtf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["fabsf128", "truncf128"], + &["fabsf128", "sqrtf128", "truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs index 3a66249e85dc..c8451f88cd38 100644 --- a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs +++ b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs @@ -155,6 +155,8 @@ main!( icount_bench_sinh_group, icount_bench_sinhf_group, icount_bench_sqrt_group, + icount_bench_sqrtf128_group, + icount_bench_sqrtf16_group, icount_bench_sqrtf_group, icount_bench_tan_group, icount_bench_tanf_group, diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs index 888161265100..026841202532 100644 --- a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs +++ b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs @@ -123,6 +123,8 @@ libm_macros::for_each_function! { | fabsf16 | fdimf128 | fdimf16 + | sqrtf16 + | sqrtf128 | truncf128 | truncf16 => (false, None), diff --git a/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs b/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs index f540a0b15517..24703f273168 100644 --- a/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs @@ -87,5 +87,7 @@ libm_macros::for_each_function! { fdimf16, truncf128, truncf16, + sqrtf16, + sqrtf128, ], } diff --git a/library/compiler-builtins/libm/crates/util/src/main.rs b/library/compiler-builtins/libm/crates/util/src/main.rs index b979c60ad3c0..cd68d9afd1a2 100644 --- a/library/compiler-builtins/libm/crates/util/src/main.rs +++ b/library/compiler-builtins/libm/crates/util/src/main.rs @@ -90,6 +90,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fabsf16 | fdimf128 | fdimf16 + | sqrtf128 + | sqrtf16 | truncf128 | truncf16 => None, _ => Some(musl_math_sys::MACRO_FN_NAME) diff --git a/library/compiler-builtins/libm/etc/function-definitions.json b/library/compiler-builtins/libm/etc/function-definitions.json index 9f7c8ab25a03..2d0af3bcff02 100644 --- a/library/compiler-builtins/libm/etc/function-definitions.json +++ b/library/compiler-builtins/libm/etc/function-definitions.json @@ -718,6 +718,20 @@ ], "type": "f32" }, + "sqrtf128": { + "sources": [ + "src/math/generic/sqrt.rs", + "src/math/sqrtf128.rs" + ], + "type": "f128" + }, + "sqrtf16": { + "sources": [ + "src/math/generic/sqrt.rs", + "src/math/sqrtf16.rs" + ], + "type": "f16" + }, "tan": { "sources": [ "src/libm_helper.rs", diff --git a/library/compiler-builtins/libm/etc/function-list.txt b/library/compiler-builtins/libm/etc/function-list.txt index 7f96a436227e..47c34ab90521 100644 --- a/library/compiler-builtins/libm/etc/function-list.txt +++ b/library/compiler-builtins/libm/etc/function-list.txt @@ -105,6 +105,8 @@ sinh sinhf sqrt sqrtf +sqrtf128 +sqrtf16 tan tanf tanh diff --git a/library/compiler-builtins/libm/src/math/generic/sqrt.rs b/library/compiler-builtins/libm/src/math/generic/sqrt.rs index a2e054f3cf2b..c892f9997241 100644 --- a/library/compiler-builtins/libm/src/math/generic/sqrt.rs +++ b/library/compiler-builtins/libm/src/math/generic/sqrt.rs @@ -294,6 +294,14 @@ pub trait SqrtHelper: Float { const FINAL_ROUNDS: u32; } +#[cfg(f16_enabled)] +impl SqrtHelper for f16 { + type ISet1 = u16; // unused + type ISet2 = u16; // unused + + const FINAL_ROUNDS: u32 = 2; +} + impl SqrtHelper for f32 { type ISet1 = u32; // unused type ISet2 = u32; // unused @@ -309,6 +317,16 @@ impl SqrtHelper for f64 { const FINAL_ROUNDS: u32 = 2; } +#[cfg(f128_enabled)] +impl SqrtHelper for f128 { + type ISet1 = u32; + type ISet2 = u64; + + const SET1_ROUNDS: u32 = 1; + const SET2_ROUNDS: u32 = 2; + const FINAL_ROUNDS: u32 = 2; +} + /// A U0.16 representation of `1/sqrt(x)`. /// // / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand. @@ -355,6 +373,42 @@ mod tests { } } + #[test] + #[cfg(f16_enabled)] + fn sanity_check_f16() { + assert_biteq!(sqrt(100.0f16), 10.0); + assert_biteq!(sqrt(4.0f16), 2.0); + } + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + #[cfg(f16_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f16() { + let cases = [ + (f16::PI, 0x3f17_u16), + // 10_000.0, using a hex literal for MSRV hack (Rust < 1.67 checks literal widths as + // part of the AST, so the `cfg` is irrelevant here). + (f16::from_bits(0x70e2), 0x5640_u16), + (f16::from_bits(0x0000000f), 0x13bf_u16), + (f16::INFINITY, f16::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f16::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } + #[test] fn sanity_check_f32() { assert_biteq!(sqrt(100.0f32), 10.0); @@ -416,4 +470,42 @@ mod tests { ); } } + + #[test] + #[cfg(f128_enabled)] + fn sanity_check_f128() { + assert_biteq!(sqrt(100.0f128), 10.0); + assert_biteq!(sqrt(4.0f128), 2.0); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + #[allow(clippy::approx_constant)] + fn conformance_tests_f128() { + let cases = [ + (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128), + // 10_000.0, see `f16` for reasoning. + ( + f128::from_bits(0x400c3880000000000000000000000000), + 0x40059000000000000000000000000000_u128, + ), + (f128::from_bits(0x0000000f), 0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128), + (f128::INFINITY, f128::INFINITY.to_bits()), + ]; + + for (input, output) in cases { + assert_biteq!( + sqrt(input), + f128::from_bits(output), + "input: {input:?} ({:#018x})", + input.to_bits() + ); + } + } } diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index 03adb6be1934..3684025a6c10 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -344,11 +344,13 @@ cfg_if! { mod copysignf16; mod fabsf16; mod fdimf16; + mod sqrtf16; mod truncf16; pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; + pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } } @@ -358,11 +360,13 @@ cfg_if! { mod copysignf128; mod fabsf128; mod fdimf128; + mod sqrtf128; mod truncf128; pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; + pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } } diff --git a/library/compiler-builtins/libm/src/math/sqrtf128.rs b/library/compiler-builtins/libm/src/math/sqrtf128.rs new file mode 100644 index 000000000000..eaef6ae0c1c8 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sqrtf128.rs @@ -0,0 +1,5 @@ +/// The square root of `x` (f128). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf128(x: f128) -> f128 { + return super::generic::sqrt(x); +} diff --git a/library/compiler-builtins/libm/src/math/sqrtf16.rs b/library/compiler-builtins/libm/src/math/sqrtf16.rs new file mode 100644 index 000000000000..549bf902c722 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/sqrtf16.rs @@ -0,0 +1,5 @@ +/// The square root of `x` (f16). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn sqrtf16(x: f16) -> f16 { + return super::generic::sqrt(x); +}