From ad1c65229384b21e3dafdc40f8d2119df3186cae Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 03:05:01 -0500 Subject: [PATCH 1/5] Update `libm-test/build.rs` to skip directories Don't try to generate tests for directories, or for files that contain `f16` or `f128` (as these types are not provided by musl's math implementations). (cherry picked from commit fd7ad36b70d0bbc0f0b9bc7e54d10258423fda29) --- .../libm/crates/libm-test/build.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/library/compiler-builtins/libm/crates/libm-test/build.rs b/library/compiler-builtins/libm/crates/libm-test/build.rs index 472dec9d33e1..40b3e56c046b 100644 --- a/library/compiler-builtins/libm/crates/libm-test/build.rs +++ b/library/compiler-builtins/libm/crates/libm-test/build.rs @@ -156,7 +156,11 @@ mod musl_serialized_tests { return; } - let files = fs::read_dir(math_src).unwrap().map(|f| f.unwrap().path()).collect::>(); + let files = fs::read_dir(math_src) + .unwrap() + .map(|f| f.unwrap().path()) + .filter(file_needs_test) + .collect::>(); let mut math = Vec::new(); for file in files { @@ -187,6 +191,19 @@ mod musl_serialized_tests { generate_unit_tests(&math); } + /// Check whether a path within `src/math` should get tests generated. + fn file_needs_test(path: &PathBuf) -> bool { + // Skip directories + if path.is_dir() { + return false; + } + + let fname = path.file_name().unwrap().to_str().unwrap(); + + // Musl doesn't support `f16` or `f128` + !(fname.contains("f16") || fname.contains("f128")) + } + /// A "poor man's" parser for the signature of a function fn parse(s: &str) -> Function { let s = eat(s, "pub fn "); From ec1ca5129826bdaf4f6f1c97255547bfc8d12b77 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 02:56:22 -0500 Subject: [PATCH 2/5] Vendor `cfg_if::cfg_if!` `cfg_if` is helpful for applying `cfg` attributes to groups of items, like we will need to do with architecture-specific modules of `f16` and `f128`. However, `libm` can't have dependencies. The `cfg_if` macro is complex but small, so just vendor it here. --- .../libm/src/math/support/macros.rs | 46 +++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/library/compiler-builtins/libm/src/math/support/macros.rs b/library/compiler-builtins/libm/src/math/support/macros.rs index 6bc75837a349..39a6fe827725 100644 --- a/library/compiler-builtins/libm/src/math/support/macros.rs +++ b/library/compiler-builtins/libm/src/math/support/macros.rs @@ -1,6 +1,46 @@ -/// Choose among using an intrinsic (if available) and falling back to the default function body. -/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the -/// function. +/// `libm` cannot have dependencies, so this is vendored directly from the `cfg-if` crate +/// (with some comments stripped for compactness). +macro_rules! cfg_if { + // match if/else chains with a final `else` + ($( + if #[cfg($meta:meta)] { $($tokens:tt)* } + ) else * else { + $($tokens2:tt)* + }) => { + cfg_if! { @__items () ; $( ( ($meta) ($($tokens)*) ), )* ( () ($($tokens2)*) ), } + }; + + // match if/else chains lacking a final `else` + ( + if #[cfg($i_met:meta)] { $($i_tokens:tt)* } + $( else if #[cfg($e_met:meta)] { $($e_tokens:tt)* } )* + ) => { + cfg_if! { + @__items + () ; + ( ($i_met) ($($i_tokens)*) ), + $( ( ($e_met) ($($e_tokens)*) ), )* + ( () () ), + } + }; + + // Internal and recursive macro to emit all the items + // + // Collects all the negated cfgs in a list at the beginning and after the + // semicolon is all the remaining items + (@__items ($($not:meta,)*) ; ) => {}; + (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($tokens:tt)*) ), $($rest:tt)*) => { + #[cfg(all($($m,)* not(any($($not),*))))] cfg_if! { @__identity $($tokens)* } + cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* } + }; + + // Internal macro to make __apply work out right for different match types, + // because of how macros matching/expand stuff. + (@__identity $($tokens:tt)*) => { $($tokens)* }; +} + +/// Choose between using an intrinsic (if available) and the function body. Returns directly if +/// the intrinsic is used, otherwise the rest of the function body is used. /// /// Use this if the intrinsic is likely to be more performant on the platform(s) specified /// in `intrinsic_available`. From 6d1033e7fc2beb6ce494c7099770326a8a3883c4 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:24:46 -0500 Subject: [PATCH 3/5] Add an "arch" Cargo feature that is on by default Introduce a Cargo feature to enable or disable architecture-specific features (SIMD, assembly), which is on by default. This allows for more fine grained control compared to relying on the `force-soft-floats` feature. Similar to "unstable-intrinsics", introduce a build.rs config option for `unstable-intrinsics AND NOT force-soft-floats`, which makes this easier to work with in code. Effectively, this allows moving our non-additive Cargo feature (force-soft-floats) to a positive one by default, allowing for an override when needed. --- library/compiler-builtins/libm/Cargo.toml | 5 ++++- library/compiler-builtins/libm/build.rs | 12 ++++++++++++ library/compiler-builtins/libm/ci/run.sh | 1 + .../crates/compiler-builtins-smoke-test/Cargo.toml | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/library/compiler-builtins/libm/Cargo.toml b/library/compiler-builtins/libm/Cargo.toml index 5e4565556b02..2e74012eae44 100644 --- a/library/compiler-builtins/libm/Cargo.toml +++ b/library/compiler-builtins/libm/Cargo.toml @@ -14,7 +14,10 @@ exclude = ["/ci/", "/.github/workflows/"] rust-version = "1.63" [features] -default = [] +default = ["arch"] + +# Enable architecture-specific features such as SIMD or assembly routines. +arch = [] # This tells the compiler to assume that a Nightly toolchain is being used and # that it should activate any useful Nightly things accordingly. diff --git a/library/compiler-builtins/libm/build.rs b/library/compiler-builtins/libm/build.rs index adb521407308..001029236113 100644 --- a/library/compiler-builtins/libm/build.rs +++ b/library/compiler-builtins/libm/build.rs @@ -15,6 +15,7 @@ fn main() { } configure_intrinsics(); + configure_arch(); } /// Simplify the feature logic for enabling intrinsics so code only needs to use @@ -28,3 +29,14 @@ fn configure_intrinsics() { println!("cargo:rustc-cfg=intrinsics_enabled"); } } + +/// Simplify the feature logic for enabling arch-specific features so code only needs to use +/// `cfg(arch_enabled)`. +fn configure_arch() { + println!("cargo:rustc-check-cfg=cfg(arch_enabled)"); + + // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable. + if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") { + println!("cargo:rustc-cfg=arch_enabled"); + } +} diff --git a/library/compiler-builtins/libm/ci/run.sh b/library/compiler-builtins/libm/ci/run.sh index 94612adc72f4..9f642326ba82 100755 --- a/library/compiler-builtins/libm/ci/run.sh +++ b/library/compiler-builtins/libm/ci/run.sh @@ -64,6 +64,7 @@ fi # Make sure we can build with overriding features. We test the indibidual # features it controls separately. +cargo check --no-default-features cargo check --features "force-soft-floats" if [ "${BUILD_ONLY:-}" = "1" ]; then diff --git a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/Cargo.toml b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/Cargo.toml index 2a6c62961e36..7118bfe06f1d 100644 --- a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/Cargo.toml +++ b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/Cargo.toml @@ -18,6 +18,7 @@ force-soft-floats = [] [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ + "cfg(arch_enabled)", "cfg(assert_no_panic)", "cfg(intrinsics_enabled)", ] } From 7108e80dc23b9c4971151b0627925554013346de Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:29:55 -0500 Subject: [PATCH 4/5] Update `select_implementation` to accept arch configuration --- .../libm/src/math/support/macros.rs | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/library/compiler-builtins/libm/src/math/support/macros.rs b/library/compiler-builtins/libm/src/math/support/macros.rs index 39a6fe827725..f85a6122ee8a 100644 --- a/library/compiler-builtins/libm/src/math/support/macros.rs +++ b/library/compiler-builtins/libm/src/math/support/macros.rs @@ -39,17 +39,31 @@ macro_rules! cfg_if { (@__identity $($tokens:tt)*) => { $($tokens)* }; } -/// Choose between using an intrinsic (if available) and the function body. Returns directly if -/// the intrinsic is used, otherwise the rest of the function body is used. +/// Choose among using an intrinsic, an arch-specific implementation, and the function body. +/// Returns directly if the intrinsic or arch is used, otherwise continue with the rest of the +/// function. /// -/// Use this if the intrinsic is likely to be more performant on the platform(s) specified -/// in `intrinsic_available`. +/// Specify a `use_intrinsic` meta field if the intrinsic is (1) available on the platforms (i.e. +/// LLVM lowers it without libcalls that may recurse), (2) it is likely to be more performant. +/// Intrinsics require wrappers in the `math::arch::intrinsics` module. /// -/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account -/// for e.g. the `unstable-intrinsics` or `force-soft-float` features. +/// Specify a `use_arch` meta field if an architecture-specific implementation is provided. +/// These live in the `math::arch::some_target_arch` module. +/// +/// Specify a `use_arch_required` meta field if something architecture-specific must be used +/// regardless of feature configuration (`force-soft-floats`). +/// +/// The passed meta options do not need to account for relevant Cargo features +/// (`unstable-intrinsics`, `arch`, `force-soft-floats`), this macro handles that part. macro_rules! select_implementation { ( name: $fname:ident, + // Configuration meta for when to use arch-specific implementation that requires hard + // float ops + $( use_arch: $use_arch:meta, )? + // Configuration meta for when to use the arch module regardless of whether softfloats + // have been requested. + $( use_arch_required: $use_arch_required:meta, )? // Configuration meta for when to call intrinsics and let LLVM figure it out $( use_intrinsic: $use_intrinsic:meta, )? args: $($arg:ident),+ , @@ -57,6 +71,25 @@ macro_rules! select_implementation { // FIXME: these use paths that are a pretty fragile (`super`). We should figure out // something better w.r.t. how this is vendored into compiler-builtins. + // However, we do need a few things from `arch` that are used even with soft floats. + // + select_implementation! { + @cfg $($use_arch_required)?; + if true { + return super::arch::$fname( $($arg),+ ); + } + } + + // By default, never use arch-specific implementations if we have force-soft-floats + #[cfg(arch_enabled)] + select_implementation! { + @cfg $($use_arch)?; + // Wrap in `if true` to avoid unused warnings + if true { + return super::arch::$fname( $($arg),+ ); + } + } + // Never use intrinsics if we are forcing soft floats, and only enable with the // `unstable-intrinsics` feature. #[cfg(intrinsics_enabled)] From d6646ae2d1170e2716c5f964cc394f14b4b6a7da Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:31:48 -0500 Subject: [PATCH 5/5] Move architecture-specific code to `src/math/arch` Move the code and call into its new location with `select_implementation`. --- .../libm/src/math/arch/i586.rs | 37 +++ .../libm/src/math/arch/i686.rs | 24 ++ .../libm/src/math/arch/mod.rs | 19 ++ .../compiler-builtins/libm/src/math/ceil.rs | 19 +- .../compiler-builtins/libm/src/math/floor.rs | 19 +- .../compiler-builtins/libm/src/math/sqrt.rs | 261 ++++++++---------- .../compiler-builtins/libm/src/math/sqrtf.rs | 169 +++++------- 7 files changed, 280 insertions(+), 268 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/arch/i586.rs create mode 100644 library/compiler-builtins/libm/src/math/arch/i686.rs diff --git a/library/compiler-builtins/libm/src/math/arch/i586.rs b/library/compiler-builtins/libm/src/math/arch/i586.rs new file mode 100644 index 000000000000..f92b9a2af711 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/i586.rs @@ -0,0 +1,37 @@ +//! Architecture-specific support for x86-32 without SSE2 + +use super::super::fabs; + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn ceil(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } +} + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn floor(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } +} diff --git a/library/compiler-builtins/libm/src/math/arch/i686.rs b/library/compiler-builtins/libm/src/math/arch/i686.rs new file mode 100644 index 000000000000..80f7face1742 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/i686.rs @@ -0,0 +1,24 @@ +//! Architecture-specific support for x86-32 and x86-64 with SSE2 + +#![cfg(not(feature = "force-soft-floats"))] + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +pub fn sqrtf(x: f32) -> f32 { + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } +} + +pub fn sqrt(x: f64) -> f64 { + unsafe { + let m = _mm_set_sd(x); + let m_sqrt = _mm_sqrt_pd(m); + _mm_cvtsd_f64(m_sqrt) + } +} diff --git a/library/compiler-builtins/libm/src/math/arch/mod.rs b/library/compiler-builtins/libm/src/math/arch/mod.rs index a4bc218b743d..cf9547117b83 100644 --- a/library/compiler-builtins/libm/src/math/arch/mod.rs +++ b/library/compiler-builtins/libm/src/math/arch/mod.rs @@ -7,3 +7,22 @@ #[cfg(intrinsics_enabled)] pub mod intrinsics; + +// Most implementations should be defined here, to ensure they are not made available when +// soft floats are required. +#[cfg(arch_enabled)] +cfg_if! { + if #[cfg(target_feature = "sse2")] { + mod i686; + pub use i686::{sqrt, sqrtf}; + } +} + +// There are certain architecture-specific implementations that are needed for correctness +// even with `force-soft-float`. These are configured here. +cfg_if! { + if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { + mod i586; + pub use i586::{ceil, floor}; + } +} diff --git a/library/compiler-builtins/libm/src/math/ceil.rs b/library/compiler-builtins/libm/src/math/ceil.rs index 0da01b4d0b69..c7e857dbb6fb 100644 --- a/library/compiler-builtins/libm/src/math/ceil.rs +++ b/library/compiler-builtins/libm/src/math/ceil.rs @@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn ceil(x: f64) -> f64 { select_implementation! { name: ceil, + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. - //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated < x { - return truncated + 1.0; - } else { - return truncated; - } - } else { - return x; - } - } let u: u64 = x.to_bits(); let e: i64 = (u >> 52 & 0x7ff) as i64; let y: f64; diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs index 2b9955ebae34..532226b9f86a 100644 --- a/library/compiler-builtins/libm/src/math/floor.rs +++ b/library/compiler-builtins/libm/src/math/floor.rs @@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn floor(x: f64) -> f64 { select_implementation! { name: floor, + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. - //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated > x { - return truncated - 1.0; - } else { - return truncated; - } - } else { - return x; - } - } let ui = x.to_bits(); let e = ((ui >> 52) & 0x7ff) as i32; diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs index 2e856100f7dd..a443b7e4c613 100644 --- a/library/compiler-builtins/libm/src/math/sqrt.rs +++ b/library/compiler-builtins/libm/src/math/sqrt.rs @@ -83,156 +83,139 @@ use core::f64; pub fn sqrt(x: f64) -> f64 { select_implementation! { name: sqrt, + use_arch: target_feature = "sse2", use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse2 is available, - // but if someone does end up here they'll appreciate the speed increase. - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_sd(x); - let m_sqrt = _mm_sqrt_pd(m); - _mm_cvtsd_f64(m_sqrt) + use core::num::Wrapping; + + const TINY: f64 = 1.0e-300; + + let mut z: f64; + let sign: Wrapping = Wrapping(0x80000000); + let mut ix0: i32; + let mut s0: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: Wrapping; + let mut t1: Wrapping; + let mut s1: Wrapping; + let mut ix1: Wrapping; + let mut q1: Wrapping; + + ix0 = (x.to_bits() >> 32) as i32; + ix1 = Wrapping(x.to_bits() as u32); + + /* take care of Inf and NaN */ + if (ix0 & 0x7ff00000) == 0x7ff00000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if ix0 <= 0 { + if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix0 < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } } - #[cfg(any(not(target_feature = "sse2"), feature = "force-soft-floats"))] - { - use core::num::Wrapping; - - const TINY: f64 = 1.0e-300; - - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; - - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); - - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + /* normalize x */ + m = ix0 >> 20; + if m == 0 { + /* subnormal x */ + while ix0 == 0 { + m -= 21; + ix0 |= (ix1 >> 11).0 as i32; + ix1 <<= 21; } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } + i = 0; + while (ix0 & 0x00100000) == 0 { + i += 1; + ix0 <<= 1; } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ + m -= i - 1; + ix0 |= (ix1 >> (32 - i) as usize).0 as i32; + ix1 = ix1 << i as usize; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0 & 0x000fffff) | 0x00100000; + if (m & 1) == 1 { + /* odd m, double x to make it even */ ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ - - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ + + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } + + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { + q += 1; + } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); + } + } + } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } #[cfg(test)] diff --git a/library/compiler-builtins/libm/src/math/sqrtf.rs b/library/compiler-builtins/libm/src/math/sqrtf.rs index b2996b350cb4..d2f7ae70334c 100644 --- a/library/compiler-builtins/libm/src/math/sqrtf.rs +++ b/library/compiler-builtins/libm/src/math/sqrtf.rs @@ -18,109 +18,92 @@ pub fn sqrtf(x: f32) -> f32 { select_implementation! { name: sqrtf, + use_arch: target_feature = "sse2", use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse is available, - // but if someone does end up here they'll appreciate the speed increase. - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) + const TINY: f32 = 1.0e-30; + + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; + + ix = x.to_bits() as i32; + + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } } - #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))] - { - const TINY: f32 = 1.0e-30; - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; - - ix = x.to_bits() as i32; - - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; } - - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ - - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } - - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ + + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520