From d6646ae2d1170e2716c5f964cc394f14b4b6a7da Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 20:31:48 -0500 Subject: [PATCH] Move architecture-specific code to `src/math/arch` Move the code and call into its new location with `select_implementation`. --- .../libm/src/math/arch/i586.rs | 37 +++ .../libm/src/math/arch/i686.rs | 24 ++ .../libm/src/math/arch/mod.rs | 19 ++ .../compiler-builtins/libm/src/math/ceil.rs | 19 +- .../compiler-builtins/libm/src/math/floor.rs | 19 +- .../compiler-builtins/libm/src/math/sqrt.rs | 261 ++++++++---------- .../compiler-builtins/libm/src/math/sqrtf.rs | 169 +++++------- 7 files changed, 280 insertions(+), 268 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/arch/i586.rs create mode 100644 library/compiler-builtins/libm/src/math/arch/i686.rs diff --git a/library/compiler-builtins/libm/src/math/arch/i586.rs b/library/compiler-builtins/libm/src/math/arch/i586.rs new file mode 100644 index 000000000000..f92b9a2af711 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/i586.rs @@ -0,0 +1,37 @@ +//! Architecture-specific support for x86-32 without SSE2 + +use super::super::fabs; + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn ceil(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated < x { + return truncated + 1.0; + } else { + return truncated; + } + } else { + return x; + } +} + +/// Use an alternative implementation on x86, because the +/// main implementation fails with the x87 FPU used by +/// debian i386, probably due to excess precision issues. +/// Basic implementation taken from https://github.com/rust-lang/libm/issues/219. +pub fn floor(x: f64) -> f64 { + if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { + let truncated = x as i64 as f64; + if truncated > x { + return truncated - 1.0; + } else { + return truncated; + } + } else { + return x; + } +} diff --git a/library/compiler-builtins/libm/src/math/arch/i686.rs b/library/compiler-builtins/libm/src/math/arch/i686.rs new file mode 100644 index 000000000000..80f7face1742 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/i686.rs @@ -0,0 +1,24 @@ +//! Architecture-specific support for x86-32 and x86-64 with SSE2 + +#![cfg(not(feature = "force-soft-floats"))] + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +pub fn sqrtf(x: f32) -> f32 { + unsafe { + let m = _mm_set_ss(x); + let m_sqrt = _mm_sqrt_ss(m); + _mm_cvtss_f32(m_sqrt) + } +} + +pub fn sqrt(x: f64) -> f64 { + unsafe { + let m = _mm_set_sd(x); + let m_sqrt = _mm_sqrt_pd(m); + _mm_cvtsd_f64(m_sqrt) + } +} diff --git a/library/compiler-builtins/libm/src/math/arch/mod.rs b/library/compiler-builtins/libm/src/math/arch/mod.rs index a4bc218b743d..cf9547117b83 100644 --- a/library/compiler-builtins/libm/src/math/arch/mod.rs +++ b/library/compiler-builtins/libm/src/math/arch/mod.rs @@ -7,3 +7,22 @@ #[cfg(intrinsics_enabled)] pub mod intrinsics; + +// Most implementations should be defined here, to ensure they are not made available when +// soft floats are required. +#[cfg(arch_enabled)] +cfg_if! { + if #[cfg(target_feature = "sse2")] { + mod i686; + pub use i686::{sqrt, sqrtf}; + } +} + +// There are certain architecture-specific implementations that are needed for correctness +// even with `force-soft-float`. These are configured here. +cfg_if! { + if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { + mod i586; + pub use i586::{ceil, floor}; + } +} diff --git a/library/compiler-builtins/libm/src/math/ceil.rs b/library/compiler-builtins/libm/src/math/ceil.rs index 0da01b4d0b69..c7e857dbb6fb 100644 --- a/library/compiler-builtins/libm/src/math/ceil.rs +++ b/library/compiler-builtins/libm/src/math/ceil.rs @@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn ceil(x: f64) -> f64 { select_implementation! { name: ceil, + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. - //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated < x { - return truncated + 1.0; - } else { - return truncated; - } - } else { - return x; - } - } let u: u64 = x.to_bits(); let e: i64 = (u >> 52 & 0x7ff) as i64; let y: f64; diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs index 2b9955ebae34..532226b9f86a 100644 --- a/library/compiler-builtins/libm/src/math/floor.rs +++ b/library/compiler-builtins/libm/src/math/floor.rs @@ -10,28 +10,11 @@ const TOINT: f64 = 1. / f64::EPSILON; pub fn floor(x: f64) -> f64 { select_implementation! { name: floor, + use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")), use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - { - //use an alternative implementation on x86, because the - //main implementation fails with the x87 FPU used by - //debian i386, probably due to excess precision issues. - //basic implementation taken from https://github.com/rust-lang/libm/issues/219 - use super::fabs; - if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { - let truncated = x as i64 as f64; - if truncated > x { - return truncated - 1.0; - } else { - return truncated; - } - } else { - return x; - } - } let ui = x.to_bits(); let e = ((ui >> 52) & 0x7ff) as i32; diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs index 2e856100f7dd..a443b7e4c613 100644 --- a/library/compiler-builtins/libm/src/math/sqrt.rs +++ b/library/compiler-builtins/libm/src/math/sqrt.rs @@ -83,156 +83,139 @@ use core::f64; pub fn sqrt(x: f64) -> f64 { select_implementation! { name: sqrt, + use_arch: target_feature = "sse2", use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse2 is available, - // but if someone does end up here they'll appreciate the speed increase. - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_sd(x); - let m_sqrt = _mm_sqrt_pd(m); - _mm_cvtsd_f64(m_sqrt) + use core::num::Wrapping; + + const TINY: f64 = 1.0e-300; + + let mut z: f64; + let sign: Wrapping = Wrapping(0x80000000); + let mut ix0: i32; + let mut s0: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: Wrapping; + let mut t1: Wrapping; + let mut s1: Wrapping; + let mut ix1: Wrapping; + let mut q1: Wrapping; + + ix0 = (x.to_bits() >> 32) as i32; + ix1 = Wrapping(x.to_bits() as u32); + + /* take care of Inf and NaN */ + if (ix0 & 0x7ff00000) == 0x7ff00000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + /* take care of zero */ + if ix0 <= 0 { + if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix0 < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } } - #[cfg(any(not(target_feature = "sse2"), feature = "force-soft-floats"))] - { - use core::num::Wrapping; - - const TINY: f64 = 1.0e-300; - - let mut z: f64; - let sign: Wrapping = Wrapping(0x80000000); - let mut ix0: i32; - let mut s0: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: Wrapping; - let mut t1: Wrapping; - let mut s1: Wrapping; - let mut ix1: Wrapping; - let mut q1: Wrapping; - - ix0 = (x.to_bits() >> 32) as i32; - ix1 = Wrapping(x.to_bits() as u32); - - /* take care of Inf and NaN */ - if (ix0 & 0x7ff00000) == 0x7ff00000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + /* normalize x */ + m = ix0 >> 20; + if m == 0 { + /* subnormal x */ + while ix0 == 0 { + m -= 21; + ix0 |= (ix1 >> 11).0 as i32; + ix1 <<= 21; } - /* take care of zero */ - if ix0 <= 0 { - if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix0 < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } + i = 0; + while (ix0 & 0x00100000) == 0 { + i += 1; + ix0 <<= 1; } - /* normalize x */ - m = ix0 >> 20; - if m == 0 { - /* subnormal x */ - while ix0 == 0 { - m -= 21; - ix0 |= (ix1 >> 11).0 as i32; - ix1 <<= 21; - } - i = 0; - while (ix0 & 0x00100000) == 0 { - i += 1; - ix0 <<= 1; - } - m -= i - 1; - ix0 |= (ix1 >> (32 - i) as usize).0 as i32; - ix1 = ix1 << i as usize; - } - m -= 1023; /* unbias exponent */ - ix0 = (ix0 & 0x000fffff) | 0x00100000; - if (m & 1) == 1 { - /* odd m, double x to make it even */ - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ + m -= i - 1; + ix0 |= (ix1 >> (32 - i) as usize).0 as i32; + ix1 = ix1 << i as usize; + } + m -= 1023; /* unbias exponent */ + ix0 = (ix0 & 0x000fffff) | 0x00100000; + if (m & 1) == 1 { + /* odd m, double x to make it even */ ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; - q = 0; /* [q,q1] = sqrt(x) */ - q1 = Wrapping(0); - s0 = 0; - s1 = Wrapping(0); - r = Wrapping(0x00200000); /* r = moving bit from right to left */ - - while r != Wrapping(0) { - t = s0 + r.0 as i32; - if t <= ix0 { - s0 = t + r.0 as i32; - ix0 -= t; - q += r.0 as i32; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - r = sign; - while r != Wrapping(0) { - t1 = s1 + r; - t = s0; - if t < ix0 || (t == ix0 && t1 <= ix1) { - s1 = t1 + r; - if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { - s0 += 1; - } - ix0 -= t; - if ix1 < t1 { - ix0 -= 1; - } - ix1 -= t1; - q1 += r; - } - ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; - ix1 += ix1; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if (ix0 as u32 | ix1.0) != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if q1.0 == 0xffffffff { - q1 = Wrapping(0); - q += 1; - } else if z > 1.0 { - if q1.0 == 0xfffffffe { - q += 1; - } - q1 += Wrapping(2); - } else { - q1 += q1 & Wrapping(1); - } - } - } - ix0 = (q >> 1) + 0x3fe00000; - ix1 = q1 >> 1; - if (q & 1) == 1 { - ix1 |= sign; - } - ix0 += m << 20; - f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + q = 0; /* [q,q1] = sqrt(x) */ + q1 = Wrapping(0); + s0 = 0; + s1 = Wrapping(0); + r = Wrapping(0x00200000); /* r = moving bit from right to left */ + + while r != Wrapping(0) { + t = s0 + r.0 as i32; + if t <= ix0 { + s0 = t + r.0 as i32; + ix0 -= t; + q += r.0 as i32; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } + + r = sign; + while r != Wrapping(0) { + t1 = s1 + r; + t = s0; + if t < ix0 || (t == ix0 && t1 <= ix1) { + s1 = t1 + r; + if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { + s0 += 1; + } + ix0 -= t; + if ix1 < t1 { + ix0 -= 1; + } + ix1 -= t1; + q1 += r; + } + ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; + ix1 += ix1; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if (ix0 as u32 | ix1.0) != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if q1.0 == 0xffffffff { + q1 = Wrapping(0); + q += 1; + } else if z > 1.0 { + if q1.0 == 0xfffffffe { + q += 1; + } + q1 += Wrapping(2); + } else { + q1 += q1 & Wrapping(1); + } + } + } + ix0 = (q >> 1) + 0x3fe00000; + ix1 = q1 >> 1; + if (q & 1) == 1 { + ix1 |= sign; + } + ix0 += m << 20; + f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } #[cfg(test)] diff --git a/library/compiler-builtins/libm/src/math/sqrtf.rs b/library/compiler-builtins/libm/src/math/sqrtf.rs index b2996b350cb4..d2f7ae70334c 100644 --- a/library/compiler-builtins/libm/src/math/sqrtf.rs +++ b/library/compiler-builtins/libm/src/math/sqrtf.rs @@ -18,109 +18,92 @@ pub fn sqrtf(x: f32) -> f32 { select_implementation! { name: sqrtf, + use_arch: target_feature = "sse2", use_intrinsic: target_arch = "wasm32", args: x, } - #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))] - { - // Note: This path is unlikely since LLVM will usually have already - // optimized sqrt calls into hardware instructions if sse is available, - // but if someone does end up here they'll appreciate the speed increase. - #[cfg(target_arch = "x86")] - use core::arch::x86::*; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::*; - unsafe { - let m = _mm_set_ss(x); - let m_sqrt = _mm_sqrt_ss(m); - _mm_cvtss_f32(m_sqrt) + const TINY: f32 = 1.0e-30; + + let mut z: f32; + let sign: i32 = 0x80000000u32 as i32; + let mut ix: i32; + let mut s: i32; + let mut q: i32; + let mut m: i32; + let mut t: i32; + let mut i: i32; + let mut r: u32; + + ix = x.to_bits() as i32; + + /* take care of Inf and NaN */ + if (ix as u32 & 0x7f800000) == 0x7f800000 { + return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + } + + /* take care of zero */ + if ix <= 0 { + if (ix & !sign) == 0 { + return x; /* sqrt(+-0) = +-0 */ + } + if ix < 0 { + return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } } - #[cfg(any(not(target_feature = "sse"), feature = "force-soft-floats"))] - { - const TINY: f32 = 1.0e-30; - let mut z: f32; - let sign: i32 = 0x80000000u32 as i32; - let mut ix: i32; - let mut s: i32; - let mut q: i32; - let mut m: i32; - let mut t: i32; - let mut i: i32; - let mut r: u32; - - ix = x.to_bits() as i32; - - /* take care of Inf and NaN */ - if (ix as u32 & 0x7f800000) == 0x7f800000 { - return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ + /* normalize x */ + m = ix >> 23; + if m == 0 { + /* subnormal x */ + i = 0; + while ix & 0x00800000 == 0 { + ix <<= 1; + i = i + 1; } - - /* take care of zero */ - if ix <= 0 { - if (ix & !sign) == 0 { - return x; /* sqrt(+-0) = +-0 */ - } - if ix < 0 { - return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ - } - } - - /* normalize x */ - m = ix >> 23; - if m == 0 { - /* subnormal x */ - i = 0; - while ix & 0x00800000 == 0 { - ix <<= 1; - i = i + 1; - } - m -= i - 1; - } - m -= 127; /* unbias exponent */ - ix = (ix & 0x007fffff) | 0x00800000; - if m & 1 == 1 { - /* odd m, double x to make it even */ - ix += ix; - } - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ + m -= i - 1; + } + m -= 127; /* unbias exponent */ + ix = (ix & 0x007fffff) | 0x00800000; + if m & 1 == 1 { + /* odd m, double x to make it even */ ix += ix; - q = 0; - s = 0; - r = 0x01000000; /* r = moving bit from right to left */ - - while r != 0 { - t = s + r as i32; - if t <= ix { - s = t + r as i32; - ix -= t; - q += r as i32; - } - ix += ix; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if ix != 0 { - z = 1.0 - TINY; /* raise inexact flag */ - if z >= 1.0 { - z = 1.0 + TINY; - if z > 1.0 { - q += 2; - } else { - q += q & 1; - } - } - } - - ix = (q >> 1) + 0x3f000000; - ix += m << 23; - f32::from_bits(ix as u32) } + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + q = 0; + s = 0; + r = 0x01000000; /* r = moving bit from right to left */ + + while r != 0 { + t = s + r as i32; + if t <= ix { + s = t + r as i32; + ix -= t; + q += r as i32; + } + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if ix != 0 { + z = 1.0 - TINY; /* raise inexact flag */ + if z >= 1.0 { + z = 1.0 + TINY; + if z > 1.0 { + q += 2; + } else { + q += q & 1; + } + } + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + f32::from_bits(ix as u32) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520