From 42fce292ab45c4d28de82799a83f3313b9ca9907 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 13:27:22 +0000 Subject: [PATCH] Add a generic version of `floor` Additionally, make use of this version to implement `floor` and `floorf`. Similar to `ceil`, musl'f `ceilf` routine seems to work better for all float widths than the `ceil` algorithm. Trying with the `ceil` (`f64`) algorithm produced the following regressions: icount::icount_bench_floor_group::icount_bench_floor logspace:setup_floor() Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000) Baselines: softfloat|softfloat Instructions: 14064|13171 (+6.78005%) [+1.06780x] L1 Hits: 16821|15802 (+6.44855%) [+1.06449x] L2 Hits: 0|0 (No change) RAM Hits: 8|9 (-11.1111%) [-1.12500x] Total read+write: 16829|15811 (+6.43856%) [+1.06439x] Estimated Cycles: 17101|16117 (+6.10535%) [+1.06105x] icount::icount_bench_floorf128_group::icount_bench_floorf128 logspace:setup_floorf128() Baselines: softfloat|softfloat Instructions: 166868|N/A (*********) L1 Hits: 221429|N/A (*********) L2 Hits: 1|N/A (*********) RAM Hits: 34|N/A (*********) Total read+write: 221464|N/A (*********) Estimated Cycles: 222624|N/A (*********) icount::icount_bench_floorf16_group::icount_bench_floorf16 logspace:setup_floorf16() Baselines: softfloat|softfloat Instructions: 143029|N/A (*********) L1 Hits: 176517|N/A (*********) L2 Hits: 1|N/A (*********) RAM Hits: 13|N/A (*********) Total read+write: 176531|N/A (*********) Estimated Cycles: 176977|N/A (*********) icount::icount_bench_floorf_group::icount_bench_floorf logspace:setup_floorf() Performance has regressed: Instructions (14732 > 10441) regressed by +41.0976% (>+5.00000) Baselines: softfloat|softfloat Instructions: 14732|10441 (+41.0976%) [+1.41098x] L1 Hits: 17616|13027 (+35.2268%) [+1.35227x] L2 Hits: 0|0 (No change) RAM Hits: 8|6 (+33.3333%) [+1.33333x] Total read+write: 17624|13033 (+35.2260%) [+1.35226x] Estimated Cycles: 17896|13237 (+35.1968%) [+1.35197x] --- .../libm/etc/function-definitions.json | 6 +- .../compiler-builtins/libm/src/math/floor.rs | 41 +------ .../compiler-builtins/libm/src/math/floorf.rs | 52 +-------- .../libm/src/math/generic/floor.rs | 106 ++++++++++++++++++ .../libm/src/math/generic/mod.rs | 2 + 5 files changed, 114 insertions(+), 93 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/generic/floor.rs diff --git a/library/compiler-builtins/libm/etc/function-definitions.json b/library/compiler-builtins/libm/etc/function-definitions.json index c75152f63019..6a865f42784d 100644 --- a/library/compiler-builtins/libm/etc/function-definitions.json +++ b/library/compiler-builtins/libm/etc/function-definitions.json @@ -336,14 +336,16 @@ "src/libm_helper.rs", "src/math/arch/i586.rs", "src/math/arch/wasm32.rs", - "src/math/floor.rs" + "src/math/floor.rs", + "src/math/generic/floor.rs" ], "type": "f64" }, "floorf": { "sources": [ "src/math/arch/wasm32.rs", - "src/math/floorf.rs" + "src/math/floorf.rs", + "src/math/generic/floor.rs" ], "type": "f32" }, diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs index 2823bf44d9c3..b4f02abc4911 100644 --- a/library/compiler-builtins/libm/src/math/floor.rs +++ b/library/compiler-builtins/libm/src/math/floor.rs @@ -1,8 +1,3 @@ -#![allow(unreachable_code)] -use core::f64; - -const TOINT: f64 = 1. / f64::EPSILON; - /// Floor (f64) /// /// Finds the nearest integer less than or equal to `x`. @@ -15,39 +10,5 @@ pub fn floor(x: f64) -> f64 { args: x, } - let ui = x.to_bits(); - let e = ((ui >> 52) & 0x7ff) as i32; - - if (e >= 0x3ff + 52) || (x == 0.) { - return x; - } - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; - /* special case because of non-nearest rounding modes */ - if e < 0x3ff { - force_eval!(y); - return if (ui >> 63) != 0 { -1. } else { 0. }; - } - if y > 0. { x + y - 1. } else { x + y } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floor(1.1), 1.0); - assert_eq!(floor(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(floor(f64::NAN).is_nan()); - for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() { - assert_eq!(floor(f), f); - } - } + return super::generic::floor(x); } diff --git a/library/compiler-builtins/libm/src/math/floorf.rs b/library/compiler-builtins/libm/src/math/floorf.rs index 23a18c0f7376..16957b7f3557 100644 --- a/library/compiler-builtins/libm/src/math/floorf.rs +++ b/library/compiler-builtins/libm/src/math/floorf.rs @@ -1,5 +1,3 @@ -use core::f32; - /// Floor (f32) /// /// Finds the nearest integer less than or equal to `x`. @@ -11,53 +9,5 @@ pub fn floorf(x: f32) -> f32 { args: x, } - let mut ui = x.to_bits(); - let e = (((ui >> 23) as i32) & 0xff) - 0x7f; - - if e >= 23 { - return x; - } - if e >= 0 { - let m: u32 = 0x007fffff >> e; - if (ui & m) == 0 { - return x; - } - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 != 0 { - ui += m; - } - ui &= !m; - } else { - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 == 0 { - ui = 0; - } else if ui << 1 != 0 { - return -1.0; - } - } - f32::from_bits(ui) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floorf(0.5), 0.0); - assert_eq!(floorf(1.1), 1.0); - assert_eq!(floorf(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(floorf(f32::NAN).is_nan()); - for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() { - assert_eq!(floorf(f), f); - } - } + return super::generic::floor(x); } diff --git a/library/compiler-builtins/libm/src/math/generic/floor.rs b/library/compiler-builtins/libm/src/math/generic/floor.rs new file mode 100644 index 000000000000..6754c08f870b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/floor.rs @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/floor.c */ + +//! Generic `floor` algorithm. +//! +//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use super::super::{Float, Int, IntTy, MinInt}; + +pub fn floor(x: F) -> F { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. + if e >= F::SIG_BITS as i32 { + return x; + } + + if e >= 0 { + // |x| >= 1.0 + + let m = F::SIG_MASK >> e.unsigned(); + if ix & m == zero { + // Portion to be masked is already zero; no adjustment needed. + return x; + } + + // Otherwise, raise an inexact exception. + force_eval!(x + F::MAX); + + if x.is_sign_negative() { + ix += m; + } + + ix &= !m; + F::from_bits(ix) + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). + force_eval!(x + F::MAX); + + if x.is_sign_positive() { + // 0.0 <= x < 1.0; rounding down goes toward +0.0. + F::ZERO + } else if ix << 1 != zero { + // -1.0 < x < 0.0; rounding down goes toward -1.0. + F::NEG_ONE + } else { + // -0.0 remains unchanged + x + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor + fn spec_test() { + // Not Asserted: that the current rounding mode has no effect. + for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() { + assert_biteq!(floor(f), f); + } + } + + /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(floor(0.5f32), 0.0); + assert_eq!(floor(1.1f32), 1.0); + assert_eq!(floor(2.9f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(floor(1.1f64), 1.0); + assert_eq!(floor(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs index f8bb9fa6adb1..b08a77d5d649 100644 --- a/library/compiler-builtins/libm/src/math/generic/mod.rs +++ b/library/compiler-builtins/libm/src/math/generic/mod.rs @@ -2,6 +2,7 @@ mod ceil; mod copysign; mod fabs; mod fdim; +mod floor; mod sqrt; mod trunc; @@ -9,5 +10,6 @@ pub use ceil::ceil; pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; +pub use floor::floor; pub use sqrt::sqrt; pub use trunc::trunc;