From 42fce292ab45c4d28de82799a83f3313b9ca9907 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 13:27:22 +0000 Subject: [PATCH 1/3] Add a generic version of `floor` Additionally, make use of this version to implement `floor` and `floorf`. Similar to `ceil`, musl'f `ceilf` routine seems to work better for all float widths than the `ceil` algorithm. Trying with the `ceil` (`f64`) algorithm produced the following regressions: icount::icount_bench_floor_group::icount_bench_floor logspace:setup_floor() Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000) Baselines: softfloat|softfloat Instructions: 14064|13171 (+6.78005%) [+1.06780x] L1 Hits: 16821|15802 (+6.44855%) [+1.06449x] L2 Hits: 0|0 (No change) RAM Hits: 8|9 (-11.1111%) [-1.12500x] Total read+write: 16829|15811 (+6.43856%) [+1.06439x] Estimated Cycles: 17101|16117 (+6.10535%) [+1.06105x] icount::icount_bench_floorf128_group::icount_bench_floorf128 logspace:setup_floorf128() Baselines: softfloat|softfloat Instructions: 166868|N/A (*********) L1 Hits: 221429|N/A (*********) L2 Hits: 1|N/A (*********) RAM Hits: 34|N/A (*********) Total read+write: 221464|N/A (*********) Estimated Cycles: 222624|N/A (*********) icount::icount_bench_floorf16_group::icount_bench_floorf16 logspace:setup_floorf16() Baselines: softfloat|softfloat Instructions: 143029|N/A (*********) L1 Hits: 176517|N/A (*********) L2 Hits: 1|N/A (*********) RAM Hits: 13|N/A (*********) Total read+write: 176531|N/A (*********) Estimated Cycles: 176977|N/A (*********) icount::icount_bench_floorf_group::icount_bench_floorf logspace:setup_floorf() Performance has regressed: Instructions (14732 > 10441) regressed by +41.0976% (>+5.00000) Baselines: softfloat|softfloat Instructions: 14732|10441 (+41.0976%) [+1.41098x] L1 Hits: 17616|13027 (+35.2268%) [+1.35227x] L2 Hits: 0|0 (No change) RAM Hits: 8|6 (+33.3333%) [+1.33333x] Total read+write: 17624|13033 (+35.2260%) [+1.35226x] Estimated Cycles: 17896|13237 (+35.1968%) [+1.35197x] --- .../libm/etc/function-definitions.json | 6 +- .../compiler-builtins/libm/src/math/floor.rs | 41 +------ .../compiler-builtins/libm/src/math/floorf.rs | 52 +-------- .../libm/src/math/generic/floor.rs | 106 ++++++++++++++++++ .../libm/src/math/generic/mod.rs | 2 + 5 files changed, 114 insertions(+), 93 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/generic/floor.rs diff --git a/library/compiler-builtins/libm/etc/function-definitions.json b/library/compiler-builtins/libm/etc/function-definitions.json index c75152f63019..6a865f42784d 100644 --- a/library/compiler-builtins/libm/etc/function-definitions.json +++ b/library/compiler-builtins/libm/etc/function-definitions.json @@ -336,14 +336,16 @@ "src/libm_helper.rs", "src/math/arch/i586.rs", "src/math/arch/wasm32.rs", - "src/math/floor.rs" + "src/math/floor.rs", + "src/math/generic/floor.rs" ], "type": "f64" }, "floorf": { "sources": [ "src/math/arch/wasm32.rs", - "src/math/floorf.rs" + "src/math/floorf.rs", + "src/math/generic/floor.rs" ], "type": "f32" }, diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs index 2823bf44d9c3..b4f02abc4911 100644 --- a/library/compiler-builtins/libm/src/math/floor.rs +++ b/library/compiler-builtins/libm/src/math/floor.rs @@ -1,8 +1,3 @@ -#![allow(unreachable_code)] -use core::f64; - -const TOINT: f64 = 1. / f64::EPSILON; - /// Floor (f64) /// /// Finds the nearest integer less than or equal to `x`. @@ -15,39 +10,5 @@ pub fn floor(x: f64) -> f64 { args: x, } - let ui = x.to_bits(); - let e = ((ui >> 52) & 0x7ff) as i32; - - if (e >= 0x3ff + 52) || (x == 0.) { - return x; - } - /* y = int(x) - x, where int(x) is an integer neighbor of x */ - let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; - /* special case because of non-nearest rounding modes */ - if e < 0x3ff { - force_eval!(y); - return if (ui >> 63) != 0 { -1. } else { 0. }; - } - if y > 0. { x + y - 1. } else { x + y } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floor(1.1), 1.0); - assert_eq!(floor(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(floor(f64::NAN).is_nan()); - for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() { - assert_eq!(floor(f), f); - } - } + return super::generic::floor(x); } diff --git a/library/compiler-builtins/libm/src/math/floorf.rs b/library/compiler-builtins/libm/src/math/floorf.rs index 23a18c0f7376..16957b7f3557 100644 --- a/library/compiler-builtins/libm/src/math/floorf.rs +++ b/library/compiler-builtins/libm/src/math/floorf.rs @@ -1,5 +1,3 @@ -use core::f32; - /// Floor (f32) /// /// Finds the nearest integer less than or equal to `x`. @@ -11,53 +9,5 @@ pub fn floorf(x: f32) -> f32 { args: x, } - let mut ui = x.to_bits(); - let e = (((ui >> 23) as i32) & 0xff) - 0x7f; - - if e >= 23 { - return x; - } - if e >= 0 { - let m: u32 = 0x007fffff >> e; - if (ui & m) == 0 { - return x; - } - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 != 0 { - ui += m; - } - ui &= !m; - } else { - force_eval!(x + f32::from_bits(0x7b800000)); - if ui >> 31 == 0 { - ui = 0; - } else if ui << 1 != 0 { - return -1.0; - } - } - f32::from_bits(ui) -} - -// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 -#[cfg(not(target_arch = "powerpc64"))] -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn sanity_check() { - assert_eq!(floorf(0.5), 0.0); - assert_eq!(floorf(1.1), 1.0); - assert_eq!(floorf(2.9), 2.0); - } - - /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor - #[test] - fn spec_tests() { - // Not Asserted: that the current rounding mode has no effect. - assert!(floorf(f32::NAN).is_nan()); - for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() { - assert_eq!(floorf(f), f); - } - } + return super::generic::floor(x); } diff --git a/library/compiler-builtins/libm/src/math/generic/floor.rs b/library/compiler-builtins/libm/src/math/generic/floor.rs new file mode 100644 index 000000000000..6754c08f870b --- /dev/null +++ b/library/compiler-builtins/libm/src/math/generic/floor.rs @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: MIT + * origin: musl src/math/floor.c */ + +//! Generic `floor` algorithm. +//! +//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because +//! performance seems to be better (based on icount) and it does not seem to experience rounding +//! errors on i386. + +use super::super::{Float, Int, IntTy, MinInt}; + +pub fn floor(x: F) -> F { + let zero = IntTy::::ZERO; + + let mut ix = x.to_bits(); + let e = x.exp_unbiased(); + + // If the represented value has no fractional part, no truncation is needed. + if e >= F::SIG_BITS as i32 { + return x; + } + + if e >= 0 { + // |x| >= 1.0 + + let m = F::SIG_MASK >> e.unsigned(); + if ix & m == zero { + // Portion to be masked is already zero; no adjustment needed. + return x; + } + + // Otherwise, raise an inexact exception. + force_eval!(x + F::MAX); + + if x.is_sign_negative() { + ix += m; + } + + ix &= !m; + F::from_bits(ix) + } else { + // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). + force_eval!(x + F::MAX); + + if x.is_sign_positive() { + // 0.0 <= x < 1.0; rounding down goes toward +0.0. + F::ZERO + } else if ix << 1 != zero { + // -1.0 < x < 0.0; rounding down goes toward -1.0. + F::NEG_ONE + } else { + // -0.0 remains unchanged + x + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor + fn spec_test() { + // Not Asserted: that the current rounding mode has no effect. + for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() { + assert_biteq!(floor(f), f); + } + } + + /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */ + + #[test] + #[cfg(f16_enabled)] + fn spec_tests_f16() { + spec_test::(); + } + + #[test] + fn sanity_check_f32() { + assert_eq!(floor(0.5f32), 0.0); + assert_eq!(floor(1.1f32), 1.0); + assert_eq!(floor(2.9f32), 2.0); + } + + #[test] + fn spec_tests_f32() { + spec_test::(); + } + + #[test] + fn sanity_check_f64() { + assert_eq!(floor(1.1f64), 1.0); + assert_eq!(floor(2.9f64), 2.0); + } + + #[test] + fn spec_tests_f64() { + spec_test::(); + } + + #[test] + #[cfg(f128_enabled)] + fn spec_tests_f128() { + spec_test::(); + } +} diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs index f8bb9fa6adb1..b08a77d5d649 100644 --- a/library/compiler-builtins/libm/src/math/generic/mod.rs +++ b/library/compiler-builtins/libm/src/math/generic/mod.rs @@ -2,6 +2,7 @@ mod ceil; mod copysign; mod fabs; mod fdim; +mod floor; mod sqrt; mod trunc; @@ -9,5 +10,6 @@ pub use ceil::ceil; pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; +pub use floor::floor; pub use sqrt::sqrt; pub use trunc::trunc; From 6a8bb0fa80ba2e777dea045441a0a879c4247b93 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 13 Jan 2025 13:33:24 +0000 Subject: [PATCH 2/3] Add `floorf16` and `floorf128` Use the generic algorithms to provide implementations for these routines. --- .../crates/compiler-builtins-smoke-test/src/lib.rs | 2 ++ .../libm/crates/libm-macros/src/shared.rs | 4 ++-- .../libm/crates/libm-test/benches/icount.rs | 2 ++ .../libm/crates/libm-test/benches/random.rs | 4 +++- .../libm/crates/libm-test/src/mpfloat.rs | 4 ++++ .../crates/libm-test/tests/compare_built_musl.rs | 2 ++ .../compiler-builtins/libm/crates/util/src/main.rs | 2 ++ .../libm/etc/function-definitions.json | 14 ++++++++++++++ .../compiler-builtins/libm/etc/function-list.txt | 2 ++ .../compiler-builtins/libm/src/math/floorf128.rs | 7 +++++++ .../compiler-builtins/libm/src/math/floorf16.rs | 7 +++++++ library/compiler-builtins/libm/src/math/mod.rs | 4 ++++ 12 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/floorf128.rs create mode 100644 library/compiler-builtins/libm/src/math/floorf16.rs diff --git a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs index b9521eb07152..4834ba256eb2 100644 --- a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -97,6 +97,8 @@ no_mangle! { fdimf16(x: f16, y: f16) -> f16; floor(x: f64) -> f64; floorf(x: f32) -> f32; + floorf128(x: f128) -> f128; + floorf16(x: f16) -> f16; fma(x: f64, y: f64, z: f64) -> f64; fmaf(x: f32, y: f32, z: f32) -> f32; fmax(x: f64, y: f64) -> f64; diff --git a/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs b/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs index e7d3d18d9a1b..64623658d655 100644 --- a/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs +++ b/library/compiler-builtins/libm/crates/libm-macros/src/shared.rs @@ -9,7 +9,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F16, Signature { args: &[Ty::F16], returns: &[Ty::F16] }, None, - &["ceilf16", "fabsf16", "sqrtf16", "truncf16"], + &["ceilf16", "fabsf16", "floorf16", "sqrtf16", "truncf16"], ), ( // `fn(f32) -> f32` @@ -40,7 +40,7 @@ const ALL_OPERATIONS_NESTED: &[(FloatTy, Signature, Option, &[&str])] FloatTy::F128, Signature { args: &[Ty::F128], returns: &[Ty::F128] }, None, - &["ceilf128", "fabsf128", "sqrtf128", "truncf128"], + &["ceilf128", "fabsf128", "floorf128", "sqrtf128", "truncf128"], ), ( // `(f16, f16) -> f16` diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs index 84be3d5245b6..eae63619c579 100644 --- a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs +++ b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs @@ -101,6 +101,8 @@ main!( icount_bench_fdimf16_group, icount_bench_fdimf_group, icount_bench_floor_group, + icount_bench_floorf128_group, + icount_bench_floorf16_group, icount_bench_floorf_group, icount_bench_fma_group, icount_bench_fmaf_group, diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs index 511e26d91267..bd7b3597196c 100644 --- a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs +++ b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs @@ -125,8 +125,10 @@ libm_macros::for_each_function! { | fabsf16 | fdimf128 | fdimf16 - | sqrtf16 + | floorf128 + | floorf16 | sqrtf128 + | sqrtf16 | truncf128 | truncf16 => (false, None), diff --git a/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs b/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs index bbd19dbb07ba..53fade7d0ef6 100644 --- a/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs +++ b/library/compiler-builtins/libm/crates/libm-test/src/mpfloat.rs @@ -148,6 +148,8 @@ libm_macros::for_each_function! { fabsf128, fabsf16,floor, floorf, + floorf128, + floorf16, fmod, fmodf, frexp, @@ -240,6 +242,7 @@ impl_no_round! { impl_no_round! { fabsf16 => abs_mut; ceilf16 => ceil_mut; + floorf16 => floor_mut; truncf16 => trunc_mut; } @@ -247,6 +250,7 @@ impl_no_round! { impl_no_round! { fabsf128 => abs_mut; ceilf128 => ceil_mut; + floorf128 => floor_mut; truncf128 => trunc_mut; } diff --git a/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs b/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs index e13acf3de216..335496fce36c 100644 --- a/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs +++ b/library/compiler-builtins/libm/crates/libm-test/tests/compare_built_musl.rs @@ -87,6 +87,8 @@ libm_macros::for_each_function! { fabsf16, fdimf128, fdimf16, + floorf128, + floorf16, truncf128, truncf16, sqrtf16, diff --git a/library/compiler-builtins/libm/crates/util/src/main.rs b/library/compiler-builtins/libm/crates/util/src/main.rs index 810919339ec6..988c01d07607 100644 --- a/library/compiler-builtins/libm/crates/util/src/main.rs +++ b/library/compiler-builtins/libm/crates/util/src/main.rs @@ -92,6 +92,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) { | fabsf16 | fdimf128 | fdimf16 + | floorf128 + | floorf16 | sqrtf128 | sqrtf16 | truncf128 diff --git a/library/compiler-builtins/libm/etc/function-definitions.json b/library/compiler-builtins/libm/etc/function-definitions.json index 6a865f42784d..eef176fb57c8 100644 --- a/library/compiler-builtins/libm/etc/function-definitions.json +++ b/library/compiler-builtins/libm/etc/function-definitions.json @@ -349,6 +349,20 @@ ], "type": "f32" }, + "floorf128": { + "sources": [ + "src/math/floorf128.rs", + "src/math/generic/floor.rs" + ], + "type": "f128" + }, + "floorf16": { + "sources": [ + "src/math/floorf16.rs", + "src/math/generic/floor.rs" + ], + "type": "f16" + }, "fma": { "sources": [ "src/libm_helper.rs", diff --git a/library/compiler-builtins/libm/etc/function-list.txt b/library/compiler-builtins/libm/etc/function-list.txt index 337e7e434e5f..3bb895f4a297 100644 --- a/library/compiler-builtins/libm/etc/function-list.txt +++ b/library/compiler-builtins/libm/etc/function-list.txt @@ -49,6 +49,8 @@ fdimf128 fdimf16 floor floorf +floorf128 +floorf16 fma fmaf fmax diff --git a/library/compiler-builtins/libm/src/math/floorf128.rs b/library/compiler-builtins/libm/src/math/floorf128.rs new file mode 100644 index 000000000000..9a9fe4151152 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/floorf128.rs @@ -0,0 +1,7 @@ +/// Floor (f128) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf128(x: f128) -> f128 { + return super::generic::floor(x); +} diff --git a/library/compiler-builtins/libm/src/math/floorf16.rs b/library/compiler-builtins/libm/src/math/floorf16.rs new file mode 100644 index 000000000000..f9b868e04109 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/floorf16.rs @@ -0,0 +1,7 @@ +/// Floor (f16) +/// +/// Finds the nearest integer less than or equal to `x`. +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn floorf16(x: f16) -> f16 { + return super::generic::floor(x); +} diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index 5228e78b7f33..68d201524b4b 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -345,6 +345,7 @@ cfg_if! { mod copysignf16; mod fabsf16; mod fdimf16; + mod floorf16; mod sqrtf16; mod truncf16; @@ -352,6 +353,7 @@ cfg_if! { pub use self::copysignf16::copysignf16; pub use self::fabsf16::fabsf16; pub use self::fdimf16::fdimf16; + pub use self::floorf16::floorf16; pub use self::sqrtf16::sqrtf16; pub use self::truncf16::truncf16; } @@ -363,6 +365,7 @@ cfg_if! { mod copysignf128; mod fabsf128; mod fdimf128; + mod floorf128; mod sqrtf128; mod truncf128; @@ -370,6 +373,7 @@ cfg_if! { pub use self::copysignf128::copysignf128; pub use self::fabsf128::fabsf128; pub use self::fdimf128::fdimf128; + pub use self::floorf128::floorf128; pub use self::sqrtf128::sqrtf128; pub use self::truncf128::truncf128; } From 3ae70a4a6cdf1bd11be554ac3ba272707ce8da56 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 22 Jan 2025 08:48:02 +0000 Subject: [PATCH 3/3] Adjust `ceil` style to be more similar to `floor` --- .../crates/compiler-builtins-smoke-test/src/lib.rs | 2 ++ .../compiler-builtins/libm/src/math/generic/ceil.rs | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs index 4834ba256eb2..1a7aa983ee44 100644 --- a/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs +++ b/library/compiler-builtins/libm/crates/compiler-builtins-smoke-test/src/lib.rs @@ -67,6 +67,8 @@ no_mangle! { cbrtf(x: f32) -> f32; ceil(x: f64) -> f64; ceilf(x: f32) -> f32; + ceilf128(x: f128) -> f128; + ceilf16(x: f16) -> f16; copysign(x: f64, y: f64) -> f64; copysignf(x: f32, y: f32) -> f32; copysignf128(x: f128, y: f128) -> f128; diff --git a/library/compiler-builtins/libm/src/math/generic/ceil.rs b/library/compiler-builtins/libm/src/math/generic/ceil.rs index 34261faf782f..971a4d3d8c5f 100644 --- a/library/compiler-builtins/libm/src/math/generic/ceil.rs +++ b/library/compiler-builtins/libm/src/math/generic/ceil.rs @@ -31,24 +31,28 @@ pub fn ceil(x: F) -> F { // Otherwise, raise an inexact exception. force_eval!(x + F::MAX); + if x.is_sign_positive() { ix += m; } + ix &= !m; + F::from_bits(ix) } else { // |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0). force_eval!(x + F::MAX); if x.is_sign_negative() { // -1.0 < x <= -0.0; rounding up goes toward -0.0. - return F::NEG_ZERO; + F::NEG_ZERO } else if ix << 1 != zero { // 0.0 < x < 1.0; rounding up goes toward +1.0. - return F::ONE; + F::ONE + } else { + // +0.0 remains unchanged + x } } - - F::from_bits(ix) } #[cfg(test)]