Add a generic version of floor
Additionally, make use of this version to implement `floor` and
`floorf`.
Similar to `ceil`, musl's `floorf` routine seems to work better for all
float widths than the `floor` algorithm. Trying with the `floor` (`f64`)
algorithm produced the following regressions:
icount::icount_bench_floor_group::icount_bench_floor logspace:setup_floor()
Performance has regressed: Instructions (14064 > 13171) regressed by +6.78005% (>+5.00000)
Baselines: softfloat|softfloat
Instructions: 14064|13171 (+6.78005%) [+1.06780x]
L1 Hits: 16821|15802 (+6.44855%) [+1.06449x]
L2 Hits: 0|0 (No change)
RAM Hits: 8|9 (-11.1111%) [-1.12500x]
Total read+write: 16829|15811 (+6.43856%) [+1.06439x]
Estimated Cycles: 17101|16117 (+6.10535%) [+1.06105x]
icount::icount_bench_floorf128_group::icount_bench_floorf128 logspace:setup_floorf128()
Baselines: softfloat|softfloat
Instructions: 166868|N/A (*********)
L1 Hits: 221429|N/A (*********)
L2 Hits: 1|N/A (*********)
RAM Hits: 34|N/A (*********)
Total read+write: 221464|N/A (*********)
Estimated Cycles: 222624|N/A (*********)
icount::icount_bench_floorf16_group::icount_bench_floorf16 logspace:setup_floorf16()
Baselines: softfloat|softfloat
Instructions: 143029|N/A (*********)
L1 Hits: 176517|N/A (*********)
L2 Hits: 1|N/A (*********)
RAM Hits: 13|N/A (*********)
Total read+write: 176531|N/A (*********)
Estimated Cycles: 176977|N/A (*********)
icount::icount_bench_floorf_group::icount_bench_floorf logspace:setup_floorf()
Performance has regressed: Instructions (14732 > 10441) regressed by +41.0976% (>+5.00000)
Baselines: softfloat|softfloat
Instructions: 14732|10441 (+41.0976%) [+1.41098x]
L1 Hits: 17616|13027 (+35.2268%) [+1.35227x]
L2 Hits: 0|0 (No change)
RAM Hits: 8|6 (+33.3333%) [+1.33333x]
Total read+write: 17624|13033 (+35.2260%) [+1.35226x]
Estimated Cycles: 17896|13237 (+35.1968%) [+1.35197x]
This commit is contained in:
parent
7a357b96c0
commit
42fce292ab
5 changed files with 114 additions and 93 deletions
|
|
@ -336,14 +336,16 @@
|
|||
"src/libm_helper.rs",
|
||||
"src/math/arch/i586.rs",
|
||||
"src/math/arch/wasm32.rs",
|
||||
"src/math/floor.rs"
|
||||
"src/math/floor.rs",
|
||||
"src/math/generic/floor.rs"
|
||||
],
|
||||
"type": "f64"
|
||||
},
|
||||
"floorf": {
|
||||
"sources": [
|
||||
"src/math/arch/wasm32.rs",
|
||||
"src/math/floorf.rs"
|
||||
"src/math/floorf.rs",
|
||||
"src/math/generic/floor.rs"
|
||||
],
|
||||
"type": "f32"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,8 +1,3 @@
|
|||
#![allow(unreachable_code)]
|
||||
use core::f64;
|
||||
|
||||
const TOINT: f64 = 1. / f64::EPSILON;
|
||||
|
||||
/// Floor (f64)
|
||||
///
|
||||
/// Finds the nearest integer less than or equal to `x`.
|
||||
|
|
@ -15,39 +10,5 @@ pub fn floor(x: f64) -> f64 {
|
|||
args: x,
|
||||
}
|
||||
|
||||
let ui = x.to_bits();
|
||||
let e = ((ui >> 52) & 0x7ff) as i32;
|
||||
|
||||
if (e >= 0x3ff + 52) || (x == 0.) {
|
||||
return x;
|
||||
}
|
||||
/* y = int(x) - x, where int(x) is an integer neighbor of x */
|
||||
let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x };
|
||||
/* special case because of non-nearest rounding modes */
|
||||
if e < 0x3ff {
|
||||
force_eval!(y);
|
||||
return if (ui >> 63) != 0 { -1. } else { 0. };
|
||||
}
|
||||
if y > 0. { x + y - 1. } else { x + y }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn sanity_check() {
|
||||
assert_eq!(floor(1.1), 1.0);
|
||||
assert_eq!(floor(2.9), 2.0);
|
||||
}
|
||||
|
||||
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
|
||||
#[test]
|
||||
fn spec_tests() {
|
||||
// Not Asserted: that the current rounding mode has no effect.
|
||||
assert!(floor(f64::NAN).is_nan());
|
||||
for f in [0.0, -0.0, f64::INFINITY, f64::NEG_INFINITY].iter().copied() {
|
||||
assert_eq!(floor(f), f);
|
||||
}
|
||||
}
|
||||
return super::generic::floor(x);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
use core::f32;
|
||||
|
||||
/// Floor (f32)
|
||||
///
|
||||
/// Finds the nearest integer less than or equal to `x`.
|
||||
|
|
@ -11,53 +9,5 @@ pub fn floorf(x: f32) -> f32 {
|
|||
args: x,
|
||||
}
|
||||
|
||||
let mut ui = x.to_bits();
|
||||
let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
|
||||
|
||||
if e >= 23 {
|
||||
return x;
|
||||
}
|
||||
if e >= 0 {
|
||||
let m: u32 = 0x007fffff >> e;
|
||||
if (ui & m) == 0 {
|
||||
return x;
|
||||
}
|
||||
force_eval!(x + f32::from_bits(0x7b800000));
|
||||
if ui >> 31 != 0 {
|
||||
ui += m;
|
||||
}
|
||||
ui &= !m;
|
||||
} else {
|
||||
force_eval!(x + f32::from_bits(0x7b800000));
|
||||
if ui >> 31 == 0 {
|
||||
ui = 0;
|
||||
} else if ui << 1 != 0 {
|
||||
return -1.0;
|
||||
}
|
||||
}
|
||||
f32::from_bits(ui)
|
||||
}
|
||||
|
||||
// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
|
||||
#[cfg(not(target_arch = "powerpc64"))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn sanity_check() {
|
||||
assert_eq!(floorf(0.5), 0.0);
|
||||
assert_eq!(floorf(1.1), 1.0);
|
||||
assert_eq!(floorf(2.9), 2.0);
|
||||
}
|
||||
|
||||
/// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor
|
||||
#[test]
|
||||
fn spec_tests() {
|
||||
// Not Asserted: that the current rounding mode has no effect.
|
||||
assert!(floorf(f32::NAN).is_nan());
|
||||
for f in [0.0, -0.0, f32::INFINITY, f32::NEG_INFINITY].iter().copied() {
|
||||
assert_eq!(floorf(f), f);
|
||||
}
|
||||
}
|
||||
return super::generic::floor(x);
|
||||
}
|
||||
|
|
|
|||
106
library/compiler-builtins/libm/src/math/generic/floor.rs
Normal file
106
library/compiler-builtins/libm/src/math/generic/floor.rs
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
/* SPDX-License-Identifier: MIT
|
||||
* origin: musl src/math/floor.c */
|
||||
|
||||
//! Generic `floor` algorithm.
|
||||
//!
|
||||
//! Note that this uses the algorithm from musl's `floorf` rather than `floor` or `floorl` because
|
||||
//! performance seems to be better (based on icount) and it does not seem to experience rounding
|
||||
//! errors on i386.
|
||||
|
||||
use super::super::{Float, Int, IntTy, MinInt};
|
||||
|
||||
pub fn floor<F: Float>(x: F) -> F {
|
||||
let zero = IntTy::<F>::ZERO;
|
||||
|
||||
let mut ix = x.to_bits();
|
||||
let e = x.exp_unbiased();
|
||||
|
||||
// If the represented value has no fractional part, no truncation is needed.
|
||||
if e >= F::SIG_BITS as i32 {
|
||||
return x;
|
||||
}
|
||||
|
||||
if e >= 0 {
|
||||
// |x| >= 1.0
|
||||
|
||||
let m = F::SIG_MASK >> e.unsigned();
|
||||
if ix & m == zero {
|
||||
// Portion to be masked is already zero; no adjustment needed.
|
||||
return x;
|
||||
}
|
||||
|
||||
// Otherwise, raise an inexact exception.
|
||||
force_eval!(x + F::MAX);
|
||||
|
||||
if x.is_sign_negative() {
|
||||
ix += m;
|
||||
}
|
||||
|
||||
ix &= !m;
|
||||
F::from_bits(ix)
|
||||
} else {
|
||||
// |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
|
||||
force_eval!(x + F::MAX);
|
||||
|
||||
if x.is_sign_positive() {
|
||||
// 0.0 <= x < 1.0; rounding down goes toward +0.0.
|
||||
F::ZERO
|
||||
} else if ix << 1 != zero {
|
||||
// -1.0 < x < 0.0; rounding down goes toward -1.0.
|
||||
F::NEG_ONE
|
||||
} else {
|
||||
// -0.0 remains unchanged
|
||||
x
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Test against https://en.cppreference.com/w/cpp/numeric/math/floor
    ///
    /// Signed zeros and infinities must be returned bit-for-bit unchanged.
    fn spec_test<F: Float>() {
        // Not Asserted: that the current rounding mode has no effect.
        for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
            assert_biteq!(floor(f), f);
        }
    }

    /* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */

    #[test]
    #[cfg(f16_enabled)]
    fn spec_tests_f16() {
        spec_test::<f16>();
    }

    #[test]
    fn sanity_check_f32() {
        assert_eq!(floor(0.5f32), 0.0);
        assert_eq!(floor(1.1f32), 1.0);
        assert_eq!(floor(2.9f32), 2.0);
        // Negative non-integers must round away from zero (toward negative infinity).
        assert_eq!(floor(-0.5f32), -1.0);
        assert_eq!(floor(-1.1f32), -2.0);
    }

    #[test]
    fn spec_tests_f32() {
        spec_test::<f32>();
        // NaN input must produce NaN output.
        assert!(floor(f32::NAN).is_nan());
    }

    #[test]
    fn sanity_check_f64() {
        assert_eq!(floor(1.1f64), 1.0);
        assert_eq!(floor(2.9f64), 2.0);
        // Negative non-integers must round away from zero (toward negative infinity).
        assert_eq!(floor(-0.5f64), -1.0);
        assert_eq!(floor(-1.1f64), -2.0);
    }

    #[test]
    fn spec_tests_f64() {
        spec_test::<f64>();
        // NaN input must produce NaN output.
        assert!(floor(f64::NAN).is_nan());
    }

    #[test]
    #[cfg(f128_enabled)]
    fn spec_tests_f128() {
        spec_test::<f128>();
    }
}
|
||||
|
|
@ -2,6 +2,7 @@ mod ceil;
|
|||
mod copysign;
|
||||
mod fabs;
|
||||
mod fdim;
|
||||
mod floor;
|
||||
mod sqrt;
|
||||
mod trunc;
|
||||
|
||||
|
|
@ -9,5 +10,6 @@ pub use ceil::ceil;
|
|||
pub use copysign::copysign;
|
||||
pub use fabs::fabs;
|
||||
pub use fdim::fdim;
|
||||
pub use floor::floor;
|
||||
pub use sqrt::sqrt;
|
||||
pub use trunc::trunc;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue