Merge pull request rust-lang/libm#510 from tgross35/replace-fenv

Migrate away from nonfunctional `fenv` stubs
This commit is contained in:
Trevor Gross 2025-02-10 07:25:58 -06:00 committed by GitHub
commit 8e7c83ea2d
12 changed files with 470 additions and 178 deletions

View file

@ -32,6 +32,7 @@ impl Float for f8 {
const INFINITY: Self = Self(0b0_1111_000);
const NEG_INFINITY: Self = Self(0b1_1111_000);
const NAN: Self = Self(0b0_1111_100);
const MIN_POSITIVE_NORMAL: Self = Self(1 << Self::SIG_BITS);
// FIXME: incorrect values
const EPSILON: Self = Self::ZERO;
const PI: Self = Self::ZERO;

View file

@ -5,12 +5,15 @@
*/
use super::Float;
use super::fenv::Rounding;
use super::support::cold_path;
use super::support::{FpResult, Round, cold_path};
/// Compute the cube root of the argument.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn cbrt(x: f64) -> f64 {
cbrt_round(x, Round::Nearest).val
}
pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
const ESCALE: [f64; 3] = [
1.0,
hf64!("0x1.428a2f98d728bp+0"), /* 2^(1/3) */
@ -33,8 +36,6 @@ pub fn cbrt(x: f64) -> f64 {
let off = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];
let rm = Rounding::get();
/* rm=0 for rounding to nearest, and other values for directed roundings */
let hx: u64 = x.to_bits();
let mut mant: u64 = hx & f64::SIG_MASK;
@ -51,7 +52,7 @@ pub fn cbrt(x: f64) -> f64 {
to that for x a signaling NaN, it correctly triggers
the invalid exception. */
if e == f64::EXP_SAT || ix == 0 {
return x + x;
return FpResult::ok(x + x);
}
let nz = ix.leading_zeros() - 11; /* subnormal */
@ -124,8 +125,8 @@ pub fn cbrt(x: f64) -> f64 {
* from ulp(1);
* for rounding to nearest, ady0 is tiny when dy is near from 1/2 ulp(1),
* or from 3/2 ulp(1). */
let mut ady0: f64 = (ady - off[rm as usize]).abs();
let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs();
let mut ady0: f64 = (ady - off[round as usize]).abs();
let mut ady1: f64 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
if ady0 < hf64!("0x1p-75") || ady1 < hf64!("0x1p-75") {
cold_path();
@ -140,8 +141,8 @@ pub fn cbrt(x: f64) -> f64 {
dy = (y1 - y) - dy;
y1 = y;
ady = dy.abs();
ady0 = (ady - off[rm as usize]).abs();
ady1 = (ady - (hf64!("0x1p-52") + off[rm as usize])).abs();
ady0 = (ady - off[round as usize]).abs();
ady1 = (ady - (hf64!("0x1p-52") + off[round as usize])).abs();
if ady0 < hf64!("0x1p-98") || ady1 < hf64!("0x1p-98") {
cold_path();
@ -157,7 +158,7 @@ pub fn cbrt(x: f64) -> f64 {
y1 = hf64!("0x1.de87aa837820fp+0").copysign(zz);
}
if rm != Rounding::Nearest {
if round != Round::Nearest {
let wlist = [
(hf64!("0x1.3a9ccd7f022dbp+0"), hf64!("0x1.1236160ba9b93p+0")), // ~ 0x1.1236160ba9b930000000000001e7e8fap+0
(hf64!("0x1.7845d2faac6fep+0"), hf64!("0x1.23115e657e49cp+0")), // ~ 0x1.23115e657e49c0000000000001d7a799p+0
@ -170,7 +171,7 @@ pub fn cbrt(x: f64) -> f64 {
for (a, b) in wlist {
if azz == a {
let tmp = if rm as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 };
let tmp = if round as u64 + sign == 2 { hf64!("0x1p-52") } else { 0.0 };
y1 = (b + tmp).copysign(zz);
}
}
@ -194,7 +195,7 @@ pub fn cbrt(x: f64) -> f64 {
}
}
f64::from_bits(cvt3)
FpResult::ok(f64::from_bits(cvt3))
}
fn fmaf64(x: f64, y: f64, z: f64) -> f64 {

View file

@ -1,49 +0,0 @@
// src: musl/src/fenv/fenv.c
/* Dummy functions for archs lacking fenv implementation */
pub(crate) const FE_UNDERFLOW: i32 = 0;
pub(crate) const FE_INEXACT: i32 = 0;
pub(crate) const FE_TONEAREST: i32 = 0;
pub(crate) const FE_DOWNWARD: i32 = 1;
pub(crate) const FE_UPWARD: i32 = 2;
pub(crate) const FE_TOWARDZERO: i32 = 3;
#[inline]
pub(crate) fn feclearexcept(_mask: i32) -> i32 {
0
}
#[inline]
pub(crate) fn feraiseexcept(_mask: i32) -> i32 {
0
}
#[inline]
pub(crate) fn fetestexcept(_mask: i32) -> i32 {
0
}
#[inline]
pub(crate) fn fegetround() -> i32 {
FE_TONEAREST
}
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum Rounding {
Nearest = FE_TONEAREST as isize,
Downward = FE_DOWNWARD as isize,
Upward = FE_UPWARD as isize,
ToZero = FE_TOWARDZERO as isize,
}
impl Rounding {
pub(crate) fn get() -> Self {
match fegetround() {
x if x == FE_DOWNWARD => Self::Downward,
x if x == FE_UPWARD => Self::Upward,
x if x == FE_TOWARDZERO => Self::ToZero,
_ => Self::Nearest,
}
}
}

View file

@ -7,9 +7,14 @@
//! performance seems to be better (based on icount) and it does not seem to experience rounding
//! errors on i386.
use super::super::support::{FpResult, Status};
use super::super::{Float, Int, IntTy, MinInt};
pub fn ceil<F: Float>(x: F) -> F {
ceil_status(x).val
}
pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
let zero = IntTy::<F>::ZERO;
let mut ix = x.to_bits();
@ -17,20 +22,20 @@ pub fn ceil<F: Float>(x: F) -> F {
// If the represented value has no fractional part, no truncation is needed.
if e >= F::SIG_BITS as i32 {
return x;
return FpResult::ok(x);
}
if e >= 0 {
let status;
let res = if e >= 0 {
// |x| >= 1.0
let m = F::SIG_MASK >> e.unsigned();
if (ix & m) == zero {
// Portion to be masked is already zero; no adjustment needed.
return x;
return FpResult::ok(x);
}
// Otherwise, raise an inexact exception.
force_eval!(x + F::MAX);
status = Status::INEXACT;
if x.is_sign_positive() {
ix += m;
@ -40,7 +45,11 @@ pub fn ceil<F: Float>(x: F) -> F {
F::from_bits(ix)
} else {
// |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
force_eval!(x + F::MAX);
if ix & F::SIG_MASK == F::Int::ZERO {
status = Status::OK;
} else {
status = Status::INEXACT;
}
if x.is_sign_negative() {
// -1.0 < x <= -0.0; rounding up goes toward -0.0.
@ -52,18 +61,30 @@ pub fn ceil<F: Float>(x: F) -> F {
// +0.0 remains unchanged
x
}
}
};
FpResult::new(res, status)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::support::Hexf;
/// Test against https://en.cppreference.com/w/cpp/numeric/math/ceil
fn spec_test<F: Float>() {
// Not Asserted: that the current rounding mode has no effect.
for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
assert_biteq!(ceil(f), f);
fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
for x in roundtrip {
let FpResult { val, status } = ceil_status(x);
assert_biteq!(val, x, "{}", Hexf(x));
assert_eq!(status, Status::OK, "{}", Hexf(x));
}
for &(x, res, res_stat) in cases {
let FpResult { val, status } = ceil_status(x);
assert_biteq!(val, res, "{}", Hexf(x));
assert_eq!(status, res_stat, "{}", Hexf(x));
}
}
@ -72,7 +93,17 @@ mod tests {
#[test]
#[cfg(f16_enabled)]
fn spec_tests_f16() {
spec_test::<f16>();
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f16>(&cases);
}
#[test]
@ -83,7 +114,17 @@ mod tests {
#[test]
fn spec_tests_f32() {
spec_test::<f32>();
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f32>(&cases);
}
#[test]
@ -94,12 +135,32 @@ mod tests {
#[test]
fn spec_tests_f64() {
spec_test::<f64>();
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f64>(&cases);
}
#[test]
#[cfg(f128_enabled)]
fn spec_tests_f128() {
spec_test::<f128>();
let cases = [
(0.1, 1.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 1.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 2.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 2.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f128>(&cases);
}
}

View file

@ -7,9 +7,14 @@
//! performance seems to be better (based on icount) and it does not seem to experience rounding
//! errors on i386.
use super::super::support::{FpResult, Status};
use super::super::{Float, Int, IntTy, MinInt};
pub fn floor<F: Float>(x: F) -> F {
floor_status(x).val
}
pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
let zero = IntTy::<F>::ZERO;
let mut ix = x.to_bits();
@ -17,20 +22,20 @@ pub fn floor<F: Float>(x: F) -> F {
// If the represented value has no fractional part, no truncation is needed.
if e >= F::SIG_BITS as i32 {
return x;
return FpResult::ok(x);
}
if e >= 0 {
let status;
let res = if e >= 0 {
// |x| >= 1.0
let m = F::SIG_MASK >> e.unsigned();
if ix & m == zero {
// Portion to be masked is already zero; no adjustment needed.
return x;
return FpResult::ok(x);
}
// Otherwise, raise an inexact exception.
force_eval!(x + F::MAX);
status = Status::INEXACT;
if x.is_sign_negative() {
ix += m;
@ -39,8 +44,12 @@ pub fn floor<F: Float>(x: F) -> F {
ix &= !m;
F::from_bits(ix)
} else {
// |x| < 1.0, raise an inexact exception since truncation will happen (unless x == 0).
force_eval!(x + F::MAX);
// |x| < 1.0, raise an inexact exception since truncation will happen.
if ix & F::SIG_MASK == F::Int::ZERO {
status = Status::OK;
} else {
status = Status::INEXACT;
}
if x.is_sign_positive() {
// 0.0 <= x < 1.0; rounding down goes toward +0.0.
@ -52,27 +61,40 @@ pub fn floor<F: Float>(x: F) -> F {
// -0.0 remains unchanged
x
}
}
};
FpResult::new(res, status)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::support::Hexf;
/// Test against https://en.cppreference.com/w/cpp/numeric/math/floor
fn spec_test<F: Float>() {
// Not Asserted: that the current rounding mode has no effect.
for f in [F::ZERO, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY].iter().copied() {
assert_biteq!(floor(f), f);
fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
for x in roundtrip {
let FpResult { val, status } = floor_status(x);
assert_biteq!(val, x, "{}", Hexf(x));
assert_eq!(status, Status::OK, "{}", Hexf(x));
}
for &(x, res, res_stat) in cases {
let FpResult { val, status } = floor_status(x);
assert_biteq!(val, res, "{}", Hexf(x));
assert_eq!(status, res_stat, "{}", Hexf(x));
}
}
/* Skipping f16 / f128 "sanity_check"s due to rejected literal lexing at MSRV */
/* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
#[test]
#[cfg(f16_enabled)]
fn spec_tests_f16() {
spec_test::<f16>();
let cases = [];
spec_test::<f16>(&cases);
}
#[test]
@ -84,7 +106,17 @@ mod tests {
#[test]
fn spec_tests_f32() {
spec_test::<f32>();
let cases = [
(0.1, 0.0, Status::INEXACT),
(-0.1, -1.0, Status::INEXACT),
(0.9, 0.0, Status::INEXACT),
(-0.9, -1.0, Status::INEXACT),
(1.1, 1.0, Status::INEXACT),
(-1.1, -2.0, Status::INEXACT),
(1.9, 1.0, Status::INEXACT),
(-1.9, -2.0, Status::INEXACT),
];
spec_test::<f32>(&cases);
}
#[test]
@ -95,12 +127,23 @@ mod tests {
#[test]
fn spec_tests_f64() {
spec_test::<f64>();
let cases = [
(0.1, 0.0, Status::INEXACT),
(-0.1, -1.0, Status::INEXACT),
(0.9, 0.0, Status::INEXACT),
(-0.9, -1.0, Status::INEXACT),
(1.1, 1.0, Status::INEXACT),
(-1.1, -2.0, Status::INEXACT),
(1.9, 1.0, Status::INEXACT),
(-1.9, -2.0, Status::INEXACT),
];
spec_test::<f64>(&cases);
}
#[test]
#[cfg(f128_enabled)]
fn spec_tests_f128() {
spec_test::<f128>();
let cases = [];
spec_test::<f128>(&cases);
}
}

View file

@ -1,12 +1,7 @@
/* SPDX-License-Identifier: MIT */
/* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */
use core::{f32, f64};
use super::super::fenv::{
FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, feclearexcept, fegetround, feraiseexcept, fetestexcept,
};
use super::super::support::{DInt, HInt, IntTy};
use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt};
/// Fused multiply-add that works when there is not a larger float size available. Currently this
@ -14,7 +9,18 @@ use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt};
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fma<F>(x: F, y: F, z: F) -> F
where
F: Float + FmaHelper,
F: Float,
F: CastFrom<F::SignedInt>,
F: CastFrom<i8>,
F::Int: HInt,
u32: CastInto<F::Int>,
{
fma_round(x, y, z, Round::Nearest).val
}
pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
where
F: Float,
F: CastFrom<F::SignedInt>,
F: CastFrom<i8>,
F::Int: HInt,
@ -30,16 +36,16 @@ where
if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
// Value will overflow, defer to non-fused operations.
return x * y + z;
return FpResult::ok(x * y + z);
}
if nz.is_zero_nan_inf() {
if nz.is_zero() {
// Empty add component means we only need to multiply.
return x * y;
return FpResult::ok(x * y);
}
// `z` is NaN or infinity, which sets the result.
return z;
return FpResult::ok(z);
}
// multiply: r = x * y
@ -147,7 +153,7 @@ where
}
} else {
// exact +/- 0.0
return x * y + z;
return FpResult::ok(x * y + z);
}
e -= d;
@ -168,6 +174,8 @@ where
// Unbiased exponent for the maximum value of `r`
let max_pow = F::BITS - 1 + F::EXP_BIAS;
let mut status = Status::OK;
if e < -(max_pow as i32 - 2) {
// Result is subnormal before rounding
if e == -(max_pow as i32 - 1) {
@ -178,7 +186,9 @@ where
if r == c {
// Min normal after rounding,
return r.raise_underflow_as_min_positive();
status.set_underflow(true);
r = F::MIN_POSITIVE_NORMAL.copysign(r);
return FpResult::new(r, status);
}
if (rhi << (F::SIG_BITS + 1)) != zero {
@ -195,7 +205,7 @@ where
// Remove the top bit
r = F::cast_from(2i8) * r - c;
r += r.raise_underflow_ret_zero();
status.set_underflow(true);
}
} else {
// Only round once when scaled
@ -212,12 +222,22 @@ where
}
// Use our exponent to scale the final value.
super::scalbn(r, e)
FpResult::new(super::scalbn(r, e), status)
}
/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
pub fn fma_wide<F, B>(x: F, y: F, z: F) -> F
where
F: Float + HFloat<D = B>,
B: Float + DFloat<H = F>,
B::Int: CastInto<i32>,
i32: CastFrom<i32>,
{
fma_wide_round(x, y, z, Round::Nearest).val
}
pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
where
F: Float + HFloat<D = B>,
B: Float + DFloat<H = F>,
@ -244,24 +264,26 @@ where
// Or the result is exact
|| (result - xy == zb && result - zb == xy)
// Or the mode is something other than round to nearest
|| fegetround() != FE_TONEAREST
|| round != Round::Nearest
{
let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
if (min_inexact_exp..max_inexact_exp).contains(&re) && fetestexcept(FE_INEXACT) != 0 {
feclearexcept(FE_INEXACT);
// prevent `xy + vz` from being CSE'd with `xy + z` above
let vz: F = force_eval!(z);
result = xy + vz.widen();
if fetestexcept(FE_INEXACT) != 0 {
feraiseexcept(FE_UNDERFLOW);
let mut status = Status::OK;
if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
// This branch is never hit; requires previous operations to set a status
status.set_inexact(false);
result = xy + z.widen();
if status.inexact() {
status.set_underflow(true);
} else {
feraiseexcept(FE_INEXACT);
status.set_inexact(true);
}
}
return result.narrow();
return FpResult { val: result.narrow(), status };
}
let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
@ -272,7 +294,7 @@ where
ui -= one;
}
B::from_bits(ui).narrow()
FpResult::ok(B::from_bits(ui).narrow())
}
/// Representation of `F` that has handled subnormals.
@ -337,49 +359,13 @@ impl<F: Float> Norm<F> {
}
}
/// Type-specific helpers that are not needed outside of fma.
pub trait FmaHelper {
/// Raise underflow and return the minimum positive normal value with the sign of `self`.
fn raise_underflow_as_min_positive(self) -> Self;
/// Raise underflow and return zero.
fn raise_underflow_ret_zero(self) -> Self;
}
impl FmaHelper for f64 {
fn raise_underflow_as_min_positive(self) -> Self {
/* min normal after rounding, underflow depends
* on arch behaviour which can be imitated by
* a double to float conversion */
let fltmin: f32 = (hf64!("0x0.ffffff8p-63") * f32::MIN_POSITIVE as f64 * self) as f32;
f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64
}
fn raise_underflow_ret_zero(self) -> Self {
/* raise underflow portably, such that it
* cannot be optimized away */
let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * self;
(tiny * tiny) * (self - self)
}
}
#[cfg(f128_enabled)]
impl FmaHelper for f128 {
fn raise_underflow_as_min_positive(self) -> Self {
f128::MIN_POSITIVE.copysign(self)
}
fn raise_underflow_ret_zero(self) -> Self {
f128::ZERO
}
}
#[cfg(test)]
mod tests {
use super::*;
fn spec_test<F>()
where
F: Float + FmaHelper,
F: Float,
F: CastFrom<F::SignedInt>,
F: CastFrom<i8>,
F::Int: HInt,
@ -401,6 +387,29 @@ mod tests {
#[test]
fn spec_test_f64() {
spec_test::<f64>();
let expect_underflow = [
(
hf64!("0x1.0p-1070"),
hf64!("0x1.0p-1070"),
hf64!("0x1.ffffffffffffp-1023"),
hf64!("0x0.ffffffffffff8p-1022"),
),
(
// FIXME: we raise underflow but this should only be inexact (based on C and
// `rustc_apfloat`).
hf64!("0x1.0p-1070"),
hf64!("0x1.0p-1070"),
hf64!("-0x1.0p-1022"),
hf64!("-0x1.0p-1022"),
),
];
for (x, y, z, res) in expect_underflow {
let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
assert_biteq!(val, res);
assert_eq!(status, Status::UNDERFLOW);
}
}
#[test]

View file

@ -41,10 +41,23 @@
//! Goldschmidt has the advantage over Newton-Raphson that `sqrt(x)` and `1/sqrt(x)` are
//! computed at the same time, i.e. there is no need to calculate `1/sqrt(x)` and invert it.
use super::super::support::{IntTy, cold_path, raise_invalid};
use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
pub fn sqrt<F>(x: F) -> F
where
F: Float + SqrtHelper,
F::Int: HInt,
F::Int: From<u8>,
F::Int: From<F::ISet2>,
F::Int: CastInto<F::ISet1>,
F::Int: CastInto<F::ISet2>,
u32: CastInto<F::Int>,
{
sqrt_round(x, Round::Nearest).val
}
pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
where
F: Float + SqrtHelper,
F::Int: HInt,
@ -78,17 +91,17 @@ where
// +/-0
if ix << 1 == zero {
return x;
return FpResult::ok(x);
}
// Positive infinity
if ix == F::EXP_MASK {
return x;
return FpResult::ok(x);
}
// NaN or negative
if ix > F::EXP_MASK {
return raise_invalid(x);
return FpResult::new(F::NAN, Status::INVALID);
}
// Normalize subnormals by multiplying by 1.0 << SIG_BITS (e.g. 0x1p52 for doubles).
@ -215,7 +228,7 @@ where
y = y + t;
}
y
FpResult::ok(y)
}
/// Multiply at the wider integer size, returning the high half.
@ -329,7 +342,7 @@ impl SqrtHelper for f128 {
/// A U0.16 representation of `1/sqrt(x)`.
///
// / The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
/// The index is a 7-bit number consisting of a single exponent bit and 6 bits of significand.
#[rustfmt::skip]
static RSQRT_TAB: [u16; 128] = [
0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
@ -354,7 +367,7 @@ static RSQRT_TAB: [u16; 128] = [
mod tests {
use super::*;
/// Test against edge cases from https://en.cppreference.com/w/cpp/numeric/math/sqrt
/// Test behavior specified in IEEE 754 `squareRoot`.
fn spec_test<F>()
where
F: Float + SqrtHelper,
@ -365,11 +378,22 @@ mod tests {
F::Int: CastInto<F::ISet2>,
u32: CastInto<F::Int>,
{
// Not Asserted: FE_INVALID exception is raised if argument is negative.
assert!(sqrt(F::NEG_ONE).is_nan());
assert!(sqrt(F::NAN).is_nan());
for f in [F::ZERO, F::NEG_ZERO, F::INFINITY].iter().copied() {
assert_biteq!(sqrt(f), f);
// Values that should return a NaN and raise invalid
let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN];
// Values that return unaltered
let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY];
for x in nan {
let FpResult { val, status } = sqrt_round(x, Round::Nearest);
assert!(val.is_nan());
assert!(status == Status::INVALID);
}
for x in roundtrip {
let FpResult { val, status } = sqrt_round(x, Round::Nearest);
assert_biteq!(val, x);
assert!(status == Status::OK);
}
}

View file

@ -1,15 +1,20 @@
/* SPDX-License-Identifier: MIT
* origin: musl src/math/trunc.c */
use super::super::support::{FpResult, Status};
use super::super::{Float, Int, IntTy, MinInt};
pub fn trunc<F: Float>(x: F) -> F {
trunc_status(x).val
}
pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
let mut xi: F::Int = x.to_bits();
let e: i32 = x.exp_unbiased();
// C1: The represented value has no fractional part, so no truncation is needed
if e >= F::SIG_BITS as i32 {
return x;
return FpResult::ok(x);
}
let mask = if e < 0 {
@ -23,22 +28,68 @@ pub fn trunc<F: Float>(x: F) -> F {
// C4: If the to-be-masked-out portion is already zero, we have an exact result
if (xi & !mask) == IntTy::<F>::ZERO {
return x;
return FpResult::ok(x);
}
// C5: Otherwise the result is inexact and we will truncate. Raise `FE_INEXACT`, mask the
// result, and return.
force_eval!(x + F::MAX);
let status = if xi & F::SIG_MASK == F::Int::ZERO { Status::OK } else { Status::INEXACT };
xi &= mask;
F::from_bits(xi)
FpResult::new(F::from_bits(xi), status)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::support::Hexf;
fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
let roundtrip = [F::ZERO, F::ONE, F::NEG_ONE, F::NEG_ZERO, F::INFINITY, F::NEG_INFINITY];
for x in roundtrip {
let FpResult { val, status } = trunc_status(x);
assert_biteq!(val, x, "{}", Hexf(x));
assert_eq!(status, Status::OK, "{}", Hexf(x));
}
for &(x, res, res_stat) in cases {
let FpResult { val, status } = trunc_status(x);
assert_biteq!(val, res, "{}", Hexf(x));
assert_eq!(status, res_stat, "{}", Hexf(x));
}
}
/* Skipping f16 / f128 "sanity_check"s and spec cases due to rejected literal lexing at MSRV */
#[test]
fn sanity_check() {
#[cfg(f16_enabled)]
fn spec_tests_f16() {
let cases = [];
spec_test::<f16>(&cases);
}
#[test]
fn sanity_check_f32() {
assert_eq!(trunc(0.5f32), 0.0);
assert_eq!(trunc(1.1f32), 1.0);
assert_eq!(trunc(2.9f32), 2.0);
}
#[test]
fn spec_tests_f32() {
let cases = [
(0.1, 0.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 0.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 1.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 1.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f32>(&cases);
assert_biteq!(trunc(1.1f32), 1.0);
assert_biteq!(trunc(1.1f64), 1.0);
@ -54,4 +105,32 @@ mod tests {
assert_biteq!(trunc(hf32!("-0x1p-1")), -0.0);
assert_biteq!(trunc(hf64!("-0x1p-1")), -0.0);
}
#[test]
fn sanity_check_f64() {
assert_eq!(trunc(1.1f64), 1.0);
assert_eq!(trunc(2.9f64), 2.0);
}
#[test]
fn spec_tests_f64() {
let cases = [
(0.1, 0.0, Status::INEXACT),
(-0.1, -0.0, Status::INEXACT),
(0.9, 0.0, Status::INEXACT),
(-0.9, -0.0, Status::INEXACT),
(1.1, 1.0, Status::INEXACT),
(-1.1, -1.0, Status::INEXACT),
(1.9, 1.0, Status::INEXACT),
(-1.9, -1.0, Status::INEXACT),
];
spec_test::<f64>(&cases);
}
#[test]
#[cfg(f128_enabled)]
fn spec_tests_f128() {
let cases = [];
spec_test::<f128>(&cases);
}
}

View file

@ -94,7 +94,6 @@ cfg_if! {
// Private modules
mod arch;
mod expo2;
mod fenv;
mod k_cos;
mod k_cosf;
mod k_expo2;

View file

@ -0,0 +1,118 @@
//! Support for rounding directions and status flags as specified by IEEE 754.
//!
//! Rust does not support the floating point environment so rounding mode is passed as an argument
//! and status flags are returned as part of the result. There is currently not much support for
//! this; most existing ports from musl use a form of `force_eval!` to raise exceptions, but this
//! has no side effects in Rust. Further, correct behavior relies on elementary operations making
//! use of the correct rounding and raising relevant exceptions, which is not the case for Rust.
//!
//! This module exists so no functionality is lost when porting algorithms that respect floating
//! point environment, and so that some functionality may be tested (that which does not rely on
//! side effects from elementary operations). Full support would require wrappers around basic
//! operations, but there is no plan to add this at the current time.
/// A value combined with a floating point status.
pub struct FpResult<T> {
pub val: T,
#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
pub status: Status,
}
impl<T> FpResult<T> {
pub fn new(val: T, status: Status) -> Self {
Self { val, status }
}
/// Return `val` with `Status::OK`.
pub fn ok(val: T) -> Self {
Self { val, status: Status::OK }
}
}
/// IEEE 754 rounding mode, excluding the optional `roundTiesToAway` version of nearest.
///
/// Integer representation comes from what CORE-MATH uses for indexing.
#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Round {
/// IEEE 754 nearest, `roundTiesToEven`.
Nearest = 0,
/// IEEE 754 `roundTowardNegative`.
Negative = 1,
/// IEEE 754 `roundTowardPositive`.
Positive = 2,
/// IEEE 754 `roundTowardZero`.
Zero = 3,
}
/// IEEE 754 exception status flags.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Status(u8);
impl Status {
/// Default status indicating no errors.
pub const OK: Self = Self(0);
/// No definable result.
///
/// Includes:
/// - Any ops on sNaN, with a few exceptions.
/// - `0 * inf`, `inf * 0`.
/// - `fma(0, inf, c)` or `fma(inf, 0, c)`, possibly excluding `c = qNaN`.
/// - `+inf + -inf` and similar (includes subtraction and fma).
/// - `0.0 / 0.0`, `inf / inf`
/// - `remainder(x, y)` if `y == 0.0` or `x == inf`, and neither is NaN.
/// - `sqrt(x)` with `x < 0.0`.
pub const INVALID: Self = Self(1);
/// Division by zero.
///
/// The default result for division is +/-inf based on operand sign. For `logB`, the default
/// result is -inf.
/// `x / y` when `x != 0.0` and `y == 0.0`,
#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
pub const DIVIDE_BY_ZERO: Self = Self(1 << 2);
/// The result exceeds the maximum finite value.
///
/// The default result depends on rounding mode. `Nearest*` rounds to +/- infinity, sign based
/// on the intermediate result. `Zero` rounds to the signed maximum finite. `Positive` and
/// `Negative` round to signed maximum finite in one direction, signed infinity in the other.
#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
pub const OVERFLOW: Self = Self(1 << 3);
/// The result is subnormal and lost precision.
pub const UNDERFLOW: Self = Self(1 << 4);
/// The finite-precision result does not match that of infinite precision, and the reason
/// is not represented by one of the other flags.
pub const INEXACT: Self = Self(1 << 5);
/// True if `UNDERFLOW` is set.
#[cfg_attr(not(feature = "unstable-public-internals"), allow(dead_code))]
pub fn underflow(self) -> bool {
self.0 & Self::UNDERFLOW.0 != 0
}
pub fn set_underflow(&mut self, val: bool) {
self.set_flag(val, Self::UNDERFLOW);
}
/// True if `INEXACT` is set.
pub fn inexact(self) -> bool {
self.0 & Self::INEXACT.0 != 0
}
pub fn set_inexact(&mut self, val: bool) {
self.set_flag(val, Self::INEXACT);
}
fn set_flag(&mut self, val: bool, mask: Self) {
if val {
self.0 |= mask.0;
} else {
self.0 &= !mask.0;
}
}
}

View file

@ -41,6 +41,8 @@ pub trait Float:
const NEG_PI: Self;
const FRAC_PI_2: Self;
const MIN_POSITIVE_NORMAL: Self;
/// The bitwidth of the float type
const BITS: u32;
@ -200,6 +202,9 @@ macro_rules! float_impl {
const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
const EPSILON: Self = <$ty>::EPSILON;
// Exponent is a 1 in the LSB
const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS);
const PI: Self = core::$ty::consts::PI;
const NEG_PI: Self = -Self::PI;
const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2;
@ -358,6 +363,7 @@ mod tests {
// results for zero and subnormals.
assert_eq!(f16::ZERO.exp_unbiased(), -15);
assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15);
assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL);
// `from_parts`
assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16);
@ -383,6 +389,7 @@ mod tests {
// results for zero and subnormals.
assert_eq!(f32::ZERO.exp_unbiased(), -127);
assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127);
assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL);
// `from_parts`
assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32);
@ -409,6 +416,7 @@ mod tests {
// results for zero and subnormals.
assert_eq!(f64::ZERO.exp_unbiased(), -1023);
assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023);
assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL);
// `from_parts`
assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64);
@ -436,6 +444,7 @@ mod tests {
// results for zero and subnormals.
assert_eq!(f128::ZERO.exp_unbiased(), -16383);
assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383);
assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL);
// `from_parts`
assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128);

View file

@ -1,12 +1,14 @@
#[macro_use]
pub mod macros;
mod big;
mod env;
mod float_traits;
pub mod hex_float;
mod int_traits;
#[allow(unused_imports)]
pub use big::{i256, u256};
pub use env::{FpResult, Round, Status};
#[allow(unused_imports)]
pub use float_traits::{DFloat, Float, HFloat, IntTy};
pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
@ -25,8 +27,3 @@ pub fn cold_path() {
#[cfg(intrinsics_enabled)]
core::intrinsics::cold_path();
}
/// Return `x`, first raising `FE_INVALID`.
pub fn raise_invalid<F: Float>(x: F) -> F {
(x - x) / (x - x)
}