Add a generic version of scalbn

This replaces the `f32` and `f64` versions of `scalbn` and `ldexp`.
This commit is contained in:
Trevor Gross 2025-01-23 22:02:22 +00:00 committed by Trevor Gross
parent b8da1919f9
commit 3aa2d1cfc2
6 changed files with 133 additions and 59 deletions

View file

@ -158,7 +158,8 @@ libm_macros::for_each_function! {
ilogbf,
jn,
jnf,
ldexp,ldexpf,
ldexp,
ldexpf,
lgamma_r,
lgammaf_r,
modf,

View file

@ -698,12 +698,14 @@
"scalbn": {
"sources": [
"src/libm_helper.rs",
"src/math/generic/scalbn.rs",
"src/math/scalbn.rs"
],
"type": "f64"
},
"scalbnf": {
"sources": [
"src/math/generic/scalbn.rs",
"src/math/scalbnf.rs"
],
"type": "f32"

View file

@ -4,6 +4,7 @@ mod fabs;
mod fdim;
mod floor;
mod rint;
mod scalbn;
mod sqrt;
mod trunc;
@ -13,5 +14,6 @@ pub use fabs::fabs;
pub use fdim::fdim;
pub use floor::floor;
pub use rint::rint;
pub use scalbn::scalbn;
pub use sqrt::sqrt;
pub use trunc::trunc;

View file

@ -0,0 +1,123 @@
use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
/// Scale the exponent.
///
/// From N3220:
///
/// > The scalbn and scalbln functions compute `x * b^n`, where `b = FLT_RADIX` if the return type
/// > of the function is a standard floating type, or `b = 10` if the return type of the function
/// > is a decimal floating type. A range error occurs for some finite x, depending on n.
/// >
/// > [...]
/// >
/// > * `scalbn(±0, n)` returns `±0`.
/// > * `scalbn(x, 0)` returns `x`.
/// > * `scalbn(±∞, n)` returns `±∞`.
/// >
/// > If the calculation does not overflow or underflow, the returned value is exact and
/// > independent of the current rounding direction mode.
pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
where
u32: CastInto<F::Int>,
F::Int: CastFrom<i32>,
F::Int: CastFrom<u32>,
{
let zero = IntTy::<F>::ZERO;
// Bits including the implicit bit
let sig_total_bits = F::SIG_BITS + 1;
// Maximum and minimum values when biased
let exp_max: i32 = F::EXP_BIAS as i32;
let exp_min = -(exp_max - 1);
// 2 ^ Emax, where Emax is the maximum biased exponent value (1023 for f64)
let f_exp_max = F::from_parts(false, F::EXP_BIAS << 1, zero);
// 2 ^ Emin, where Emin is the minimum biased exponent value (-1022 for f64)
let f_exp_min = F::from_parts(false, 1, zero);
// 2 ^ sig_total_bits, representation of what can be accounted for with subnormals
let f_exp_subnorm = F::from_parts(false, sig_total_bits + F::EXP_BIAS, zero);
if n > exp_max {
x *= f_exp_max;
n -= exp_max;
if n > exp_max {
x *= f_exp_max;
n -= exp_max;
if n > exp_max {
n = exp_max;
}
}
} else if n < exp_min {
let mul = f_exp_min * f_exp_subnorm;
let add = (exp_max - 1) - sig_total_bits as i32;
x *= mul;
n += add;
if n < exp_min {
x *= mul;
n += add;
if n < exp_min {
n = exp_min;
}
}
}
x * F::from_parts(false, (F::EXP_BIAS as i32 + n) as u32, zero)
}
#[cfg(test)]
mod tests {
use super::super::super::Int;
use super::*;
// Tests against N3220
fn spec_test<F: Float>()
where
u32: CastInto<F::Int>,
F::Int: CastFrom<i32>,
F::Int: CastFrom<u32>,
{
// `scalbn(±0, n)` returns `±0`.
assert_biteq!(scalbn(F::NEG_ZERO, 10), F::NEG_ZERO);
assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO);
assert_biteq!(scalbn(F::NEG_ZERO, -10), F::NEG_ZERO);
assert_biteq!(scalbn(F::ZERO, 10), F::ZERO);
assert_biteq!(scalbn(F::ZERO, 0), F::ZERO);
assert_biteq!(scalbn(F::ZERO, -10), F::ZERO);
// `scalbn(x, 0)` returns `x`.
assert_biteq!(scalbn(F::MIN, 0), F::MIN);
assert_biteq!(scalbn(F::MAX, 0), F::MAX);
assert_biteq!(scalbn(F::INFINITY, 0), F::INFINITY);
assert_biteq!(scalbn(F::NEG_INFINITY, 0), F::NEG_INFINITY);
assert_biteq!(scalbn(F::ZERO, 0), F::ZERO);
assert_biteq!(scalbn(F::NEG_ZERO, 0), F::NEG_ZERO);
// `scalbn(±∞, n)` returns `±∞`.
assert_biteq!(scalbn(F::INFINITY, 10), F::INFINITY);
assert_biteq!(scalbn(F::INFINITY, -10), F::INFINITY);
assert_biteq!(scalbn(F::NEG_INFINITY, 10), F::NEG_INFINITY);
assert_biteq!(scalbn(F::NEG_INFINITY, -10), F::NEG_INFINITY);
// NaN should remain NaNs.
assert!(scalbn(F::NAN, 10).is_nan());
assert!(scalbn(F::NAN, 0).is_nan());
assert!(scalbn(F::NAN, -10).is_nan());
assert!(scalbn(-F::NAN, 10).is_nan());
assert!(scalbn(-F::NAN, 0).is_nan());
assert!(scalbn(-F::NAN, -10).is_nan());
}
#[test]
fn spec_test_f32() {
spec_test::<f32>();
}
#[test]
fn spec_test_f64() {
spec_test::<f64>();
}
}

View file

@ -1,33 +1,4 @@
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn scalbn(x: f64, mut n: i32) -> f64 {
let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53
let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022)
let mut y = x;
if n > 1023 {
y *= x1p1023;
n -= 1023;
if n > 1023 {
y *= x1p1023;
n -= 1023;
if n > 1023 {
n = 1023;
}
}
} else if n < -1022 {
/* make sure final n < -53 to avoid double
rounding in the subnormal range */
y *= x1p_1022 * x1p53;
n += 1022 - 53;
if n < -1022 {
y *= x1p_1022 * x1p53;
n += 1022 - 53;
if n < -1022 {
n = -1022;
}
}
}
y * f64::from_bits(((0x3ff + n) as u64) << 52)
pub fn scalbn(x: f64, n: i32) -> f64 {
super::generic::scalbn(x, n)
}

View file

@ -1,29 +1,4 @@
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn scalbnf(mut x: f32, mut n: i32) -> f32 {
let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126
let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24
if n > 127 {
x *= x1p127;
n -= 127;
if n > 127 {
x *= x1p127;
n -= 127;
if n > 127 {
n = 127;
}
}
} else if n < -126 {
x *= x1p_126 * x1p24;
n += 126 - 24;
if n < -126 {
x *= x1p_126 * x1p24;
n += 126 - 24;
if n < -126 {
n = -126;
}
}
}
x * f32::from_bits(((0x7f + n) as u32) << 23)
pub fn scalbnf(x: f32, n: i32) -> f32 {
super::generic::scalbn(x, n)
}