Shorten prefixes for float constants
Change `SIGNIFICAND_*` to `SIG_*` and `EXPONENT_*` to `EXP_*`. This makes things more consistent with `libm`, and terseness is convenient here since there isn't anything the shorter names could be confused with.
This commit is contained in:
parent
a078f5a0c5
commit
b47d3cc2f8
10 changed files with 111 additions and 116 deletions
|
|
@ -13,14 +13,14 @@ where
|
|||
let zero = F::Int::ZERO;
|
||||
|
||||
let bits = F::BITS.cast();
|
||||
let significand_bits = F::SIGNIFICAND_BITS;
|
||||
let max_exponent = F::EXPONENT_MAX;
|
||||
let significand_bits = F::SIG_BITS;
|
||||
let max_exponent = F::EXP_MAX;
|
||||
|
||||
let implicit_bit = F::IMPLICIT_BIT;
|
||||
let significand_mask = F::SIGNIFICAND_MASK;
|
||||
let significand_mask = F::SIG_MASK;
|
||||
let sign_bit = F::SIGN_MASK as F::Int;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXPONENT_MASK;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
let quiet_bit = implicit_bit >> 1;
|
||||
let qnan_rep = exponent_mask | quiet_bit;
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ fn cmp<F: Float>(a: F, b: F) -> Result {
|
|||
|
||||
let sign_bit = F::SIGN_MASK as F::Int;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXPONENT_MASK;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
|
||||
let a_rep = a.to_bits();
|
||||
|
|
@ -87,7 +87,7 @@ fn unord<F: Float>(a: F, b: F) -> bool {
|
|||
|
||||
let sign_bit = F::SIGN_MASK as F::Int;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXPONENT_MASK;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
|
||||
let a_rep = a.to_bits();
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ mod int_to_float {
|
|||
/// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
|
||||
/// bit set can be added back later.
|
||||
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
|
||||
F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
|
||||
F::Int::cast_from(F::EXP_BIAS - 1 + I::BITS - n)
|
||||
}
|
||||
|
||||
/// Adjust a mantissa with dropped bits to perform correct rounding.
|
||||
|
|
@ -54,17 +54,17 @@ mod int_to_float {
|
|||
/// value to cancel it out.
|
||||
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
|
||||
// + rather than | so the mantissa can overflow into the exponent
|
||||
(e << F::SIGNIFICAND_BITS) + m
|
||||
(e << F::SIG_BITS) + m
|
||||
}
|
||||
|
||||
/// Shift distance from a left-aligned integer to a smaller float.
|
||||
fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
|
||||
(I::BITS - F::BITS) + F::EXPONENT_BITS
|
||||
(I::BITS - F::BITS) + F::EXP_BITS
|
||||
}
|
||||
|
||||
/// Shift distance from an integer with `n` leading zeros to a smaller float.
|
||||
fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
|
||||
F::SIGNIFICAND_BITS - I::BITS + 1 + n
|
||||
F::SIG_BITS - I::BITS + 1 + n
|
||||
}
|
||||
|
||||
/// Perform a signed operation as unsigned, then add the sign back.
|
||||
|
|
@ -85,9 +85,9 @@ mod int_to_float {
|
|||
}
|
||||
let n = i.leading_zeros();
|
||||
// Mantissa with implicit bit set (significant bits)
|
||||
let m_base = (i << n) >> f32::EXPONENT_BITS;
|
||||
let m_base = (i << n) >> f32::EXP_BITS;
|
||||
// Bits that will be dropped (insignificant bits)
|
||||
let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
|
||||
let adj = (i << n) << (f32::SIG_BITS + 1);
|
||||
let m = m_adj::<f32>(m_base, adj);
|
||||
let e = exp::<u32, f32>(n) - 1;
|
||||
repr::<f32>(e, m)
|
||||
|
|
@ -116,7 +116,7 @@ mod int_to_float {
|
|||
let m = (i as u64) << (shift_f_gt_i::<u32, f128>(n) - 64);
|
||||
let e = exp::<u32, f128>(n) as u64 - 1;
|
||||
// High 64 bits of f128 representation.
|
||||
let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m;
|
||||
let h = (e << (f128::SIG_BITS - 64)) + m;
|
||||
|
||||
// Shift back to the high bits, the rest of the mantissa will always be 0.
|
||||
(h as u128) << 64
|
||||
|
|
@ -128,8 +128,8 @@ mod int_to_float {
|
|||
// Mantissa with implicit bit set
|
||||
let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
|
||||
// The entire lower half of `i` will be truncated (masked portion), plus the
|
||||
// next `EXPONENT_BITS` bits.
|
||||
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
|
||||
// next `EXP_BITS` bits.
|
||||
let adj = (i_m >> f32::EXP_BITS | i_m & 0xFFFF) as u32;
|
||||
let m = m_adj::<f32>(m_base, adj);
|
||||
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
|
||||
repr::<f32>(e, m)
|
||||
|
|
@ -141,8 +141,8 @@ mod int_to_float {
|
|||
}
|
||||
let n = i.leading_zeros();
|
||||
// Mantissa with implicit bit set
|
||||
let m_base = (i << n) >> f64::EXPONENT_BITS;
|
||||
let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
|
||||
let m_base = (i << n) >> f64::EXP_BITS;
|
||||
let adj = (i << n) << (f64::SIG_BITS + 1);
|
||||
let m = m_adj::<f64>(m_base, adj);
|
||||
let e = exp::<u64, f64>(n) - 1;
|
||||
repr::<f64>(e, m)
|
||||
|
|
@ -167,7 +167,7 @@ mod int_to_float {
|
|||
|
||||
// Within the upper `F::BITS`, everything except for the significand
|
||||
// gets truncated
|
||||
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
|
||||
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIG_BITS - 1)).cast();
|
||||
|
||||
// The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
|
||||
// check if it is nonzero.
|
||||
|
|
@ -186,8 +186,8 @@ mod int_to_float {
|
|||
// Mantissa with implicit bit set
|
||||
let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
|
||||
// The entire lower half of `i` will be truncated (masked portion), plus the
|
||||
// next `EXPONENT_BITS` bits.
|
||||
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
|
||||
// next `EXP_BITS` bits.
|
||||
let adj = (i_m >> f64::EXP_BITS | i_m & 0xFFFF_FFFF) as u64;
|
||||
let m = m_adj::<f64>(m_base, adj);
|
||||
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
|
||||
repr::<f64>(e, m)
|
||||
|
|
@ -200,8 +200,8 @@ mod int_to_float {
|
|||
}
|
||||
let n = i.leading_zeros();
|
||||
// Mantissa with implicit bit set
|
||||
let m_base = (i << n) >> f128::EXPONENT_BITS;
|
||||
let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1);
|
||||
let m_base = (i << n) >> f128::EXP_BITS;
|
||||
let adj = (i << n) << (f128::SIG_BITS + 1);
|
||||
let m = m_adj::<f128>(m_base, adj);
|
||||
let e = exp::<u128, f128>(n) - 1;
|
||||
repr::<f128>(e, m)
|
||||
|
|
@ -362,29 +362,29 @@ where
|
|||
F::Int: CastFrom<u32>,
|
||||
u32: CastFrom<F::Int>,
|
||||
{
|
||||
let int_max_exp = F::EXPONENT_BIAS + I::MAX.ilog2() + 1;
|
||||
let foobar = F::EXPONENT_BIAS + I::UnsignedInt::BITS - 1;
|
||||
let int_max_exp = F::EXP_BIAS + I::MAX.ilog2() + 1;
|
||||
let foobar = F::EXP_BIAS + I::UnsignedInt::BITS - 1;
|
||||
|
||||
if fbits < F::ONE.to_bits() {
|
||||
// < 0 gets rounded to 0
|
||||
I::ZERO
|
||||
} else if fbits < F::Int::cast_from(int_max_exp) << F::SIGNIFICAND_BITS {
|
||||
} else if fbits < F::Int::cast_from(int_max_exp) << F::SIG_BITS {
|
||||
// >= 1, < integer max
|
||||
let m_base = if I::UnsignedInt::BITS >= F::Int::BITS {
|
||||
I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIGNIFICAND_BITS - 1)
|
||||
I::UnsignedInt::cast_from(fbits) << (I::BITS - F::SIG_BITS - 1)
|
||||
} else {
|
||||
I::UnsignedInt::cast_from(fbits >> (F::SIGNIFICAND_BITS - I::BITS + 1))
|
||||
I::UnsignedInt::cast_from(fbits >> (F::SIG_BITS - I::BITS + 1))
|
||||
};
|
||||
|
||||
// Set the implicit 1-bit.
|
||||
let m: I::UnsignedInt = I::UnsignedInt::ONE << (I::BITS - 1) | m_base;
|
||||
|
||||
// Shift based on the exponent and bias.
|
||||
let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIGNIFICAND_BITS);
|
||||
let s: u32 = (foobar) - u32::cast_from(fbits >> F::SIG_BITS);
|
||||
|
||||
let unsigned = m >> s;
|
||||
map_inbounds(I::from_unsigned(unsigned))
|
||||
} else if fbits <= F::EXPONENT_MASK {
|
||||
} else if fbits <= F::EXP_MASK {
|
||||
// >= max (incl. inf)
|
||||
out_of_bounds()
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -105,16 +105,16 @@ where
|
|||
let hw = F::BITS / 2;
|
||||
let lo_mask = F::Int::MAX >> hw;
|
||||
|
||||
let significand_bits = F::SIGNIFICAND_BITS;
|
||||
let significand_bits = F::SIG_BITS;
|
||||
// Saturated exponent, representing infinity
|
||||
let exponent_sat: F::Int = F::EXPONENT_MAX.cast();
|
||||
let exponent_sat: F::Int = F::EXP_MAX.cast();
|
||||
|
||||
let exponent_bias = F::EXPONENT_BIAS;
|
||||
let exponent_bias = F::EXP_BIAS;
|
||||
let implicit_bit = F::IMPLICIT_BIT;
|
||||
let significand_mask = F::SIGNIFICAND_MASK;
|
||||
let significand_mask = F::SIG_MASK;
|
||||
let sign_bit = F::SIGN_MASK;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXPONENT_MASK;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
let quiet_bit = implicit_bit >> 1;
|
||||
let qnan_rep = exponent_mask | quiet_bit;
|
||||
|
|
|
|||
|
|
@ -15,19 +15,19 @@ where
|
|||
let src_zero = F::Int::ZERO;
|
||||
let src_one = F::Int::ONE;
|
||||
let src_bits = F::BITS;
|
||||
let src_sign_bits = F::SIGNIFICAND_BITS;
|
||||
let src_exp_bias = F::EXPONENT_BIAS;
|
||||
let src_sign_bits = F::SIG_BITS;
|
||||
let src_exp_bias = F::EXP_BIAS;
|
||||
let src_min_normal = F::IMPLICIT_BIT;
|
||||
let src_infinity = F::EXPONENT_MASK;
|
||||
let src_infinity = F::EXP_MASK;
|
||||
let src_sign_mask = F::SIGN_MASK as F::Int;
|
||||
let src_abs_mask = src_sign_mask - src_one;
|
||||
let src_qnan = F::SIGNIFICAND_MASK;
|
||||
let src_qnan = F::SIG_MASK;
|
||||
let src_nan_code = src_qnan - src_one;
|
||||
|
||||
let dst_bits = R::BITS;
|
||||
let dst_sign_bits = R::SIGNIFICAND_BITS;
|
||||
let dst_inf_exp = R::EXPONENT_MAX;
|
||||
let dst_exp_bias = R::EXPONENT_BIAS;
|
||||
let dst_sign_bits = R::SIG_BITS;
|
||||
let dst_inf_exp = R::EXP_MAX;
|
||||
let dst_exp_bias = R::EXP_BIAS;
|
||||
let dst_min_normal = R::IMPLICIT_BIT;
|
||||
|
||||
let sign_bits_delta = dst_sign_bits - src_sign_bits;
|
||||
|
|
|
|||
|
|
@ -42,32 +42,32 @@ pub(crate) trait Float:
|
|||
const ZERO: Self;
|
||||
const ONE: Self;
|
||||
|
||||
/// The bitwidth of the float type
|
||||
/// The bitwidth of the float type.
|
||||
const BITS: u32;
|
||||
|
||||
/// The bitwidth of the significand
|
||||
const SIGNIFICAND_BITS: u32;
|
||||
/// The bitwidth of the significand.
|
||||
const SIG_BITS: u32;
|
||||
|
||||
/// The bitwidth of the exponent
|
||||
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
|
||||
/// The bitwidth of the exponent.
|
||||
const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
|
||||
|
||||
/// The saturated value of the exponent (infinite representation), in the rightmost position.
|
||||
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
|
||||
const EXP_MAX: u32 = (1 << Self::EXP_BITS) - 1;
|
||||
|
||||
/// The exponent bias value
|
||||
const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
|
||||
/// The exponent bias value.
|
||||
const EXP_BIAS: u32 = Self::EXP_MAX >> 1;
|
||||
|
||||
/// A mask for the sign bit
|
||||
/// A mask for the sign bit.
|
||||
const SIGN_MASK: Self::Int;
|
||||
|
||||
/// A mask for the significand
|
||||
const SIGNIFICAND_MASK: Self::Int;
|
||||
/// A mask for the significand.
|
||||
const SIG_MASK: Self::Int;
|
||||
|
||||
/// The implicit bit of the float format
|
||||
/// The implicit bit of the float format.
|
||||
const IMPLICIT_BIT: Self::Int;
|
||||
|
||||
/// A mask for the exponent
|
||||
const EXPONENT_MASK: Self::Int;
|
||||
/// A mask for the exponent.
|
||||
const EXP_MASK: Self::Int;
|
||||
|
||||
/// Returns `self` transmuted to `Self::Int`
|
||||
fn to_bits(self) -> Self::Int;
|
||||
|
|
@ -122,12 +122,12 @@ macro_rules! float_impl {
|
|||
const ONE: Self = 1.0;
|
||||
|
||||
const BITS: u32 = $bits;
|
||||
const SIGNIFICAND_BITS: u32 = $significand_bits;
|
||||
const SIG_BITS: u32 = $significand_bits;
|
||||
|
||||
const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
|
||||
const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1;
|
||||
const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS;
|
||||
const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
|
||||
const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
|
||||
const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
|
||||
const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
|
||||
|
||||
fn to_bits(self) -> Self::Int {
|
||||
self.to_bits()
|
||||
|
|
@ -142,8 +142,7 @@ macro_rules! float_impl {
|
|||
// necessary builtin (__unordtf2) to test whether `f128` is NaN.
|
||||
// FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
|
||||
// x is NaN if all the bits of the exponent are set and the significand is non-0
|
||||
x.to_bits() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
|
||||
&& x.to_bits() & $ty::SIGNIFICAND_MASK != 0
|
||||
x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
|
||||
}
|
||||
#[cfg(not(feature = "mangled-names"))]
|
||||
fn is_nan(x: $ty) -> bool {
|
||||
|
|
@ -159,10 +158,10 @@ macro_rules! float_impl {
|
|||
self.is_sign_negative()
|
||||
}
|
||||
fn exp(self) -> Self::ExpInt {
|
||||
((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt
|
||||
((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
|
||||
}
|
||||
fn frac(self) -> Self::Int {
|
||||
self.to_bits() & Self::SIGNIFICAND_MASK
|
||||
self.to_bits() & Self::SIG_MASK
|
||||
}
|
||||
fn imp_frac(self) -> Self::Int {
|
||||
self.frac() | Self::IMPLICIT_BIT
|
||||
|
|
@ -173,21 +172,19 @@ macro_rules! float_impl {
|
|||
fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
|
||||
Self::from_bits(
|
||||
((negative as Self::Int) << (Self::BITS - 1))
|
||||
| ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
|
||||
| (significand & Self::SIGNIFICAND_MASK),
|
||||
| ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
|
||||
| (significand & Self::SIG_MASK),
|
||||
)
|
||||
}
|
||||
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
|
||||
let shift = significand
|
||||
.leading_zeros()
|
||||
.wrapping_sub(Self::EXPONENT_BITS);
|
||||
let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
|
||||
(
|
||||
1i32.wrapping_sub(shift as i32),
|
||||
significand << shift as Self::Int,
|
||||
)
|
||||
}
|
||||
fn is_subnormal(self) -> bool {
|
||||
(self.to_bits() & Self::EXPONENT_MASK) == Self::Int::ZERO
|
||||
(self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -13,20 +13,20 @@ where
|
|||
let zero = F::Int::ZERO;
|
||||
|
||||
let bits = F::BITS;
|
||||
let significand_bits = F::SIGNIFICAND_BITS;
|
||||
let max_exponent = F::EXPONENT_MAX;
|
||||
let significand_bits = F::SIG_BITS;
|
||||
let max_exponent = F::EXP_MAX;
|
||||
|
||||
let exponent_bias = F::EXPONENT_BIAS;
|
||||
let exponent_bias = F::EXP_BIAS;
|
||||
|
||||
let implicit_bit = F::IMPLICIT_BIT;
|
||||
let significand_mask = F::SIGNIFICAND_MASK;
|
||||
let sign_bit = F::SIGN_MASK as F::Int;
|
||||
let significand_mask = F::SIG_MASK;
|
||||
let sign_bit = F::SIGN_MASK;
|
||||
let abs_mask = sign_bit - one;
|
||||
let exponent_mask = F::EXPONENT_MASK;
|
||||
let exponent_mask = F::EXP_MASK;
|
||||
let inf_rep = exponent_mask;
|
||||
let quiet_bit = implicit_bit >> 1;
|
||||
let qnan_rep = exponent_mask | quiet_bit;
|
||||
let exponent_bits = F::EXPONENT_BITS;
|
||||
let exponent_bits = F::EXP_BITS;
|
||||
|
||||
let a_rep = a.to_bits();
|
||||
let b_rep = b.to_bits();
|
||||
|
|
|
|||
|
|
@ -14,33 +14,33 @@ where
|
|||
let src_zero = F::Int::ZERO;
|
||||
let src_one = F::Int::ONE;
|
||||
let src_bits = F::BITS;
|
||||
let src_exp_bias = F::EXPONENT_BIAS;
|
||||
let src_exp_bias = F::EXP_BIAS;
|
||||
|
||||
let src_min_normal = F::IMPLICIT_BIT;
|
||||
let src_significand_mask = F::SIGNIFICAND_MASK;
|
||||
let src_infinity = F::EXPONENT_MASK;
|
||||
let src_significand_mask = F::SIG_MASK;
|
||||
let src_infinity = F::EXP_MASK;
|
||||
let src_sign_mask = F::SIGN_MASK;
|
||||
let src_abs_mask = src_sign_mask - src_one;
|
||||
let round_mask = (src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)) - src_one;
|
||||
let halfway = src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS - 1);
|
||||
let src_qnan = src_one << (F::SIGNIFICAND_BITS - 1);
|
||||
let round_mask = (src_one << (F::SIG_BITS - R::SIG_BITS)) - src_one;
|
||||
let halfway = src_one << (F::SIG_BITS - R::SIG_BITS - 1);
|
||||
let src_qnan = src_one << (F::SIG_BITS - 1);
|
||||
let src_nan_code = src_qnan - src_one;
|
||||
|
||||
let dst_zero = R::Int::ZERO;
|
||||
let dst_one = R::Int::ONE;
|
||||
let dst_bits = R::BITS;
|
||||
let dst_inf_exp = R::EXPONENT_MAX;
|
||||
let dst_exp_bias = R::EXPONENT_BIAS;
|
||||
let dst_inf_exp = R::EXP_MAX;
|
||||
let dst_exp_bias = R::EXP_BIAS;
|
||||
|
||||
let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast();
|
||||
let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast();
|
||||
let underflow: F::Int = underflow_exponent << F::SIGNIFICAND_BITS;
|
||||
let overflow: F::Int = overflow_exponent << F::SIGNIFICAND_BITS;
|
||||
let underflow: F::Int = underflow_exponent << F::SIG_BITS;
|
||||
let overflow: F::Int = overflow_exponent << F::SIG_BITS;
|
||||
|
||||
let dst_qnan = R::Int::ONE << (R::SIGNIFICAND_BITS - 1);
|
||||
let dst_qnan = R::Int::ONE << (R::SIG_BITS - 1);
|
||||
let dst_nan_code = dst_qnan - dst_one;
|
||||
|
||||
let sign_bits_delta = F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS;
|
||||
let sign_bits_delta = F::SIG_BITS - R::SIG_BITS;
|
||||
// Break a into a sign and representation of the absolute value.
|
||||
let a_abs = a.to_bits() & src_abs_mask;
|
||||
let sign = a.to_bits() & src_sign_mask;
|
||||
|
|
@ -53,7 +53,7 @@ where
|
|||
abs_result = (a_abs >> sign_bits_delta).cast();
|
||||
// Cast before shifting to prevent overflow.
|
||||
let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast();
|
||||
let tmp = bias_diff << R::SIGNIFICAND_BITS;
|
||||
let tmp = bias_diff << R::SIG_BITS;
|
||||
abs_result = abs_result.wrapping_sub(tmp);
|
||||
|
||||
let round_bits = a_abs & round_mask;
|
||||
|
|
@ -70,26 +70,25 @@ where
|
|||
// bit and inserting the (truncated) trailing NaN field.
|
||||
// Cast before shifting to prevent overflow.
|
||||
let dst_inf_exp: R::Int = dst_inf_exp.cast();
|
||||
abs_result = dst_inf_exp << R::SIGNIFICAND_BITS;
|
||||
abs_result = dst_inf_exp << R::SIG_BITS;
|
||||
abs_result |= dst_qnan;
|
||||
abs_result |= dst_nan_code
|
||||
& ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast();
|
||||
abs_result |= dst_nan_code & ((a_abs & src_nan_code) >> (F::SIG_BITS - R::SIG_BITS)).cast();
|
||||
} else if a_abs >= overflow {
|
||||
// a overflows to infinity.
|
||||
// Cast before shifting to prevent overflow.
|
||||
let dst_inf_exp: R::Int = dst_inf_exp.cast();
|
||||
abs_result = dst_inf_exp << R::SIGNIFICAND_BITS;
|
||||
abs_result = dst_inf_exp << R::SIG_BITS;
|
||||
} else {
|
||||
// a underflows on conversion to the destination type or is an exact
|
||||
// zero. The result may be a denormal or zero. Extract the exponent
|
||||
// to get the shift amount for the denormalization.
|
||||
let a_exp: u32 = (a_abs >> F::SIGNIFICAND_BITS).cast();
|
||||
let a_exp: u32 = (a_abs >> F::SIG_BITS).cast();
|
||||
let shift = src_exp_bias - dst_exp_bias - a_exp + 1;
|
||||
|
||||
let significand = (a.to_bits() & src_significand_mask) | src_min_normal;
|
||||
|
||||
// Right shift by the denormalization amount with sticky.
|
||||
if shift > F::SIGNIFICAND_BITS {
|
||||
if shift > F::SIG_BITS {
|
||||
abs_result = dst_zero;
|
||||
} else {
|
||||
let sticky = if (significand << (src_bits - shift)) != src_zero {
|
||||
|
|
@ -98,8 +97,7 @@ where
|
|||
src_zero
|
||||
};
|
||||
let denormalized_significand: F::Int = significand >> shift | sticky;
|
||||
abs_result =
|
||||
(denormalized_significand >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast();
|
||||
abs_result = (denormalized_significand >> (F::SIG_BITS - R::SIG_BITS)).cast();
|
||||
let round_bits = denormalized_significand & round_mask;
|
||||
// Round to nearest
|
||||
if round_bits > halfway {
|
||||
|
|
|
|||
|
|
@ -178,18 +178,18 @@ fn fuzz_float_step<F: Float>(rng: &mut Xoshiro128StarStar, f: &mut F) {
|
|||
let sign = (rng32 & 1) != 0;
|
||||
|
||||
// exponent fuzzing. Only 4 bits for the selector needed.
|
||||
let ones = (F::Int::ONE << F::EXPONENT_BITS) - F::Int::ONE;
|
||||
let r0 = (rng32 >> 1) % F::EXPONENT_BITS;
|
||||
let r1 = (rng32 >> 5) % F::EXPONENT_BITS;
|
||||
let ones = (F::Int::ONE << F::EXP_BITS) - F::Int::ONE;
|
||||
let r0 = (rng32 >> 1) % F::EXP_BITS;
|
||||
let r1 = (rng32 >> 5) % F::EXP_BITS;
|
||||
// custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing
|
||||
// the sign bit.
|
||||
let mask = if r1 == 0 {
|
||||
ones.wrapping_shr(r0)
|
||||
} else {
|
||||
let tmp = ones.wrapping_shr(r0);
|
||||
(tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXPONENT_BITS - r1)) & ones
|
||||
(tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXP_BITS - r1)) & ones
|
||||
};
|
||||
let mut exp = (f.to_bits() & F::EXPONENT_MASK) >> F::SIGNIFICAND_BITS;
|
||||
let mut exp = (f.to_bits() & F::EXP_MASK) >> F::SIG_BITS;
|
||||
match (rng32 >> 9) % 4 {
|
||||
0 => exp |= mask,
|
||||
1 => exp &= mask,
|
||||
|
|
@ -197,9 +197,9 @@ fn fuzz_float_step<F: Float>(rng: &mut Xoshiro128StarStar, f: &mut F) {
|
|||
}
|
||||
|
||||
// significand fuzzing
|
||||
let mut sig = f.to_bits() & F::SIGNIFICAND_MASK;
|
||||
let mut sig = f.to_bits() & F::SIG_MASK;
|
||||
fuzz_step(rng, &mut sig);
|
||||
sig &= F::SIGNIFICAND_MASK;
|
||||
sig &= F::SIG_MASK;
|
||||
|
||||
*f = F::from_parts(sign, exp, sig);
|
||||
}
|
||||
|
|
@ -209,22 +209,22 @@ macro_rules! float_edge_cases {
|
|||
for exponent in [
|
||||
F::Int::ZERO,
|
||||
F::Int::ONE,
|
||||
F::Int::ONE << (F::EXPONENT_BITS / 2),
|
||||
(F::Int::ONE << (F::EXPONENT_BITS - 1)) - F::Int::ONE,
|
||||
F::Int::ONE << (F::EXPONENT_BITS - 1),
|
||||
(F::Int::ONE << (F::EXPONENT_BITS - 1)) + F::Int::ONE,
|
||||
(F::Int::ONE << F::EXPONENT_BITS) - F::Int::ONE,
|
||||
F::Int::ONE << (F::EXP_BITS / 2),
|
||||
(F::Int::ONE << (F::EXP_BITS - 1)) - F::Int::ONE,
|
||||
F::Int::ONE << (F::EXP_BITS - 1),
|
||||
(F::Int::ONE << (F::EXP_BITS - 1)) + F::Int::ONE,
|
||||
(F::Int::ONE << F::EXP_BITS) - F::Int::ONE,
|
||||
]
|
||||
.iter()
|
||||
{
|
||||
for significand in [
|
||||
F::Int::ZERO,
|
||||
F::Int::ONE,
|
||||
F::Int::ONE << (F::SIGNIFICAND_BITS / 2),
|
||||
(F::Int::ONE << (F::SIGNIFICAND_BITS - 1)) - F::Int::ONE,
|
||||
F::Int::ONE << (F::SIGNIFICAND_BITS - 1),
|
||||
(F::Int::ONE << (F::SIGNIFICAND_BITS - 1)) + F::Int::ONE,
|
||||
(F::Int::ONE << F::SIGNIFICAND_BITS) - F::Int::ONE,
|
||||
F::Int::ONE << (F::SIG_BITS / 2),
|
||||
(F::Int::ONE << (F::SIG_BITS - 1)) - F::Int::ONE,
|
||||
F::Int::ONE << (F::SIG_BITS - 1),
|
||||
(F::Int::ONE << (F::SIG_BITS - 1)) + F::Int::ONE,
|
||||
(F::Int::ONE << F::SIG_BITS) - F::Int::ONE,
|
||||
]
|
||||
.iter()
|
||||
{
|
||||
|
|
|
|||
|
|
@ -19,8 +19,8 @@ macro_rules! pow {
|
|||
use compiler_builtins::float::Float;
|
||||
fuzz_float_2(N, |x: $f, y: $f| {
|
||||
if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) {
|
||||
let n = y.to_bits() & !<$f as Float>::SIGNIFICAND_MASK;
|
||||
let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIGNIFICAND_BITS;
|
||||
let n = y.to_bits() & !<$f as Float>::SIG_MASK;
|
||||
let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIG_BITS;
|
||||
let n = n as i32;
|
||||
let tmp0: $f = x.powi(n);
|
||||
let tmp1: $f = $fn(x, n);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue