Rollup merge of #139186 - TDecking:float, r=workingjubilee

Refactor `diy_float`

The refactor replaces bespoke algorithms with functions already inside the standard library, improving both codegen and readability.
This commit is contained in:
Guillaume Gomez 2025-05-01 22:27:20 +02:00 committed by GitHub
commit bd68c36ee0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 15 additions and 47 deletions

View file

@ -21,61 +21,29 @@ pub struct Fp {
impl Fp {
/// Returns a correctly rounded product of itself and `other`.
pub fn mul(&self, other: &Fp) -> Fp {
const MASK: u64 = 0xffffffff;
let a = self.f >> 32;
let b = self.f & MASK;
let c = other.f >> 32;
let d = other.f & MASK;
let ac = a * c;
let bc = b * c;
let ad = a * d;
let bd = b * d;
let tmp = (bd >> 32) + (ad & MASK) + (bc & MASK) + (1 << 31) /* round */;
let f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
pub fn mul(self, other: Self) -> Self {
let (lo, hi) = self.f.widening_mul(other.f);
let f = hi + (lo >> 63) /* round */;
let e = self.e + other.e + 64;
Fp { f, e }
Self { f, e }
}
/// Normalizes itself so that the resulting mantissa is at least `2^63`.
pub fn normalize(&self) -> Fp {
let mut f = self.f;
let mut e = self.e;
if f >> (64 - 32) == 0 {
f <<= 32;
e -= 32;
}
if f >> (64 - 16) == 0 {
f <<= 16;
e -= 16;
}
if f >> (64 - 8) == 0 {
f <<= 8;
e -= 8;
}
if f >> (64 - 4) == 0 {
f <<= 4;
e -= 4;
}
if f >> (64 - 2) == 0 {
f <<= 2;
e -= 2;
}
if f >> (64 - 1) == 0 {
f <<= 1;
e -= 1;
}
pub fn normalize(self) -> Self {
let lz = self.f.leading_zeros();
let f = self.f << lz;
let e = self.e - lz as i16;
debug_assert!(f >= (1 << 63));
Fp { f, e }
Self { f, e }
}
/// Normalizes itself to have the shared exponent.
/// It can only decrease the exponent (and thus increase the mantissa).
pub fn normalize_to(&self, e: i16) -> Fp {
pub fn normalize_to(self, e: i16) -> Self {
let edelta = self.e - e;
assert!(edelta >= 0);
let edelta = edelta as usize;
assert_eq!(self.f << edelta >> edelta, self.f);
Fp { f: self.f << edelta, e }
Self { f: self.f << edelta, e }
}
}

View file

@ -196,9 +196,9 @@ pub fn format_shortest_opt<'a>(
let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
// scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
let plus = plus.mul(&cached);
let minus = minus.mul(&cached);
let v = v.mul(&cached);
let plus = plus.mul(cached);
let minus = minus.mul(cached);
let v = v.mul(cached);
debug_assert_eq!(plus.e, minus.e);
debug_assert_eq!(plus.e, v.e);
@ -480,7 +480,7 @@ pub fn format_exact_opt<'a>(
// normalize and scale `v`.
let v = Fp { f: d.mant, e: d.exp }.normalize();
let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
let v = v.mul(&cached);
let v = v.mul(cached);
// divide `v` into integral and fractional parts.
let e = -v.e as usize;