Merge pull request #622 from tgross35/f128-div

Add `__divtf3`
2024-09-24 18:40:16 +02:00 · 2024-09-24 18:40:16 +02:00 · 608fd00051
commit 608fd00051
parent cad966f041 4842bd6ab1
10 changed files with 548 additions and 695 deletions
--- a/library/compiler-builtins/README.md
+++ b/library/compiler-builtins/README.md
@ -222,7 +222,7 @@ of being added to Rust.

 - [x] addtf3.c
 - [x] comparetf2.c
- [ ] divtf3.c
+- [x] divtf3.c
 - [x] extenddftf2.c
 - [x] extendhfsf2.c
 - [x] extendhftf2.c
--- a/library/compiler-builtins/build.rs
+++ b/library/compiler-builtins/build.rs
@ -526,7 +526,6 @@ mod c {
                ("__floatsitf", "floatsitf.c"),
                ("__floatunditf", "floatunditf.c"),
                ("__floatunsitf", "floatunsitf.c"),
-                ("__divtf3", "divtf3.c"),
                ("__powitf2", "powitf2.c"),
                ("__fe_getround", "fp_mode.c"),
                ("__fe_raise_inexact", "fp_mode.c"),
--- a/library/compiler-builtins/examples/intrinsics.rs
+++ b/library/compiler-builtins/examples/intrinsics.rs
@ -256,6 +256,10 @@ mod intrinsics {
        a * b
    }

+    pub fn divtf(a: f128, b: f128) -> f128 {
+        a / b
+    }
+
    pub fn subtf(a: f128, b: f128) -> f128 {
        a - b
    }
@ -440,6 +444,7 @@ fn run() {
    bb(aeabi_uldivmod(bb(2), bb(3)));
    bb(ashlti3(bb(2), bb(2)));
    bb(ashrti3(bb(2), bb(2)));
+    bb(divtf(bb(2.), bb(2.)));
    bb(divti3(bb(2), bb(2)));
    bb(eqtf(bb(2.), bb(2.)));
    bb(extendhfdf(bb(2.)));
--- a/library/compiler-builtins/src/float/div.rs
+++ b/library/compiler-builtins/src/float/div.rs
--- a/library/compiler-builtins/src/float/mod.rs
+++ b/library/compiler-builtins/src/float/mod.rs
@ -31,10 +31,10 @@ pub(crate) trait Float:
    + ops::Rem<Output = Self>
 {
    /// A uint of the same width as the float
-    type Int: Int;
+    type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;

    /// A int of the same width as the float
-    type SignedInt: Int;
+    type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;

    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
    type ExpInt: Int;
@ -51,7 +51,7 @@ pub(crate) trait Float:
    /// The bitwidth of the exponent
    const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;

-    /// The maximum value of the exponent
+    /// The saturated value of the exponent (infinite representation), in the rightmost position.
    const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;

    /// The exponent bias value
@ -83,7 +83,7 @@ pub(crate) trait Float:
    /// Returns true if the sign is negative
    fn is_sign_negative(self) -> bool;

-    /// Returns the exponent with bias
+    /// Returns the exponent, not adjusting for bias.
    fn exp(self) -> Self::ExpInt;

    /// Returns the significand with no implicit bit (or the "fractional" part)
@ -175,7 +175,7 @@ macro_rules! float_impl {
            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                let shift = significand
                    .leading_zeros()
-                    .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros());
+                    .wrapping_sub(Self::EXPONENT_BITS);
                (
                    1i32.wrapping_sub(shift as i32),
                    significand << shift as Self::Int,
--- a/library/compiler-builtins/src/int/big.rs
+++ b/library/compiler-builtins/src/int/big.rs
@ -93,7 +93,7 @@ macro_rules! impl_common {
            type Output = Self;

            fn shl(self, rhs: u32) -> Self::Output {
-                todo!()
+                unimplemented!("only used to meet trait bounds")
            }
        }
    };
@ -102,6 +102,41 @@ macro_rules! impl_common {
 impl_common!(i256);
 impl_common!(u256);

+/// Logical (zero-filling) right shift for `u256`.
+///
+/// The indexing below treats `self.0` as four 64-bit limbs with limb 0 the least significant.
+impl ops::Shr<u32> for u256 {
+    type Output = Self;
+
+    /// Returns `self` shifted right by `rhs` bits, with zeros shifted in from the top.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `rhs >= Self::BITS`.
+    fn shr(self, rhs: u32) -> Self::Output {
+        assert!(rhs < Self::BITS, "attempted to shift right with overflow");
+
+        if rhs == 0 {
+            return self;
+        }
+
+        let mut ret = self;
+        // Split the shift into whole limbs plus a remainder of 0..=63 bits.
+        let limb_shift = (rhs / 64) as usize;
+        let bit_shift = rhs % 64;
+
+        // Ascending order is what makes the in-place update sound: iteration `idx` only reads
+        // limbs at `idx` or above, none of which have been overwritten yet.
+        for idx in 0..4 {
+            let base_idx = idx + limb_shift;
+
+            // Source limb is past the top of the value, so zeros are shifted in.
+            let Some(base) = ret.0.get(base_idx) else {
+                ret.0[idx] = 0;
+                continue;
+            };
+
+            let mut new_val = base >> bit_shift;
+
+            if let Some(next) = ret.0.get(base_idx + 1) {
+                // Bring in the low `bit_shift` bits of the next limb. For a limb-aligned shift
+                // (`bit_shift == 0`) the amount is 64: `overflowing_shl` would wrap that to a
+                // shift by 0 and OR the whole next limb in, whereas `checked_shl` returns
+                // `None`, so nothing is contributed.
+                new_val |= next.checked_shl(64 - bit_shift).unwrap_or(0);
+            }
+
+            ret.0[idx] = new_val;
+        }
+
+        ret
+    }
+}
+
 macro_rules! word {
    (1, $val:expr) => {
        (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
--- a/library/compiler-builtins/testcrate/benches/float_div.rs
+++ b/library/compiler-builtins/testcrate/benches/float_div.rs
@ -1,5 +1,7 @@
+#![cfg_attr(f128_enabled, feature(f128))]
+
 use compiler_builtins::float::div;
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{criterion_main, Criterion};
 use testcrate::float_bench;

 float_bench! {
@ -64,5 +66,28 @@ float_bench! {
    ],
 }

-criterion_group!(float_div, div_f32, div_f64);
+#[cfg(f128_enabled)]
+float_bench! {
+    name: div_f128,
+    sig: (a: f128, b: f128) -> f128,
+    crate_fn: div::__divtf3,
+    crate_fn_ppc: div::__divkf3,
+    sys_fn: __divtf3,
+    sys_fn_ppc: __divkf3,
+    sys_available: not(feature = "no-sys-f128"),
+    asm: []
+}
+
+/// Benchmark entry point invoked by `criterion_main!`: runs every division benchmark with a
+/// `Criterion` configured from the command-line arguments.
+pub fn float_div() {
+    let mut c = Criterion::default().configure_from_args();
+
+    div_f32(&mut c);
+    div_f64(&mut c);
+
+    // `div_f128` is only generated when the `f128_enabled` cfg is set.
+    #[cfg(f128_enabled)]
+    div_f128(&mut c);
+}
+
 criterion_main!(float_div);
--- a/library/compiler-builtins/testcrate/src/bench.rs
+++ b/library/compiler-builtins/testcrate/src/bench.rs
@ -30,13 +30,14 @@ pub fn skip_sys_checks(test_name: &str) -> bool {

    // FIXME(f16_f128): system symbols have incorrect results
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
-    const X86_NO_SSE_SKIPPED: &[&str] =
-        &["add_f128", "sub_f128", "mul_f128", "powi_f32", "powi_f64"];
+    const X86_NO_SSE_SKIPPED: &[&str] = &[
+        "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
+    ];

    // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
    // uses `compiler-rt` version.
    // <https://github.com/llvm/llvm-project/issues/91840>
-    const AARCH64_SKIPPED: &[&str] = &["mul_f128"];
+    const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];

    // FIXME(llvm): system symbols have incorrect results on Windows
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
--- a/library/compiler-builtins/testcrate/tests/big.rs
+++ b/library/compiler-builtins/testcrate/tests/big.rs
@ -59,3 +59,76 @@ fn widen_mul_u128() {
    }
    assert!(errors.is_empty());
 }
+
+#[test]
+fn not_u128() {
+    assert_eq!(!u256::ZERO, u256::MAX);
+}
+
+/// Checks `u256 >> u32` in two phases: first against native `u128` shifts for values that fit
+/// in the low 128 bits, then against hand-written expected limbs for `u256::MAX`.
+#[test]
+fn shr_u128() {
+    let only_low = [
+        1,
+        u16::MAX.into(),
+        u32::MAX.into(),
+        u64::MAX.into(),
+        u128::MAX,
+    ];
+
+    // Collect every mismatch so that one run reports all failing cases.
+    let mut errors = Vec::new();
+
+    // Phase 1: widening then shifting must equal shifting then widening.
+    for a in only_low {
+        for perturb in 0..10 {
+            let a = a.saturating_add(perturb);
+            for shift in 0..128 {
+                let res = a.widen() >> shift;
+                let expected = (a >> shift).widen();
+                if res != expected {
+                    errors.push((a.widen(), shift, res, expected));
+                }
+            }
+        }
+    }
+
+    // Phase 2: shifts on either side of each 64-bit limb boundary, plus the largest shifts.
+    let check = [
+        (
+            u256::MAX,
+            1,
+            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]),
+        ),
+        (
+            u256::MAX,
+            5,
+            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]),
+        ),
+        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
+        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
+        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
+        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
+        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
+        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
+        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
+        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
+        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
+        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
+        (u256::MAX, 255, u256([1, 0, 0, 0])),
+    ];
+
+    for (input, shift, expected) in check {
+        let res = input >> shift;
+        if res != expected {
+            errors.push((input, shift, res, expected));
+        }
+    }
+
+    for (a, b, res, expected) in &errors {
+        eprintln!(
+            "FAILURE: {} >> {b} = {} got {}",
+            hexu(*a),
+            hexu(*expected),
+            hexu(*res),
+        );
+    }
+    assert!(errors.is_empty());
+}
--- a/library/compiler-builtins/testcrate/tests/div_rem.rs
+++ b/library/compiler-builtins/testcrate/tests/div_rem.rs
@ -1,3 +1,4 @@
+#![feature(f128)]
 #![allow(unused_macros)]

 use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
@ -115,7 +116,13 @@ macro_rules! float {
                fuzz_float_2(N, |x: $f, y: $f| {
                    let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
                    let quo1: $f = $fn(x, y);
-                    #[cfg(not(target_arch = "arm"))]
+
+                    // ARM SIMD instructions always flush subnormals to zero
+                    if cfg!(target_arch = "arm") &&
+                        ((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) {
+                        return;
+                    }
+
                    if !Float::eq_repr(quo0, quo1) {
                        panic!(
                            "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
@ -126,21 +133,6 @@ macro_rules! float {
                            quo1
                        );
                    }
-
-                    // ARM SIMD instructions always flush subnormals to zero
-                    #[cfg(target_arch = "arm")]
-                    if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) {
-                        if !Float::eq_repr(quo0, quo1) {
-                            panic!(
-                                "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
-                                stringify!($fn),
-                                x,
-                                y,
-                                quo0,
-                                quo1
-                            );
-                        }
-                    }
                });
            }
        )*
@ -155,4 +147,19 @@ mod float_div {
        f32, __divsf3, Single, all();
        f64, __divdf3, Double, all();
    }
+
+    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
+    float! {
+        f128, __divtf3, Quad,
+        // FIXME(llvm): there is a bug in LLVM rt.
+        // See <https://github.com/llvm/llvm-project/issues/91840>.
+        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
+    }
+
+    #[cfg(not(feature = "no-f16-f128"))]
+    #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
+    float! {
+        f128, __divkf3, Quad, not(feature = "no-sys-f128");
+    }
 }