Implement remaining __clz*i2 intrinsics

2024-06-22 00:01:52 +03:00 · 2024-06-22 00:01:52 +03:00 · a5c7a17d55
commit a5c7a17d55
parent e01e62aa16
5 changed files with 123 additions and 67 deletions
--- a/library/compiler-builtins/README.md
+++ b/library/compiler-builtins/README.md
@ -157,6 +157,9 @@ rely on CI.
 - [x] bswapdi2.c
 - [x] bswapsi2.c
 - [x] bswapti2.c
+- [x] clzdi2.c
+- [x] clzsi2.c
+- [x] clzti2.c
 - [x] comparedf2.c
 - [x] comparesf2.c
 - [x] divdf3.c
@ -325,9 +328,6 @@ These builtins are never called by LLVM.
 - ~~arm/switch32.S~~
 - ~~arm/switch8.S~~
 - ~~arm/switchu8.S~~
- ~~clzdi2.c~~
- ~~clzsi2.c~~
- ~~clzti2.c~~
 - ~~cmpdi2.c~~
 - ~~cmpti2.c~~
 - ~~ctzdi2.c~~
--- a/library/compiler-builtins/build.rs
+++ b/library/compiler-builtins/build.rs
@ -165,6 +165,7 @@ fn configure_check_cfg() {
        "__bswapdi2",
        "__bswapti2",
        "__clzsi2",
+        "__clzdi2",
        "__divdi3",
        "__divsi3",
        "__divmoddi4",
@ -382,7 +383,6 @@ mod c {
            sources.extend(&[
                ("__absvti2", "absvti2.c"),
                ("__addvti3", "addvti3.c"),
-                ("__clzti2", "clzti2.c"),
                ("__cmpti2", "cmpti2.c"),
                ("__ctzti2", "ctzti2.c"),
                ("__ffsti2", "ffsti2.c"),
--- a/library/compiler-builtins/src/int/leading_zeros.rs
+++ b/library/compiler-builtins/src/int/leading_zeros.rs
@ -3,10 +3,12 @@
 // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`.
 // Compilers will insert the check for zero in cases where it is needed.

+use crate::int::{CastInto, Int};
+
 public_test_dep! {
 /// Returns the number of leading binary zeros in `x`.
 #[allow(dead_code)]
-pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
+pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
    // The basic idea is to test if the higher bits of `x` are zero and bisect the number
    // of leading zeros. It is possible for all branches of the bisection to use the same
    // code path by conditionally shifting the higher parts down to let the next bisection
@ -16,46 +18,47 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
    // because it simplifies the final bisection step.
    let mut x = x;
    // the number of potential leading zeros
-    let mut z = usize::MAX.count_ones() as usize;
+    let mut z = T::BITS as usize;
    // a temporary
-    let mut t: usize;
-    #[cfg(target_pointer_width = "64")]
-    {
+    let mut t: T;
+
+    const { assert!(T::BITS <= 64) };
+    if T::BITS >= 64 {
        t = x >> 32;
-        if t != 0 {
+        if t != T::ZERO {
            z -= 32;
            x = t;
        }
    }
-    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
-    {
+    if T::BITS >= 32 {
        t = x >> 16;
-        if t != 0 {
+        if t != T::ZERO {
            z -= 16;
            x = t;
        }
    }
+    const { assert!(T::BITS >= 16) };
    t = x >> 8;
-    if t != 0 {
+    if t != T::ZERO {
        z -= 8;
        x = t;
    }
    t = x >> 4;
-    if t != 0 {
+    if t != T::ZERO {
        z -= 4;
        x = t;
    }
    t = x >> 2;
-    if t != 0 {
+    if t != T::ZERO {
        z -= 2;
        x = t;
    }
    // the last two bisections are combined into one conditional
    t = x >> 1;
-    if t != 0 {
+    if t != T::ZERO {
        z - 2
    } else {
-        z - x
+        z - x.cast()
    }

    // We could potentially save a few cycles by using the LUT trick from
@ -80,12 +83,12 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
 public_test_dep! {
 /// Returns the number of leading binary zeros in `x`.
 #[allow(dead_code)]
-pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
+pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
    let mut x = x;
    // the number of potential leading zeros
-    let mut z = usize::MAX.count_ones() as usize;
+    let mut z = T::BITS;
    // a temporary
-    let mut t: usize;
+    let mut t: u32;

    // RISC-V does not have a set-if-greater-than-or-equal instruction and
    // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is
@ -95,11 +98,11 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
    // right). If we try to save an instruction by using `x < imm` for each bisection, we
    // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
    // but the immediate will never fit into 12 bits and never save an instruction.
-    #[cfg(target_pointer_width = "64")]
-    {
+    const { assert!(T::BITS <= 64) };
+    if T::BITS >= 64 {
        // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
        // `t` is set to 0.
-        t = ((x >= (1 << 32)) as usize) << 5;
+        t = ((x >= (T::ONE << 32)) as u32) << 5;
        // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
        // next step to process.
        x >>= t;
@ -107,43 +110,58 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
        // leading zeros
        z -= t;
    }
-    #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
-    {
-        t = ((x >= (1 << 16)) as usize) << 4;
+    if T::BITS >= 32 {
+        t = ((x >= (T::ONE << 16)) as u32) << 4;
        x >>= t;
        z -= t;
    }
-    t = ((x >= (1 << 8)) as usize) << 3;
+    const { assert!(T::BITS >= 16) };
+    t = ((x >= (T::ONE << 8)) as u32) << 3;
    x >>= t;
    z -= t;
-    t = ((x >= (1 << 4)) as usize) << 2;
+    t = ((x >= (T::ONE << 4)) as u32) << 2;
    x >>= t;
    z -= t;
-    t = ((x >= (1 << 2)) as usize) << 1;
+    t = ((x >= (T::ONE << 2)) as u32) << 1;
    x >>= t;
    z -= t;
-    t = (x >= (1 << 1)) as usize;
+    t = (x >= (T::ONE << 1)) as u32;
    x >>= t;
    z -= t;
    // All bits except the LSB are guaranteed to be zero for this final bisection step.
    // If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
-    z - x
+    z as usize - x.cast()
 }
 }

 intrinsics! {
    #[maybe_use_optimized_c_shim]
-    #[cfg(any(
-        target_pointer_width = "16",
-        target_pointer_width = "32",
-        target_pointer_width = "64"
-    ))]
-    /// Returns the number of leading binary zeros in `x`.
-    pub extern "C" fn __clzsi2(x: usize) -> usize {
+    /// Returns the number of leading binary zeros in `x`.
+    pub extern "C" fn __clzsi2(x: u32) -> usize {
        if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
-            usize_leading_zeros_riscv(x)
+            leading_zeros_riscv(x)
        } else {
-            usize_leading_zeros_default(x)
+            leading_zeros_default(x)
+        }
+    }
+
+    #[maybe_use_optimized_c_shim]
+    /// Returns the number of leading binary zeros in `x`.
+    pub extern "C" fn __clzdi2(x: u64) -> usize {
+        if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
+            leading_zeros_riscv(x)
+        } else {
+            leading_zeros_default(x)
+        }
+    }
+
+    /// Returns the number of leading binary zeros in `x`.
+    pub extern "C" fn __clzti2(x: u128) -> usize {
+        let hi = (x >> 64) as u64;
+        if hi == 0 {
+            64 + __clzdi2(x as u64)
+        } else {
+            __clzdi2(hi)
        }
    }
 }
--- a/library/compiler-builtins/src/int/mod.rs
+++ b/library/compiler-builtins/src/int/mod.rs
@ -12,7 +12,6 @@ pub mod shift;
 pub mod udiv;

 pub use big::{i256, u256};
-pub use leading_zeros::__clzsi2;

 public_test_dep! {
 /// Minimal integer implementations needed on all integer types, including wide integers.
--- a/library/compiler-builtins/testcrate/tests/misc.rs
+++ b/library/compiler-builtins/testcrate/tests/misc.rs
@ -65,31 +65,70 @@ fn fuzz_values() {

 #[test]
 fn leading_zeros() {
-    use compiler_builtins::int::__clzsi2;
-    use compiler_builtins::int::leading_zeros::{
-        usize_leading_zeros_default, usize_leading_zeros_riscv,
-    };
-    fuzz(N, |x: usize| {
-        let lz = x.leading_zeros() as usize;
-        let lz0 = __clzsi2(x);
-        let lz1 = usize_leading_zeros_default(x);
-        let lz2 = usize_leading_zeros_riscv(x);
-        if lz0 != lz {
-            panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
-        }
-        if lz1 != lz {
-            panic!(
-                "usize_leading_zeros_default({}): std: {}, builtins: {}",
-                x, lz, lz1
-            );
-        }
-        if lz2 != lz {
-            panic!(
-                "usize_leading_zeros_riscv({}): std: {}, builtins: {}",
-                x, lz, lz2
-            );
-        }
-    })
+    use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv};
+    {
+        use compiler_builtins::int::leading_zeros::__clzsi2;
+        fuzz(N, |x: u32| {
+            if x == 0 {
+                return; // undefined value for an intrinsic
+            }
+            let lz = x.leading_zeros() as usize;
+            let lz0 = __clzsi2(x);
+            let lz1 = leading_zeros_default(x);
+            let lz2 = leading_zeros_riscv(x);
+            if lz0 != lz {
+                panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
+            }
+            if lz1 != lz {
+                panic!(
+                    "leading_zeros_default({}): std: {}, builtins: {}",
+                    x, lz, lz1
+                );
+            }
+            if lz2 != lz {
+                panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
+            }
+        });
+    }
+
+    {
+        use compiler_builtins::int::leading_zeros::__clzdi2;
+        fuzz(N, |x: u64| {
+            if x == 0 {
+                return; // undefined value for an intrinsic
+            }
+            let lz = x.leading_zeros() as usize;
+            let lz0 = __clzdi2(x);
+            let lz1 = leading_zeros_default(x);
+            let lz2 = leading_zeros_riscv(x);
+            if lz0 != lz {
+                panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0);
+            }
+            if lz1 != lz {
+                panic!(
+                    "leading_zeros_default({}): std: {}, builtins: {}",
+                    x, lz, lz1
+                );
+            }
+            if lz2 != lz {
+                panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
+            }
+        });
+    }
+
+    {
+        use compiler_builtins::int::leading_zeros::__clzti2;
+        fuzz(N, |x: u128| {
+            if x == 0 {
+                return; // undefined value for an intrinsic
+            }
+            let lz = x.leading_zeros() as usize;
+            let lz0 = __clzti2(x);
+            if lz0 != lz {
+                panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0);
+            }
+        });
+    }
 }

 #[test]