Merge pull request #661 from rust-lang/revert-656-public-test-deps

Revert "Eliminate the use of `public_test_dep!`"
2024-08-06 21:18:16 -05:00 · 2024-08-06 21:18:16 -05:00 · a1fd037088
commit a1fd037088
parent 93083bd3bc 14bd1e6ae1
8 changed files with 770 additions and 763 deletions
--- a/library/compiler-builtins/src/float/mod.rs
+++ b/library/compiler-builtins/src/float/mod.rs
@ -1,3 +1,7 @@
+use core::ops;
+
+use crate::int::{DInt, Int, MinInt};
+
 pub mod add;
 pub mod cmp;
 pub mod conv;
@ -6,11 +10,187 @@ pub mod extend;
 pub mod mul;
 pub mod pow;
 pub mod sub;
-pub(crate) mod traits;
 pub mod trunc;

-#[cfg(not(feature = "public-test-deps"))]
-pub(crate) use traits::{Float, HalfRep};
+/// Alias for the integer type with half the bit width of `F::Int` (that is, `F::Int`'s `DInt::H`)
+pub(crate) type HalfRep<F> = <<F as Float>::Int as DInt>::H;

-#[cfg(feature = "public-test-deps")]
-pub use traits::{Float, HalfRep};
+public_test_dep! {
+/// Trait for some basic operations on floats, mostly access to parts of the bit representation
+#[allow(dead_code)]
+pub(crate) trait Float:
+    Copy
+    + core::fmt::Debug
+    + PartialEq
+    + PartialOrd
+    + ops::AddAssign
+    + ops::MulAssign
+    + ops::Add<Output = Self>
+    + ops::Sub<Output = Self>
+    + ops::Div<Output = Self>
+    + ops::Rem<Output = Self>
+{
+    /// An unsigned integer of the same width as the float
+    type Int: Int;
+
+    /// A signed integer of the same width as the float
+    type SignedInt: Int;
+
+    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
+    type ExpInt: Int;
+
+    const ZERO: Self; // the float value 0.0
+    const ONE: Self; // the float value 1.0
+
+    /// The total bit width of the float type
+    const BITS: u32;
+
+    /// The number of stored significand bits (the implicit bit is not counted)
+    const SIGNIFICAND_BITS: u32;
+
+    /// The bit width of the exponent: whatever remains after the significand and the sign bit
+    const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
+
+    /// The maximum value of the biased exponent field (all exponent bits set)
+    const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
+
+    /// The exponent bias value (`EXPONENT_MAX` halved, rounding down)
+    const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
+
+    /// A mask for the sign bit
+    const SIGN_MASK: Self::Int;
+
+    /// A mask for the significand
+    const SIGNIFICAND_MASK: Self::Int;
+
+    /// The implicit bit of the float format
+    const IMPLICIT_BIT: Self::Int;
+
+    /// A mask for the exponent
+    const EXPONENT_MASK: Self::Int;
+
+    /// Returns `self` transmuted to `Self::Int`
+    fn repr(self) -> Self::Int;
+
+    /// Returns `self` transmuted to `Self::SignedInt`
+    fn signed_repr(self) -> Self::SignedInt;
+
+    /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
+    /// represented in multiple different ways. This method returns `true` if two NaNs are
+    /// compared.
+    fn eq_repr(self, rhs: Self) -> bool;
+
+    /// Returns true if the sign is negative
+    fn is_sign_negative(self) -> bool;
+
+    /// Returns the exponent with bias, i.e. the raw exponent field of the representation
+    fn exp(self) -> Self::ExpInt;
+
+    /// Returns the significand with no implicit bit (or the "fractional" part)
+    fn frac(self) -> Self::Int;
+
+    /// Returns the significand with implicit bit
+    fn imp_frac(self) -> Self::Int;
+
+    /// Returns a `Self::Int` transmuted back to `Self`
+    fn from_repr(a: Self::Int) -> Self;
+
+    /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
+    fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self;
+
+    /// Returns (normalized exponent, normalized significand)
+    fn normalize(significand: Self::Int) -> (i32, Self::Int);
+
+    /// Returns whether `self` is subnormal
+    fn is_subnormal(self) -> bool;
+}
+}
+
+macro_rules! float_impl { // implements `Float` for the primitive float type `$ty`
+    ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
+        impl Float for $ty {
+            type Int = $ity;
+            type SignedInt = $sity;
+            type ExpInt = $expty;
+
+            const ZERO: Self = 0.0;
+            const ONE: Self = 1.0;
+
+            const BITS: u32 = $bits;
+            const SIGNIFICAND_BITS: u32 = $significand_bits;
+
+            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); // only the top bit
+            const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; // low bits
+            const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; // just above the mask
+            const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
+
+            fn repr(self) -> Self::Int {
+                self.to_bits()
+            }
+            fn signed_repr(self) -> Self::SignedInt {
+                self.to_bits() as Self::SignedInt // same-width cast of the raw bits
+            }
+            fn eq_repr(self, rhs: Self) -> bool {
+                #[cfg(feature = "mangled-names")]
+                fn is_nan(x: $ty) -> bool {
+                    // When using mangled-names, the "real" compiler-builtins might not have the
+                    // necessary builtin (__unordtf2) to test whether `f128` is NaN.
+                    // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
+                    // x is NaN if all the bits of the exponent are set and the significand is non-0
+                    x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
+                        && x.repr() & $ty::SIGNIFICAND_MASK != 0
+                }
+                #[cfg(not(feature = "mangled-names"))]
+                fn is_nan(x: $ty) -> bool {
+                    x.is_nan()
+                }
+                if is_nan(self) && is_nan(rhs) { // any two NaNs count as equal, whatever their bits
+                    true
+                } else {
+                    self.repr() == rhs.repr()
+                }
+            }
+            fn is_sign_negative(self) -> bool {
+                self.is_sign_negative()
+            }
+            fn exp(self) -> Self::ExpInt { // exponent field moved down to bit 0, bias not removed
+                ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt
+            }
+            fn frac(self) -> Self::Int {
+                self.to_bits() & Self::SIGNIFICAND_MASK
+            }
+            fn imp_frac(self) -> Self::Int {
+                self.frac() | Self::IMPLICIT_BIT
+            }
+            fn from_repr(a: Self::Int) -> Self {
+                Self::from_bits(a)
+            }
+            fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self {
+                Self::from_repr( // bits that do not fit the exponent/significand fields are dropped
+                    ((sign as Self::Int) << (Self::BITS - 1))
+                        | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
+                        | (significand & Self::SIGNIFICAND_MASK),
+                )
+            }
+            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
+                let shift = significand // left shift that puts the top set bit on the implicit bit
+                    .leading_zeros()
+                    .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros());
+                (
+                    1i32.wrapping_sub(shift as i32),
+                    significand << shift as Self::Int,
+                )
+            }
+            fn is_subnormal(self) -> bool { // exponent field all zero; this also holds for +-0.0
+                (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO
+            }
+        }
+    };
+}
+
+#[cfg(f16_enabled)]
+float_impl!(f16, u16, i16, i8, 16, 10); // float, uint, int, exponent int, bits, significand bits
+float_impl!(f32, u32, i32, i16, 32, 23);
+float_impl!(f64, u64, i64, i16, 64, 52);
+#[cfg(f128_enabled)]
+float_impl!(f128, u128, i128, i16, 128, 112);
--- a/library/compiler-builtins/src/float/traits.rs
+++ b/library/compiler-builtins/src/float/traits.rs
@ -1,184 +0,0 @@
-use core::ops;
-
-use crate::int::{DInt, Int, MinInt};
-
-/// Wrapper to extract the integer type half of the float's size
-pub type HalfRep<F> = <<F as Float>::Int as DInt>::H;
-
-/// Trait for some basic operations on floats
-#[allow(dead_code)]
-pub trait Float:
-    Copy
-    + core::fmt::Debug
-    + PartialEq
-    + PartialOrd
-    + ops::AddAssign
-    + ops::MulAssign
-    + ops::Add<Output = Self>
-    + ops::Sub<Output = Self>
-    + ops::Div<Output = Self>
-    + ops::Rem<Output = Self>
-{
-    /// A uint of the same width as the float
-    type Int: Int;
-
-    /// A int of the same width as the float
-    type SignedInt: Int;
-
-    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
-    type ExpInt: Int;
-
-    const ZERO: Self;
-    const ONE: Self;
-
-    /// The bitwidth of the float type
-    const BITS: u32;
-
-    /// The bitwidth of the significand
-    const SIGNIFICAND_BITS: u32;
-
-    /// The bitwidth of the exponent
-    const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
-
-    /// The maximum value of the exponent
-    const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
-
-    /// The exponent bias value
-    const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1;
-
-    /// A mask for the sign bit
-    const SIGN_MASK: Self::Int;
-
-    /// A mask for the significand
-    const SIGNIFICAND_MASK: Self::Int;
-
-    /// The implicit bit of the float format
-    const IMPLICIT_BIT: Self::Int;
-
-    /// A mask for the exponent
-    const EXPONENT_MASK: Self::Int;
-
-    /// Returns `self` transmuted to `Self::Int`
-    fn repr(self) -> Self::Int;
-
-    /// Returns `self` transmuted to `Self::SignedInt`
-    fn signed_repr(self) -> Self::SignedInt;
-
-    /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
-    /// represented in multiple different ways. This method returns `true` if two NaNs are
-    /// compared.
-    fn eq_repr(self, rhs: Self) -> bool;
-
-    /// Returns true if the sign is negative
-    fn is_sign_negative(self) -> bool;
-
-    /// Returns the exponent with bias
-    fn exp(self) -> Self::ExpInt;
-
-    /// Returns the significand with no implicit bit (or the "fractional" part)
-    fn frac(self) -> Self::Int;
-
-    /// Returns the significand with implicit bit
-    fn imp_frac(self) -> Self::Int;
-
-    /// Returns a `Self::Int` transmuted back to `Self`
-    fn from_repr(a: Self::Int) -> Self;
-
-    /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
-    fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self;
-
-    /// Returns (normalized exponent, normalized significand)
-    fn normalize(significand: Self::Int) -> (i32, Self::Int);
-
-    /// Returns if `self` is subnormal
-    fn is_subnormal(self) -> bool;
-}
-
-macro_rules! float_impl {
-    ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
-        impl Float for $ty {
-            type Int = $ity;
-            type SignedInt = $sity;
-            type ExpInt = $expty;
-
-            const ZERO: Self = 0.0;
-            const ONE: Self = 1.0;
-
-            const BITS: u32 = $bits;
-            const SIGNIFICAND_BITS: u32 = $significand_bits;
-
-            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
-            const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1;
-            const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS;
-            const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK);
-
-            fn repr(self) -> Self::Int {
-                self.to_bits()
-            }
-            fn signed_repr(self) -> Self::SignedInt {
-                self.to_bits() as Self::SignedInt
-            }
-            fn eq_repr(self, rhs: Self) -> bool {
-                #[cfg(feature = "mangled-names")]
-                fn is_nan(x: $ty) -> bool {
-                    // When using mangled-names, the "real" compiler-builtins might not have the
-                    // necessary builtin (__unordtf2) to test whether `f128` is NaN.
-                    // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
-                    // x is NaN if all the bits of the exponent are set and the significand is non-0
-                    x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
-                        && x.repr() & $ty::SIGNIFICAND_MASK != 0
-                }
-                #[cfg(not(feature = "mangled-names"))]
-                fn is_nan(x: $ty) -> bool {
-                    x.is_nan()
-                }
-                if is_nan(self) && is_nan(rhs) {
-                    true
-                } else {
-                    self.repr() == rhs.repr()
-                }
-            }
-            fn is_sign_negative(self) -> bool {
-                self.is_sign_negative()
-            }
-            fn exp(self) -> Self::ExpInt {
-                ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt
-            }
-            fn frac(self) -> Self::Int {
-                self.to_bits() & Self::SIGNIFICAND_MASK
-            }
-            fn imp_frac(self) -> Self::Int {
-                self.frac() | Self::IMPLICIT_BIT
-            }
-            fn from_repr(a: Self::Int) -> Self {
-                Self::from_bits(a)
-            }
-            fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self {
-                Self::from_repr(
-                    ((sign as Self::Int) << (Self::BITS - 1))
-                        | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK)
-                        | (significand & Self::SIGNIFICAND_MASK),
-                )
-            }
-            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
-                let shift = significand
-                    .leading_zeros()
-                    .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros());
-                (
-                    1i32.wrapping_sub(shift as i32),
-                    significand << shift as Self::Int,
-                )
-            }
-            fn is_subnormal(self) -> bool {
-                (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO
-            }
-        }
-    };
-}
-
-#[cfg(not(feature = "no-f16-f128"))]
-float_impl!(f16, u16, i16, i8, 16, 10);
-float_impl!(f32, u32, i32, i16, 32, 23);
-float_impl!(f64, u64, i64, i16, 64, 52);
-#[cfg(not(feature = "no-f16-f128"))]
-float_impl!(f128, u128, i128, i16, 128, 112);
--- a/library/compiler-builtins/src/int/leading_zeros.rs
+++ b/library/compiler-builtins/src/int/leading_zeros.rs
@ -3,140 +3,136 @@
 // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`.
 // Compilers will insert the check for zero in cases where it is needed.

-mod implementation {
-    use crate::int::{CastInto, Int};
+use crate::int::{CastInto, Int};

-    /// Returns the number of leading binary zeros in `x`.
-    #[allow(dead_code)]
-    pub fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
-        // The basic idea is to test if the higher bits of `x` are zero and bisect the number
-        // of leading zeros. It is possible for all branches of the bisection to use the same
-        // code path by conditionally shifting the higher parts down to let the next bisection
-        // step work on the higher or lower parts of `x`. Instead of starting with `z == 0`
-        // and adding to the number of zeros, it is slightly faster to start with
-        // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros,
-        // because it simplifies the final bisection step.
-        let mut x = x;
-        // the number of potential leading zeros
-        let mut z = T::BITS as usize;
-        // a temporary
-        let mut t: T;
+public_test_dep! {
+/// Returns the number of leading binary zeros in `x`.
+#[allow(dead_code)]
+pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
+    // The basic idea is to test if the higher bits of `x` are zero and bisect the number
+    // of leading zeros. It is possible for all branches of the bisection to use the same
+    // code path by conditionally shifting the higher parts down to let the next bisection
+    // step work on the higher or lower parts of `x`. Instead of starting with `z == 0`
+    // and adding to the number of zeros, it is slightly faster to start with
+    // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros,
+    // because it simplifies the final bisection step.
+    let mut x = x;
+    // the number of potential leading zeros
+    let mut z = T::BITS as usize;
+    // a temporary
+    let mut t: T;

-        const { assert!(T::BITS <= 64) };
-        if T::BITS >= 64 {
-            t = x >> 32;
-            if t != T::ZERO {
-                z -= 32;
-                x = t;
-            }
-        }
-        if T::BITS >= 32 {
-            t = x >> 16;
-            if t != T::ZERO {
-                z -= 16;
-                x = t;
-            }
-        }
-        const { assert!(T::BITS >= 16) };
-        t = x >> 8;
+    const { assert!(T::BITS <= 64) };
+    if T::BITS >= 64 {
+        t = x >> 32;
        if t != T::ZERO {
-            z -= 8;
+            z -= 32;
            x = t;
        }
-        t = x >> 4;
+    }
+    if T::BITS >= 32 {
+        t = x >> 16;
        if t != T::ZERO {
-            z -= 4;
+            z -= 16;
            x = t;
        }
-        t = x >> 2;
-        if t != T::ZERO {
-            z -= 2;
-            x = t;
-        }
-        // the last two bisections are combined into one conditional
-        t = x >> 1;
-        if t != T::ZERO {
-            z - 2
-        } else {
-            z - x.cast()
-        }
-
-        // We could potentially save a few cycles by using the LUT trick from
-        // "https://embeddedgurus.com/state-space/2014/09/
-        // fast-deterministic-and-portable-counting-leading-zeros/".
-        // However, 256 bytes for a LUT is too large for embedded use cases. We could remove
-        // the last 3 bisections  and use this 16 byte LUT for the rest of the work:
-        //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4];
-        //z -= LUT[x] as usize;
-        //z
-        // However, it ends up generating about the same number of instructions. When benchmarked
-        // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO
-        // execution effects. Changing to using a LUT and branching is risky for smaller cores.
+    }
+    const { assert!(T::BITS >= 16) };
+    t = x >> 8;
+    if t != T::ZERO {
+        z -= 8;
+        x = t;
+    }
+    t = x >> 4;
+    if t != T::ZERO {
+        z -= 4;
+        x = t;
+    }
+    t = x >> 2;
+    if t != T::ZERO {
+        z -= 2;
+        x = t;
+    }
+    // the last two bisections are combined into one conditional
+    t = x >> 1;
+    if t != T::ZERO {
+        z - 2
+    } else {
+        z - x.cast()
    }

-    // The above method does not compile well on RISC-V (because of the lack of predicated
-    // instructions), producing code with many branches or using an excessively long
-    // branchless solution. This method takes advantage of the set-if-less-than instruction on
-    // RISC-V that allows `(x >= power-of-two) as usize` to be branchless.
-
-    /// Returns the number of leading binary zeros in `x`.
-    #[allow(dead_code)]
-    pub fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
-        let mut x = x;
-        // the number of potential leading zeros
-        let mut z = T::BITS;
-        // a temporary
-        let mut t: u32;
-
-        // RISC-V does not have a set-if-greater-than-or-equal instruction and
-        // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is
-        // still the most optimal method. A conditional set can only be turned into a single
-        // immediate instruction if `x` is compared with an immediate `imm` (that can fit into
-        // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the
-        // right). If we try to save an instruction by using `x < imm` for each bisection, we
-        // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
-        // but the immediate will never fit into 12 bits and never save an instruction.
-        const { assert!(T::BITS <= 64) };
-        if T::BITS >= 64 {
-            // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
-            // `t` is set to 0.
-            t = ((x >= (T::ONE << 32)) as u32) << 5;
-            // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
-            // next step to process.
-            x >>= t;
-            // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential
-            // leading zeros
-            z -= t;
-        }
-        if T::BITS >= 32 {
-            t = ((x >= (T::ONE << 16)) as u32) << 4;
-            x >>= t;
-            z -= t;
-        }
-        const { assert!(T::BITS >= 16) };
-        t = ((x >= (T::ONE << 8)) as u32) << 3;
-        x >>= t;
-        z -= t;
-        t = ((x >= (T::ONE << 4)) as u32) << 2;
-        x >>= t;
-        z -= t;
-        t = ((x >= (T::ONE << 2)) as u32) << 1;
-        x >>= t;
-        z -= t;
-        t = (x >= (T::ONE << 1)) as u32;
-        x >>= t;
-        z -= t;
-        // All bits except the LSB are guaranteed to be zero for this final bisection step.
-        // If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
-        z as usize - x.cast()
-    }
+    // We could potentially save a few cycles by using the LUT trick from
+    // "https://embeddedgurus.com/state-space/2014/09/
+    // fast-deterministic-and-portable-counting-leading-zeros/".
+    // However, 256 bytes for a LUT is too large for embedded use cases. We could remove
+    // the last 3 bisections  and use this 16 byte LUT for the rest of the work:
+    //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4];
+    //z -= LUT[x] as usize;
+    //z
+    // However, it ends up generating about the same number of instructions. When benchmarked
+    // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO
+    // execution effects. Changing to using a LUT and branching is risky for smaller cores.
+}
 }

-#[cfg(not(feature = "public-test-deps"))]
-pub(crate) use implementation::*;
+// The above method does not compile well on RISC-V (because of the lack of predicated
+// instructions), producing code with many branches or using an excessively long
+// branchless solution. This method takes advantage of the set-if-less-than instruction on
+// RISC-V that allows `(x >= power-of-two) as usize` to be branchless.

-#[cfg(feature = "public-test-deps")]
-pub use implementation::*;
+public_test_dep! {
+/// Returns the number of leading binary zeros in `x` (`T::BITS` when `x` is zero).
+#[allow(dead_code)]
+pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
+    let mut x = x;
+    // the number of potential leading zeros
+    let mut z = T::BITS;
+    // a temporary: the shift amount chosen by the current bisection step (0 or a power of two)
+    let mut t: u32;
+
+    // RISC-V does not have a set-if-greater-than-or-equal instruction and
+    // `(x >= power-of-two) as u32` will get compiled into two instructions, but this is
+    // still the best method. A conditional set can only be turned into a single
+    // immediate instruction if `x` is compared with an immediate `imm` (that can fit into
+    // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the
+    // right). If we try to save an instruction by using `x < imm` for each bisection, we
+    // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
+    // but the immediate will never fit into 12 bits and never save an instruction.
+    const { assert!(T::BITS <= 64) };
+    if T::BITS >= 64 {
+        // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
+        // `t` is set to 0.
+        t = ((x >= (T::ONE << 32)) as u32) << 5;
+        // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
+        // next step to process.
+        x >>= t;
+        // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential
+        // leading zeros
+        z -= t;
+    }
+    if T::BITS >= 32 {
+        t = ((x >= (T::ONE << 16)) as u32) << 4; // same step for the next 16 bits
+        x >>= t;
+        z -= t;
+    }
+    const { assert!(T::BITS >= 16) };
+    t = ((x >= (T::ONE << 8)) as u32) << 3; // 8 or 0
+    x >>= t;
+    z -= t;
+    t = ((x >= (T::ONE << 4)) as u32) << 2; // 4 or 0
+    x >>= t;
+    z -= t;
+    t = ((x >= (T::ONE << 2)) as u32) << 1; // 2 or 0
+    x >>= t;
+    z -= t;
+    t = (x >= (T::ONE << 1)) as u32; // 1 or 0
+    x >>= t;
+    z -= t;
+    // All bits except the LSB are guaranteed to be zero for this final bisection step.
+    // If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
+    z as usize - x.cast()
+}
+}

 intrinsics! {
    /// Returns the number of leading binary zeros in `x`
--- a/library/compiler-builtins/src/int/mod.rs
+++ b/library/compiler-builtins/src/int/mod.rs
@ -1,4 +1,6 @@
-pub(crate) mod specialized_div_rem;
+use core::ops;
+
+mod specialized_div_rem;

 pub mod addsub;
 mod big;
@ -8,13 +10,416 @@ pub mod mul;
 pub mod sdiv;
 pub mod shift;
 pub mod trailing_zeros;
-mod traits;
 pub mod udiv;

 pub use big::{i256, u256};

-#[cfg(not(feature = "public-test-deps"))]
-pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
+public_test_dep! {
+/// Minimal integer implementations needed on all integer types, including wide integers.
+#[allow(dead_code)]
+pub(crate) trait MinInt: Copy
+    + core::fmt::Debug
+    + ops::BitOr<Output = Self>
+    + ops::Not<Output = Self>
+    + ops::Shl<u32, Output = Self>
+{

-#[cfg(feature = "public-test-deps")]
-pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
+    /// Type with the same width but other signedness
+    type OtherSign: MinInt;
+    /// Unsigned version of Self
+    type UnsignedInt: MinInt;
+
+    /// If `Self` is a signed integer
+    const SIGNED: bool;
+
+    /// The bitwidth of the int type
+    const BITS: u32;
+
+    const ZERO: Self;
+    const ONE: Self;
+    const MIN: Self;
+    const MAX: Self;
+}
+}
+
+public_test_dep! {
+/// Trait for some basic operations on integers
+#[allow(dead_code)]
+pub(crate) trait Int: MinInt
+    + PartialEq
+    + PartialOrd
+    + ops::AddAssign
+    + ops::SubAssign
+    + ops::BitAndAssign
+    + ops::BitOrAssign
+    + ops::BitXorAssign
+    + ops::ShlAssign<i32>
+    + ops::ShrAssign<u32>
+    + ops::Add<Output = Self>
+    + ops::Sub<Output = Self>
+    + ops::Mul<Output = Self>
+    + ops::Div<Output = Self>
+    + ops::Shr<u32, Output = Self>
+    + ops::BitXor<Output = Self>
+    + ops::BitAnd<Output = Self>
+{
+    /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing
+    /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111,
+    /// 112,119,120,125,126,127].
+    const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(<Self as MinInt>::BITS);
+
+    /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128.
+    const FUZZ_NUM: usize = {
+        let log2 = (<Self as MinInt>::BITS - 1).count_ones() as usize; // log2 for power-of-two BITS
+        if log2 == 3 {
+            // case for u8
+            6
+        } else {
+            // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
+            // boundaries.
+            8 + (4 * (log2 - 4))
+        }
+    };
+
+    fn unsigned(self) -> Self::UnsignedInt; // `self` converted to the unsigned type
+    fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; // the reverse of `unsigned`
+
+    fn from_bool(b: bool) -> Self;
+
+    /// Logical right shift. Prevents the need for excessive conversions between signed and unsigned
+    fn logical_shr(self, other: u32) -> Self;
+
+    /// Absolute difference between two integers.
+    fn abs_diff(self, other: Self) -> Self::UnsignedInt;
+
+    // copied from primitive integers, but put in a trait
+    fn is_zero(self) -> bool;
+    fn wrapping_neg(self) -> Self;
+    fn wrapping_add(self, other: Self) -> Self;
+    fn wrapping_mul(self, other: Self) -> Self;
+    fn wrapping_sub(self, other: Self) -> Self;
+    fn wrapping_shl(self, other: u32) -> Self;
+    fn wrapping_shr(self, other: u32) -> Self;
+    fn rotate_left(self, other: u32) -> Self;
+    fn overflowing_add(self, other: Self) -> (Self, bool);
+    fn leading_zeros(self) -> u32;
+    fn ilog2(self) -> u32;
+}
+}
+
+pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] { // table for `Int::FUZZ_LENGTHS`
+    let mut v = [0u8; 20]; // entries that are never written below stay 0
+    v[0] = 0;
+    v[1] = 1;
+    v[2] = 2; // important for parity and the iX::MIN case when reversed
+    let mut i = 3; // index of the next entry to write
+
+    // No need for any more until the byte boundary, because there should be no algorithms
+    // that are sensitive to anything not next to byte boundaries after 2. We also scale
+    // in powers of two, which is important to prevent u128 corner tests from getting too
+    // big.
+    let mut l = 8;
+    loop {
+        if l >= ((bits / 2) as u8) { // stop before the middle; it is handled separately below
+            break;
+        }
+        // get both sides of the byte boundary
+        v[i] = l - 1;
+        i += 1;
+        v[i] = l;
+        i += 1;
+        l *= 2;
+    }
+
+    if bits != 8 {
+        // add the lower side of the middle boundary
+        v[i] = ((bits / 2) - 1) as u8;
+        i += 1;
+    }
+
+    // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
+    // boundary because of algorithms that split the high part up. We reverse the scaling
+    // as we go to Self::BITS.
+    let mid = i; // number of entries written so far; they are mirrored from here on
+    let mut j = 1;
+    loop {
+        v[i] = (bits as u8) - (v[mid - j]) - 1; // mirror entry `mid - j` to `bits - 1 - entry`
+        if j == mid {
+            break;
+        }
+        i += 1;
+        j += 1;
+    }
+    v
+}
+
+macro_rules! int_impl_common { // `Int` methods whose bodies are the same for signed and unsigned
+    ($ty:ty) => {
+        fn from_bool(b: bool) -> Self {
+            b as $ty
+        }
+
+        fn logical_shr(self, other: u32) -> Self { // shift the unsigned form, then convert back
+            Self::from_unsigned(self.unsigned().wrapping_shr(other))
+        }
+
+        fn is_zero(self) -> bool {
+            self == Self::ZERO
+        }
+
+        fn wrapping_neg(self) -> Self { // `<Self>::` selects the primitive's inherent method
+            <Self>::wrapping_neg(self)
+        }
+
+        fn wrapping_add(self, other: Self) -> Self {
+            <Self>::wrapping_add(self, other)
+        }
+
+        fn wrapping_mul(self, other: Self) -> Self {
+            <Self>::wrapping_mul(self, other)
+        }
+
+        fn wrapping_sub(self, other: Self) -> Self {
+            <Self>::wrapping_sub(self, other)
+        }
+
+        fn wrapping_shl(self, other: u32) -> Self {
+            <Self>::wrapping_shl(self, other)
+        }
+
+        fn wrapping_shr(self, other: u32) -> Self {
+            <Self>::wrapping_shr(self, other)
+        }
+
+        fn rotate_left(self, other: u32) -> Self {
+            <Self>::rotate_left(self, other)
+        }
+
+        fn overflowing_add(self, other: Self) -> (Self, bool) {
+            <Self>::overflowing_add(self, other)
+        }
+
+        fn leading_zeros(self) -> u32 {
+            <Self>::leading_zeros(self)
+        }
+
+        fn ilog2(self) -> u32 {
+            <Self>::ilog2(self)
+        }
+    };
+}
+
+macro_rules! int_impl { // implements `MinInt` and `Int` for a (signed, unsigned) primitive pair
+    ($ity:ty, $uty:ty) => {
+        impl MinInt for $uty {
+            type OtherSign = $ity;
+            type UnsignedInt = $uty;
+
+            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros(); // zero has every bit clear
+            const SIGNED: bool = Self::MIN != Self::ZERO; // false: the unsigned minimum is zero
+
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MIN: Self = <Self>::MIN;
+            const MAX: Self = <Self>::MAX;
+        }
+
+        impl Int for $uty {
+            fn unsigned(self) -> $uty {
+                self
+            }
+
+            // It makes writing macros easier if this is implemented for both signed and unsigned
+            #[allow(clippy::wrong_self_convention)]
+            fn from_unsigned(me: $uty) -> Self {
+                me
+            }
+
+            fn abs_diff(self, other: Self) -> Self { // larger minus smaller, so it never wraps
+                if self < other {
+                    other.wrapping_sub(self)
+                } else {
+                    self.wrapping_sub(other)
+                }
+            }
+
+            int_impl_common!($uty);
+        }
+
+        impl MinInt for $ity {
+            type OtherSign = $uty;
+            type UnsignedInt = $uty;
+
+            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros(); // zero has every bit clear
+            const SIGNED: bool = Self::MIN != Self::ZERO; // true: the signed minimum is negative
+
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MIN: Self = <Self>::MIN;
+            const MAX: Self = <Self>::MAX;
+        }
+
+        impl Int for $ity {
+            fn unsigned(self) -> $uty {
+                self as $uty
+            }
+
+            fn from_unsigned(me: $uty) -> Self {
+                me as $ity
+            }
+
+            fn abs_diff(self, other: Self) -> $uty { // e.g. 127i8 and -128i8 must give 255u8
+                <Self>::abs_diff(self, other) // primitive method; exact if `self - other` wraps
+            }
+
+            int_impl_common!($ity);
+        }
+    };
+}
+
+int_impl!(isize, usize);
+int_impl!(i8, u8);
+int_impl!(i16, u16);
+int_impl!(i32, u32);
+int_impl!(i64, u64);
+int_impl!(i128, u128);
+
+public_test_dep! {
+/// Trait for integers twice the bit width of another integer. This is implemented for all
+/// primitives except for `u8`, because there is not a smaller primitive.
+pub(crate) trait DInt: MinInt {
+    /// Integer that is half the bit width of the integer this trait is implemented for
+    type H: HInt<D = Self>;
+
+    /// Returns the low half of `self`
+    fn lo(self) -> Self::H;
+    /// Returns the high half of `self`
+    fn hi(self) -> Self::H;
+    /// Returns the low and high halves of `self` as a tuple
+    fn lo_hi(self) -> (Self::H, Self::H) {
+        (self.lo(), self.hi())
+    }
+    /// Constructs an integer using lower and higher half parts
+    fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
+        lo.zero_widen() | hi.widen_hi()
+    }
+}
+}
+
+public_test_dep! {
+/// Trait for integers half the bit width of another integer. This is implemented for all
+/// primitives except for `u128`, because there is not a larger primitive.
+pub(crate) trait HInt: Int {
+    /// Integer that is double the bit width of the integer this trait is implemented for
+    type D: DInt<H = Self> + MinInt;
+
+    /// Widens (using default extension) the integer to have double bit width
+    fn widen(self) -> Self::D;
+    /// Widens (zero extension only) the integer to have double bit width. This is needed to get
+    /// around problems with associated type bounds (such as `Int<OtherSign: DInt>`) being unstable
+    fn zero_widen(self) -> Self::D;
+    /// Widens the integer to have double bit width and shifts the integer into the higher bits
+    fn widen_hi(self) -> Self::D {
+        self.widen() << <Self as MinInt>::BITS
+    }
+    /// Widening multiplication with zero widening. This cannot overflow.
+    fn zero_widen_mul(self, rhs: Self) -> Self::D;
+    /// Widening multiplication. This cannot overflow.
+    fn widen_mul(self, rhs: Self) -> Self::D;
+}
+}
+
+macro_rules! impl_d_int {
+    ($($X:ident $D:ident),*) => {
+        $(
+            impl DInt for $D {
+                type H = $X;
+
+                fn lo(self) -> Self::H {
+                    self as $X
+                }
+                fn hi(self) -> Self::H {
+                    (self >> <$X as MinInt>::BITS) as $X
+                }
+            }
+        )*
+    };
+}
+
+macro_rules! impl_h_int {
+    ($($H:ident $uH:ident $X:ident),*) => {
+        $(
+            impl HInt for $H {
+                type D = $X;
+
+                fn widen(self) -> Self::D {
+                    self as $X
+                }
+                fn zero_widen(self) -> Self::D {
+                    (self as $uH) as $X
+                }
+                fn zero_widen_mul(self, rhs: Self) -> Self::D {
+                    self.zero_widen().wrapping_mul(rhs.zero_widen())
+                }
+                fn widen_mul(self, rhs: Self) -> Self::D {
+                    self.widen().wrapping_mul(rhs.widen())
+                }
+            }
+        )*
+    };
+}
+
+impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128);
+impl_h_int!(
+    u8 u8 u16,
+    u16 u16 u32,
+    u32 u32 u64,
+    u64 u64 u128,
+    i8 u8 i16,
+    i16 u16 i32,
+    i32 u32 i64,
+    i64 u64 i128
+);
+
+public_test_dep! {
+/// Trait to express (possibly lossy) casting of integers
+pub(crate) trait CastInto<T: Copy>: Copy {
+    fn cast(self) -> T;
+}
+
+pub(crate) trait CastFrom<T: Copy>:Copy {
+    fn cast_from(value: T) -> Self;
+}
+}
+
+impl<T: Copy, U: CastInto<T> + Copy> CastFrom<U> for T {
+    fn cast_from(value: U) -> Self {
+        value.cast()
+    }
+}
+
+macro_rules! cast_into {
+    ($ty:ty) => {
+        cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128);
+    };
+    ($ty:ty; $($into:ty),*) => {$(
+        impl CastInto<$into> for $ty {
+            fn cast(self) -> $into {
+                self as $into
+            }
+        }
+    )*};
+}
+
+cast_into!(usize);
+cast_into!(isize);
+cast_into!(u8);
+cast_into!(i8);
+cast_into!(u16);
+cast_into!(i16);
+cast_into!(u32);
+cast_into!(i32);
+cast_into!(u64);
+cast_into!(i64);
+cast_into!(u128);
+cast_into!(i128);
--- a/library/compiler-builtins/src/int/specialized_div_rem/delegate.rs
+++ b/library/compiler-builtins/src/int/specialized_div_rem/delegate.rs
@ -185,6 +185,7 @@ macro_rules! impl_delegate {
    };
 }

+public_test_dep! {
 /// Returns `n / d` and sets `*rem = n % d`.
 ///
 /// This specialization exists because:
@ -194,7 +195,7 @@ macro_rules! impl_delegate {
 ///    delegate algorithm strategy the only reasonably fast way to perform `u128` division.
 // used on SPARC
 #[allow(dead_code)]
-pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
+pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
    use super::*;
    let duo_lo = duo as u64;
    let duo_hi = (duo >> 64) as u64;
@ -315,3 +316,4 @@ pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
        }
    }
 }
+}
--- a/library/compiler-builtins/src/int/trailing_zeros.rs
+++ b/library/compiler-builtins/src/int/trailing_zeros.rs
@ -1,51 +1,45 @@
-mod implementation {
-    use crate::int::{CastInto, Int};
+use crate::int::{CastInto, Int};

-    /// Returns number of trailing binary zeros in `x`.
-    #[allow(dead_code)]
-    pub fn trailing_zeros<T: Int + CastInto<u32> + CastInto<u16> + CastInto<u8>>(x: T) -> usize {
-        let mut x = x;
-        let mut r: u32 = 0;
-        let mut t: u32;
+public_test_dep! {
+/// Returns number of trailing binary zeros in `x`.
+#[allow(dead_code)]
+pub(crate) fn trailing_zeros<T: Int + CastInto<u32> + CastInto<u16> + CastInto<u8>>(x: T) -> usize {
+    let mut x = x;
+    let mut r: u32 = 0;
+    let mut t: u32;

-        const { assert!(T::BITS <= 64) };
-        if T::BITS >= 64 {
-            r += ((CastInto::<u32>::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
-            x >>= r; // remove 32 zero bits
-        }
-
-        if T::BITS >= 32 {
-            t = ((CastInto::<u16>::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
-            r += t;
-            x >>= t; // x = [0 - 0xFFFF] + higher garbage bits
-        }
-
-        const { assert!(T::BITS >= 16) };
-        t = ((CastInto::<u8>::cast(x) == 0) as u32) << 3;
-        x >>= t; // x = [0 - 0xFF] + higher garbage bits
-        r += t;
-
-        let mut x: u8 = x.cast();
-
-        t = (((x & 0x0F) == 0) as u32) << 2;
-        x >>= t; // x = [0 - 0xF] + higher garbage bits
-        r += t;
-
-        t = (((x & 0x3) == 0) as u32) << 1;
-        x >>= t; // x = [0 - 0x3] + higher garbage bits
-        r += t;
-
-        x &= 3;
-
-        r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg())
+    const { assert!(T::BITS <= 64) };
+    if T::BITS >= 64 {
+        r += ((CastInto::<u32>::cast(x) == 0) as u32) << 5; // if (x has no 32 small bits) t = 32 else 0
+        x >>= r; // remove 32 zero bits
    }
+
+    if T::BITS >= 32 {
+        t = ((CastInto::<u16>::cast(x) == 0) as u32) << 4; // if (x has no 16 small bits) t = 16 else 0
+        r += t;
+        x >>= t;         // x = [0 - 0xFFFF] + higher garbage bits
+    }
+
+    const { assert!(T::BITS >= 16) };
+    t = ((CastInto::<u8>::cast(x) == 0) as u32) << 3;
+    x >>= t; // x = [0 - 0xFF] + higher garbage bits
+    r += t;
+
+    let mut x: u8 = x.cast();
+
+    t = (((x & 0x0F) == 0) as u32) << 2;
+    x >>= t; // x = [0 - 0xF] + higher garbage bits
+    r += t;
+
+    t = (((x & 0x3) == 0) as u32) << 1;
+    x >>= t;  // x = [0 - 0x3] + higher garbage bits
+    r += t;
+
+    x &= 3;
+
+    r as usize + ((2 - (x >> 1) as usize) & (((x & 1) == 0) as usize).wrapping_neg())
+}
 }
-
-#[cfg(not(feature = "public-test-deps"))]
-pub(crate) use implementation::*;
-
-#[cfg(feature = "public-test-deps")]
-pub use implementation::*;

 intrinsics! {
    /// Returns the number of trailing binary zeros in `x` (32 bit version).
--- a/library/compiler-builtins/src/int/traits.rs
+++ b/library/compiler-builtins/src/int/traits.rs
@ -1,402 +0,0 @@
-use core::ops;
-
-/// Minimal integer implementations needed on all integer types, including wide integers.
-#[allow(dead_code)]
-pub trait MinInt:
-    Copy
-    + core::fmt::Debug
-    + ops::BitOr<Output = Self>
-    + ops::Not<Output = Self>
-    + ops::Shl<u32, Output = Self>
-{
-    /// Type with the same width but other signedness
-    type OtherSign: MinInt;
-    /// Unsigned version of Self
-    type UnsignedInt: MinInt;
-
-    /// If `Self` is a signed integer
-    const SIGNED: bool;
-
-    /// The bitwidth of the int type
-    const BITS: u32;
-
-    const ZERO: Self;
-    const ONE: Self;
-    const MIN: Self;
-    const MAX: Self;
-}
-
-/// Trait for some basic operations on integers
-#[allow(dead_code)]
-pub trait Int:
-    MinInt
-    + PartialEq
-    + PartialOrd
-    + ops::AddAssign
-    + ops::SubAssign
-    + ops::BitAndAssign
-    + ops::BitOrAssign
-    + ops::BitXorAssign
-    + ops::ShlAssign<i32>
-    + ops::ShrAssign<u32>
-    + ops::Add<Output = Self>
-    + ops::Sub<Output = Self>
-    + ops::Mul<Output = Self>
-    + ops::Div<Output = Self>
-    + ops::Shr<u32, Output = Self>
-    + ops::BitXor<Output = Self>
-    + ops::BitAnd<Output = Self>
-{
-    /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing
-    /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111,
-    /// 112,119,120,125,126,127].
-    const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(<Self as MinInt>::BITS);
-
-    /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128.
-    const FUZZ_NUM: usize = {
-        let log2 = (<Self as MinInt>::BITS - 1).count_ones() as usize;
-        if log2 == 3 {
-            // case for u8
-            6
-        } else {
-            // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
-            // boundaries.
-            8 + (4 * (log2 - 4))
-        }
-    };
-
-    fn unsigned(self) -> Self::UnsignedInt;
-    fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
-
-    fn from_bool(b: bool) -> Self;
-
-    /// Prevents the need for excessive conversions between signed and unsigned
-    fn logical_shr(self, other: u32) -> Self;
-
-    /// Absolute difference between two integers.
-    fn abs_diff(self, other: Self) -> Self::UnsignedInt;
-
-    // copied from primitive integers, but put in a trait
-    fn is_zero(self) -> bool;
-    fn wrapping_neg(self) -> Self;
-    fn wrapping_add(self, other: Self) -> Self;
-    fn wrapping_mul(self, other: Self) -> Self;
-    fn wrapping_sub(self, other: Self) -> Self;
-    fn wrapping_shl(self, other: u32) -> Self;
-    fn wrapping_shr(self, other: u32) -> Self;
-    fn rotate_left(self, other: u32) -> Self;
-    fn overflowing_add(self, other: Self) -> (Self, bool);
-    fn leading_zeros(self) -> u32;
-    fn ilog2(self) -> u32;
-}
-
-const fn make_fuzz_lengths(bits: u32) -> [u8; 20] {
-    let mut v = [0u8; 20];
-    v[0] = 0;
-    v[1] = 1;
-    v[2] = 2; // important for parity and the iX::MIN case when reversed
-    let mut i = 3;
-
-    // No need for any more until the byte boundary, because there should be no algorithms
-    // that are sensitive to anything not next to byte boundaries after 2. We also scale
-    // in powers of two, which is important to prevent u128 corner tests from getting too
-    // big.
-    let mut l = 8;
-    loop {
-        if l >= ((bits / 2) as u8) {
-            break;
-        }
-        // get both sides of the byte boundary
-        v[i] = l - 1;
-        i += 1;
-        v[i] = l;
-        i += 1;
-        l *= 2;
-    }
-
-    if bits != 8 {
-        // add the lower side of the middle boundary
-        v[i] = ((bits / 2) - 1) as u8;
-        i += 1;
-    }
-
-    // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
-    // boundary because of algorithms that split the high part up. We reverse the scaling
-    // as we go to Self::BITS.
-    let mid = i;
-    let mut j = 1;
-    loop {
-        v[i] = (bits as u8) - (v[mid - j]) - 1;
-        if j == mid {
-            break;
-        }
-        i += 1;
-        j += 1;
-    }
-    v
-}
-
-macro_rules! int_impl_common {
-    ($ty:ty) => {
-        fn from_bool(b: bool) -> Self {
-            b as $ty
-        }
-
-        fn logical_shr(self, other: u32) -> Self {
-            Self::from_unsigned(self.unsigned().wrapping_shr(other))
-        }
-
-        fn is_zero(self) -> bool {
-            self == Self::ZERO
-        }
-
-        fn wrapping_neg(self) -> Self {
-            <Self>::wrapping_neg(self)
-        }
-
-        fn wrapping_add(self, other: Self) -> Self {
-            <Self>::wrapping_add(self, other)
-        }
-
-        fn wrapping_mul(self, other: Self) -> Self {
-            <Self>::wrapping_mul(self, other)
-        }
-
-        fn wrapping_sub(self, other: Self) -> Self {
-            <Self>::wrapping_sub(self, other)
-        }
-
-        fn wrapping_shl(self, other: u32) -> Self {
-            <Self>::wrapping_shl(self, other)
-        }
-
-        fn wrapping_shr(self, other: u32) -> Self {
-            <Self>::wrapping_shr(self, other)
-        }
-
-        fn rotate_left(self, other: u32) -> Self {
-            <Self>::rotate_left(self, other)
-        }
-
-        fn overflowing_add(self, other: Self) -> (Self, bool) {
-            <Self>::overflowing_add(self, other)
-        }
-
-        fn leading_zeros(self) -> u32 {
-            <Self>::leading_zeros(self)
-        }
-
-        fn ilog2(self) -> u32 {
-            <Self>::ilog2(self)
-        }
-    };
-}
-
-macro_rules! int_impl {
-    ($ity:ty, $uty:ty) => {
-        impl MinInt for $uty {
-            type OtherSign = $ity;
-            type UnsignedInt = $uty;
-
-            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
-            const SIGNED: bool = Self::MIN != Self::ZERO;
-
-            const ZERO: Self = 0;
-            const ONE: Self = 1;
-            const MIN: Self = <Self>::MIN;
-            const MAX: Self = <Self>::MAX;
-        }
-
-        impl Int for $uty {
-            fn unsigned(self) -> $uty {
-                self
-            }
-
-            // It makes writing macros easier if this is implemented for both signed and unsigned
-            #[allow(clippy::wrong_self_convention)]
-            fn from_unsigned(me: $uty) -> Self {
-                me
-            }
-
-            fn abs_diff(self, other: Self) -> Self {
-                if self < other {
-                    other.wrapping_sub(self)
-                } else {
-                    self.wrapping_sub(other)
-                }
-            }
-
-            int_impl_common!($uty);
-        }
-
-        impl MinInt for $ity {
-            type OtherSign = $uty;
-            type UnsignedInt = $uty;
-
-            const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
-            const SIGNED: bool = Self::MIN != Self::ZERO;
-
-            const ZERO: Self = 0;
-            const ONE: Self = 1;
-            const MIN: Self = <Self>::MIN;
-            const MAX: Self = <Self>::MAX;
-        }
-
-        impl Int for $ity {
-            fn unsigned(self) -> $uty {
-                self as $uty
-            }
-
-            fn from_unsigned(me: $uty) -> Self {
-                me as $ity
-            }
-
-            fn abs_diff(self, other: Self) -> $uty {
-                self.wrapping_sub(other).wrapping_abs() as $uty
-            }
-
-            int_impl_common!($ity);
-        }
-    };
-}
-
-int_impl!(isize, usize);
-int_impl!(i8, u8);
-int_impl!(i16, u16);
-int_impl!(i32, u32);
-int_impl!(i64, u64);
-int_impl!(i128, u128);
-
-/// Trait for integers twice the bit width of another integer. This is implemented for all
-/// primitives except for `u8`, because there is not a smaller primitive.
-pub trait DInt: MinInt {
-    /// Integer that is half the bit width of the integer this trait is implemented for
-    type H: HInt<D = Self>;
-
-    /// Returns the low half of `self`
-    fn lo(self) -> Self::H;
-    /// Returns the high half of `self`
-    fn hi(self) -> Self::H;
-    /// Returns the low and high halves of `self` as a tuple
-    fn lo_hi(self) -> (Self::H, Self::H) {
-        (self.lo(), self.hi())
-    }
-    /// Constructs an integer using lower and higher half parts
-    fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
-        lo.zero_widen() | hi.widen_hi()
-    }
-}
-
-/// Trait for integers half the bit width of another integer. This is implemented for all
-/// primitives except for `u128`, because it there is not a larger primitive.
-pub trait HInt: Int {
-    /// Integer that is double the bit width of the integer this trait is implemented for
-    type D: DInt<H = Self> + MinInt;
-
-    /// Widens (using default extension) the integer to have double bit width
-    fn widen(self) -> Self::D;
-    /// Widens (zero extension only) the integer to have double bit width. This is needed to get
-    /// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable
-    fn zero_widen(self) -> Self::D;
-    /// Widens the integer to have double bit width and shifts the integer into the higher bits
-    fn widen_hi(self) -> Self::D {
-        self.widen() << <Self as MinInt>::BITS
-    }
-    /// Widening multiplication with zero widening. This cannot overflow.
-    fn zero_widen_mul(self, rhs: Self) -> Self::D;
-    /// Widening multiplication. This cannot overflow.
-    fn widen_mul(self, rhs: Self) -> Self::D;
-}
-
-macro_rules! impl_d_int {
-    ($($X:ident $D:ident),*) => {
-        $(
-            impl DInt for $D {
-                type H = $X;
-
-                fn lo(self) -> Self::H {
-                    self as $X
-                }
-                fn hi(self) -> Self::H {
-                    (self >> <$X as MinInt>::BITS) as $X
-                }
-            }
-        )*
-    };
-}
-
-macro_rules! impl_h_int {
-    ($($H:ident $uH:ident $X:ident),*) => {
-        $(
-            impl HInt for $H {
-                type D = $X;
-
-                fn widen(self) -> Self::D {
-                    self as $X
-                }
-                fn zero_widen(self) -> Self::D {
-                    (self as $uH) as $X
-                }
-                fn zero_widen_mul(self, rhs: Self) -> Self::D {
-                    self.zero_widen().wrapping_mul(rhs.zero_widen())
-                }
-                fn widen_mul(self, rhs: Self) -> Self::D {
-                    self.widen().wrapping_mul(rhs.widen())
-                }
-            }
-        )*
-    };
-}
-
-impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128);
-impl_h_int!(
-    u8 u8 u16,
-    u16 u16 u32,
-    u32 u32 u64,
-    u64 u64 u128,
-    i8 u8 i16,
-    i16 u16 i32,
-    i32 u32 i64,
-    i64 u64 i128
-);
-
-/// Trait to express (possibly lossy) casting of integers
-pub trait CastInto<T: Copy>: Copy {
-    fn cast(self) -> T;
-}
-
-pub trait CastFrom<T: Copy>: Copy {
-    fn cast_from(value: T) -> Self;
-}
-
-impl<T: Copy, U: CastInto<T> + Copy> CastFrom<U> for T {
-    fn cast_from(value: U) -> Self {
-        value.cast()
-    }
-}
-
-macro_rules! cast_into {
-    ($ty:ty) => {
-        cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128);
-    };
-    ($ty:ty; $($into:ty),*) => {$(
-        impl CastInto<$into> for $ty {
-            fn cast(self) -> $into {
-                self as $into
-            }
-        }
-    )*};
-}
-
-cast_into!(usize);
-cast_into!(isize);
-cast_into!(u8);
-cast_into!(i8);
-cast_into!(u16);
-cast_into!(i16);
-cast_into!(u32);
-cast_into!(i32);
-cast_into!(u64);
-cast_into!(i64);
-cast_into!(u128);
-cast_into!(i128);
--- a/library/compiler-builtins/src/macros.rs
+++ b/library/compiler-builtins/src/macros.rs
@ -1,5 +1,21 @@
 //! Macros shared throughout the compiler-builtins implementation

+/// Changes the visibility to `pub` if feature "public-test-deps" is set
+#[cfg(not(feature = "public-test-deps"))]
+macro_rules! public_test_dep {
+    ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => {
+        $(#[$($meta)*])* pub(crate) $ident $($tokens)*
+    };
+}
+
+/// Changes the visibility to `pub` if feature "public-test-deps" is set
+#[cfg(feature = "public-test-deps")]
+macro_rules! public_test_dep {
+    {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => {
+        $(#[$($meta)*])* pub $ident $($tokens)*
+    };
+}
+
 /// The "main macro" used for defining intrinsics.
 ///
 /// The compiler-builtins library is super platform-specific with tons of crazy