Auto merge of #149586 - tgross35:update-builtins, r=tgross35

compiler-builtins subtree update Subtree update of `compiler-builtins` to acb3a0074d. Created using https://github.com/rust-lang/josh-sync. r? `@ghost`
2025-12-04 11:28:11 +00:00 · 2025-12-04 11:28:11 +00:00 · 5372fc9cb7
commit 5372fc9cb7
parent b1b08cdef5 55c229b647
25 changed files with 314 additions and 82 deletions
--- a/library/compiler-builtins/.github/workflows/main.yaml
+++ b/library/compiler-builtins/.github/workflows/main.yaml
@ -13,7 +13,7 @@ env:
  RUSTDOCFLAGS: -Dwarnings
  RUSTFLAGS: -Dwarnings
  RUST_BACKTRACE: full
-  BENCHMARK_RUSTC: nightly-2025-05-28 # Pin the toolchain for reproducable results
+  BENCHMARK_RUSTC: nightly-2025-12-01 # Pin the toolchain for reproducible results

 jobs:
  # Determine which tests should be run based on changed files.
@ -132,10 +132,7 @@ jobs:
        rustup default "$channel"
        rustup target add "${{ matrix.target }}"

-    # Our scripts use nextest if possible. This is skipped on the native ppc
-    # and s390x runners since install-action doesn't support them.
    - uses: taiki-e/install-action@nextest
-      if: "!(matrix.os == 'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x')"

    - uses: Swatinem/rust-cache@v2
      with:
--- a/library/compiler-builtins/compiler-builtins/README.md
+++ b/library/compiler-builtins/compiler-builtins/README.md
@ -374,7 +374,7 @@ Miscellaneous functionality that is not used by Rust.
 - ~~i386/fp_mode.c~~
 - ~~int_util.c~~
 - ~~loongarch/fp_mode.c~~
- ~~os_version_check.c~~
+- ~~os_version_check.c~~ (implemented in `std` instead)
 - ~~riscv/fp_mode.c~~
 - ~~riscv/restore.S~~ (callee-saved registers)
 - ~~riscv/save.S~~ (callee-saved registers)
--- a/library/compiler-builtins/crates/musl-math-sys/build.rs
+++ b/library/compiler-builtins/crates/musl-math-sys/build.rs
@ -46,7 +46,7 @@ fn main() {
    let cfg = Config::from_env();

    if cfg.target_env == "msvc"
-        || cfg.target_family == "wasm"
+        || cfg.target_families.iter().any(|f| f == "wasm")
        || cfg.target_features.iter().any(|f| f == "thumb-mode")
    {
        println!(
@ -69,7 +69,7 @@ struct Config {
    musl_arch: String,
    target_arch: String,
    target_env: String,
-    target_family: String,
+    target_families: Vec<String>,
    target_os: String,
    target_string: String,
    target_vendor: String,
@ -79,6 +79,9 @@ struct Config {
 impl Config {
    fn from_env() -> Self {
        let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
+        let target_families = env::var("CARGO_CFG_TARGET_FAMILY")
+            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
+            .unwrap_or_default();
        let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
            .unwrap_or_default();
@ -104,7 +107,7 @@ impl Config {
            musl_arch,
            target_arch,
            target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
-            target_family: env::var("CARGO_CFG_TARGET_FAMILY").unwrap(),
+            target_families,
            target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
            target_string: env::var("TARGET").unwrap(),
            target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
--- a/library/compiler-builtins/crates/symbol-check/src/main.rs
+++ b/library/compiler-builtins/crates/symbol-check/src/main.rs
@ -7,9 +7,10 @@ use std::io::{BufRead, BufReader};
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};

-use object::read::archive::{ArchiveFile, ArchiveMember};
+use object::read::archive::ArchiveFile;
 use object::{
-    File as ObjFile, Object, ObjectSection, ObjectSymbol, Symbol, SymbolKind, SymbolScope,
+    File as ObjFile, Object, ObjectSection, ObjectSymbol, Result as ObjResult, Symbol, SymbolKind,
+    SymbolScope,
 };
 use serde_json::Value;

@ -24,6 +25,11 @@ Cargo will get invoked with `CARGO_ARGS` and the specified target. All output
 `compiler_builtins*.rlib` files will be checked.

 If TARGET is not specified, the host target is used.
+
+    check PATHS ...
+
+Run the same checks on the given set of paths, without invoking Cargo. Paths
+may be either archives or object files.
 ";

 fn main() {
@ -33,12 +39,14 @@ fn main() {

    match &args_ref[1..] {
        ["build-and-check", target, "--", args @ ..] if !args.is_empty() => {
-            check_cargo_args(args);
            run_build_and_check(target, args);
        }
        ["build-and-check", "--", args @ ..] if !args.is_empty() => {
-            check_cargo_args(args);
-            run_build_and_check(&host_target(), args);
+            let target = &host_target();
+            run_build_and_check(target, args);
+        }
+        ["check", paths @ ..] if !paths.is_empty() => {
+            check_paths(paths);
        }
        _ => {
            println!("{USAGE}");
@ -47,22 +55,25 @@ fn main() {
    }
 }

-/// Make sure `--target` isn't passed to avoid confusion (since it should be proivded only once,
-/// positionally).
-fn check_cargo_args(args: &[&str]) {
+fn run_build_and_check(target: &str, args: &[&str]) {
+    // Make sure `--target` isn't passed to avoid confusion (since it should be
+    // provided only once, positionally).
    for arg in args {
        assert!(
            !arg.contains("--target"),
            "target must be passed positionally. {USAGE}"
        );
    }
+
+    let paths = exec_cargo_with_args(target, args);
+    check_paths(&paths);
 }

-fn run_build_and_check(target: &str, args: &[&str]) {
-    let paths = exec_cargo_with_args(target, args);
+fn check_paths<P: AsRef<Path>>(paths: &[P]) {
    for path in paths {
+        let path = path.as_ref();
        println!("Checking {}", path.display());
-        let archive = Archive::from_path(&path);
+        let archive = BinFile::from_path(path);

        verify_no_duplicates(&archive);
        verify_core_symbols(&archive);
@ -165,7 +176,7 @@ struct SymInfo {
 }

 impl SymInfo {
-    fn new(sym: &Symbol, obj: &ObjFile, member: &ArchiveMember) -> Self {
+    fn new(sym: &Symbol, obj: &ObjFile, obj_path: &str) -> Self {
        // Include the section name if possible. Fall back to the `Section` debug impl if not.
        let section = sym.section();
        let section_name = sym
@ -187,7 +198,7 @@ impl SymInfo {
            is_weak: sym.is_weak(),
            is_common: sym.is_common(),
            address: sym.address(),
-            object: String::from_utf8_lossy(member.name()).into_owned(),
+            object: obj_path.to_owned(),
        }
    }
 }
@ -197,7 +208,7 @@ impl SymInfo {
 /// Note that this will also locate cases where a symbol is weakly defined in more than one place.
 /// Technically there are no linker errors that will come from this, but it keeps our binary more
 /// straightforward and saves some distribution size.
-fn verify_no_duplicates(archive: &Archive) {
+fn verify_no_duplicates(archive: &BinFile) {
    let mut syms = BTreeMap::<String, SymInfo>::new();
    let mut dups = Vec::new();
    let mut found_any = false;
@ -254,7 +265,7 @@ fn verify_no_duplicates(archive: &Archive) {
 }

 /// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
-fn verify_core_symbols(archive: &Archive) {
+fn verify_core_symbols(archive: &BinFile) {
    let mut defined = BTreeSet::new();
    let mut undefined = Vec::new();
    let mut has_symbols = false;
@ -289,39 +300,63 @@ fn verify_core_symbols(archive: &Archive) {
 }

 /// Thin wrapper for owning data used by `object`.
-struct Archive {
+struct BinFile {
+    path: PathBuf,
    data: Vec<u8>,
 }

-impl Archive {
+impl BinFile {
    fn from_path(path: &Path) -> Self {
        Self {
+            path: path.to_owned(),
            data: fs::read(path).expect("reading file failed"),
        }
    }

-    fn file(&self) -> ArchiveFile<'_> {
-        ArchiveFile::parse(self.data.as_slice()).expect("archive parse failed")
+    fn as_archive_file(&self) -> ObjResult<ArchiveFile<'_>> {
+        ArchiveFile::parse(self.data.as_slice())
    }

-    /// For a given archive, do something with each object file.
-    fn for_each_object(&self, mut f: impl FnMut(ObjFile, &ArchiveMember)) {
-        let archive = self.file();
+    fn as_obj_file(&self) -> ObjResult<ObjFile<'_>> {
+        ObjFile::parse(self.data.as_slice())
+    }

-        for member in archive.members() {
-            let member = member.expect("failed to access member");
-            let obj_data = member
-                .data(self.data.as_slice())
-                .expect("failed to access object");
-            let obj = ObjFile::parse(obj_data).expect("failed to parse object");
-            f(obj, &member);
+    /// For a given archive, do something with each object file. For an object file, do
+    /// something once.
+    fn for_each_object(&self, mut f: impl FnMut(ObjFile, &str)) {
+        // Try as an archive first.
+        let as_archive = self.as_archive_file();
+        if let Ok(archive) = as_archive {
+            for member in archive.members() {
+                let member = member.expect("failed to access member");
+                let obj_data = member
+                    .data(self.data.as_slice())
+                    .expect("failed to access object");
+                let obj = ObjFile::parse(obj_data).expect("failed to parse object");
+                f(obj, &String::from_utf8_lossy(member.name()));
+            }
+
+            return;
        }
+
+        // Fall back to parsing as an object file.
+        let as_obj = self.as_obj_file();
+        if let Ok(obj) = as_obj {
+            f(obj, &self.path.to_string_lossy());
+            return;
+        }
+
+        panic!(
+            "failed to parse as either archive or object file: {:?}, {:?}",
+            as_archive.unwrap_err(),
+            as_obj.unwrap_err(),
+        );
    }

-    /// For a given archive, do something with each symbol.
-    fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ObjFile, &ArchiveMember)) {
-        self.for_each_object(|obj, member| {
-            obj.symbols().for_each(|sym| f(sym, &obj, member));
+    /// Do something with each symbol in an archive or object file.
+    fn for_each_symbol(&self, mut f: impl FnMut(Symbol, &ObjFile, &str)) {
+        self.for_each_object(|obj, obj_path| {
+            obj.symbols().for_each(|sym| f(sym, &obj, obj_path));
        });
    }
 }
--- a/library/compiler-builtins/libm-test/benches/icount.rs
+++ b/library/compiler-builtins/libm-test/benches/icount.rs
@ -111,6 +111,17 @@ fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) {
    }
 }

+#[library_benchmark]
+#[bench::linspace(setup_u128_mul())]
+fn icount_bench_u256_narrowing_div(cases: Vec<(u128, u128)>) {
+    use libm::support::NarrowingDiv;
+    for (x, y) in cases.iter().copied() {
+        let x = black_box(x.widen_hi());
+        let y = black_box(y);
+        black_box(x.checked_narrowing_div_rem(y));
+    }
+}
+
 #[library_benchmark]
 #[bench::linspace(setup_u256_add())]
 fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
@ -145,7 +156,7 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {

 library_benchmark_group!(
    name = icount_bench_u128_group;
-    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
+    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_narrowing_div, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
 );

 #[library_benchmark]
--- a/library/compiler-builtins/libm-test/src/precision.rs
+++ b/library/compiler-builtins/libm-test/src/precision.rs
@ -1,8 +1,6 @@
 //! Configuration for skipping or changing the result for individual test cases (inputs) rather
 //! than ignoring entire tests.

-use core::f32;
-
 use CheckBasis::{Mpfr, Musl};
 use libm::support::CastFrom;
 use {BaseName as Bn, Identifier as Id};
--- a/library/compiler-builtins/libm/configure.rs
+++ b/library/compiler-builtins/libm/configure.rs
@ -13,7 +13,7 @@ pub struct Config {
    pub target_triple: String,
    pub target_arch: String,
    pub target_env: String,
-    pub target_family: Option<String>,
+    pub target_families: Vec<String>,
    pub target_os: String,
    pub target_string: String,
    pub target_vendor: String,
@ -25,6 +25,9 @@ pub struct Config {
 impl Config {
    pub fn from_env() -> Self {
        let target_triple = env::var("TARGET").unwrap();
+        let target_families = env::var("CARGO_CFG_TARGET_FAMILY")
+            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
+            .unwrap_or_default();
        let target_features = env::var("CARGO_CFG_TARGET_FEATURE")
            .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
            .unwrap_or_default();
@ -41,7 +44,7 @@ impl Config {
            cargo_features,
            target_arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
            target_env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
-            target_family: env::var("CARGO_CFG_TARGET_FAMILY").ok(),
+            target_families,
            target_os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
            target_string: env::var("TARGET").unwrap(),
            target_vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
--- a/library/compiler-builtins/libm/src/math/atan.rs
+++ b/library/compiler-builtins/libm/src/math/atan.rs
@ -29,8 +29,6 @@
 * to produce the hexadecimal values shown.
 */

-use core::f64;
-
 use super::fabs;

 const ATANHI: [f64; 4] = [
@ -134,19 +132,19 @@ pub fn atan(x: f64) -> f64 {

 #[cfg(test)]
 mod tests {
-    use core::f64;
+    use core::f64::consts;

    use super::atan;

    #[test]
    fn sanity_check() {
        for (input, answer) in [
-            (3.0_f64.sqrt() / 3.0, f64::consts::FRAC_PI_6),
-            (1.0, f64::consts::FRAC_PI_4),
-            (3.0_f64.sqrt(), f64::consts::FRAC_PI_3),
-            (-3.0_f64.sqrt() / 3.0, -f64::consts::FRAC_PI_6),
-            (-1.0, -f64::consts::FRAC_PI_4),
-            (-3.0_f64.sqrt(), -f64::consts::FRAC_PI_3),
+            (3.0_f64.sqrt() / 3.0, consts::FRAC_PI_6),
+            (1.0, consts::FRAC_PI_4),
+            (3.0_f64.sqrt(), consts::FRAC_PI_3),
+            (-3.0_f64.sqrt() / 3.0, -consts::FRAC_PI_6),
+            (-1.0, -consts::FRAC_PI_4),
+            (-3.0_f64.sqrt(), -consts::FRAC_PI_3),
        ]
        .iter()
        {
@ -167,12 +165,12 @@ mod tests {

    #[test]
    fn infinity() {
-        assert_eq!(atan(f64::INFINITY), f64::consts::FRAC_PI_2);
+        assert_eq!(atan(f64::INFINITY), consts::FRAC_PI_2);
    }

    #[test]
    fn minus_infinity() {
-        assert_eq!(atan(f64::NEG_INFINITY), -f64::consts::FRAC_PI_2);
+        assert_eq!(atan(f64::NEG_INFINITY), -consts::FRAC_PI_2);
    }

    #[test]
--- a/library/compiler-builtins/libm/src/math/cbrtf.rs
+++ b/library/compiler-builtins/libm/src/math/cbrtf.rs
@ -17,8 +17,6 @@
 * Return cube root of x
 */

-use core::f32;
-
 const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
 const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */

--- a/library/compiler-builtins/libm/src/math/expm1.rs
+++ b/library/compiler-builtins/libm/src/math/expm1.rs
@ -10,8 +10,6 @@
 * ====================================================
 */

-use core::f64;
-
 const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */
 const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */
 const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */
--- a/library/compiler-builtins/libm/src/math/hypot.rs
+++ b/library/compiler-builtins/libm/src/math/hypot.rs
@ -1,5 +1,3 @@
-use core::f64;
-
 use super::sqrt;

 const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1
--- a/library/compiler-builtins/libm/src/math/hypotf.rs
+++ b/library/compiler-builtins/libm/src/math/hypotf.rs
@ -1,5 +1,3 @@
-use core::f32;
-
 use super::sqrtf;

 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
--- a/library/compiler-builtins/libm/src/math/log10.rs
+++ b/library/compiler-builtins/libm/src/math/log10.rs
@ -17,8 +17,6 @@
 *    log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2)
 */

-use core::f64;
-
 const IVLN10HI: f64 = 4.34294481878168880939e-01; /* 0x3fdbcb7b, 0x15200000 */
 const IVLN10LO: f64 = 2.50829467116452752298e-11; /* 0x3dbb9438, 0xca9aadd5 */
 const LOG10_2HI: f64 = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */
--- a/library/compiler-builtins/libm/src/math/log10f.rs
+++ b/library/compiler-builtins/libm/src/math/log10f.rs
@ -13,8 +13,6 @@
 * See comments in log10.c.
 */

-use core::f32;
-
 const IVLN10HI: f32 = 4.3432617188e-01; /* 0x3ede6000 */
 const IVLN10LO: f32 = -3.1689971365e-05; /* 0xb804ead9 */
 const LOG10_2HI: f32 = 3.0102920532e-01; /* 0x3e9a2080 */
--- a/library/compiler-builtins/libm/src/math/log1p.rs
+++ b/library/compiler-builtins/libm/src/math/log1p.rs
@ -53,8 +53,6 @@
 *       See HP-15C Advanced Functions Handbook, p.193.
 */

-use core::f64;
-
 const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */
 const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */
 const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
--- a/library/compiler-builtins/libm/src/math/log1pf.rs
+++ b/library/compiler-builtins/libm/src/math/log1pf.rs
@ -10,8 +10,6 @@
 * ====================================================
 */

-use core::f32;
-
 const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */
 const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */
 /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
--- a/library/compiler-builtins/libm/src/math/log2.rs
+++ b/library/compiler-builtins/libm/src/math/log2.rs
@ -17,8 +17,6 @@
 *    log2(x) = (f - f*f/2 + r)/log(2) + k
 */

-use core::f64;
-
 const IVLN2HI: f64 = 1.44269504072144627571e+00; /* 0x3ff71547, 0x65200000 */
 const IVLN2LO: f64 = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */
 const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
--- a/library/compiler-builtins/libm/src/math/log2f.rs
+++ b/library/compiler-builtins/libm/src/math/log2f.rs
@ -13,8 +13,6 @@
 * See comments in log2.c.
 */

-use core::f32;
-
 const IVLN2HI: f32 = 1.4428710938e+00; /* 0x3fb8b000 */
 const IVLN2LO: f32 = -1.7605285393e-04; /* 0xb9389ad4 */
 /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */
--- a/library/compiler-builtins/libm/src/math/rem_pio2f.rs
+++ b/library/compiler-builtins/libm/src/math/rem_pio2f.rs
@ -14,8 +14,6 @@
 * ====================================================
 */

-use core::f64;
-
 use super::rem_pio2_large;

 const TOINT: f64 = 1.5 / f64::EPSILON;
--- a/library/compiler-builtins/libm/src/math/support/big/tests.rs
+++ b/library/compiler-builtins/libm/src/math/support/big/tests.rs
@ -3,6 +3,7 @@ use std::string::String;
 use std::{eprintln, format};

 use super::{HInt, MinInt, i256, u256};
+use crate::support::{Int as _, NarrowingDiv};

 const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;

@ -336,3 +337,28 @@ fn i256_shifts() {
        x = y;
    }
 }
+#[test]
+fn div_u256_by_u128() {
+    for j in i8::MIN..=i8::MAX {
+        let y: u128 = (j as i128).rotate_right(4).unsigned();
+        if y == 0 {
+            continue;
+        }
+        for i in i8::MIN..=i8::MAX {
+            let x: u128 = (i as i128).rotate_right(4).unsigned();
+            let xy = x.widen_mul(y);
+            assert_eq!(xy.checked_narrowing_div_rem(y), Some((x, 0)));
+            if y != 1 {
+                assert_eq!((xy + u256::ONE).checked_narrowing_div_rem(y), Some((x, 1)));
+            }
+            if x != 0 {
+                assert_eq!(
+                    (xy - u256::ONE).checked_narrowing_div_rem(y),
+                    Some((x - 1, y - 1))
+                );
+            }
+            let r = ((y as f64) * 0.12345) as u128;
+            assert_eq!((xy + r.widen()).checked_narrowing_div_rem(y), Some((x, r)));
+        }
+    }
+}
--- a/library/compiler-builtins/libm/src/math/support/float_traits.rs
+++ b/library/compiler-builtins/libm/src/math/support/float_traits.rs
@ -289,7 +289,10 @@ macro_rules! float_impl {
                cfg_if! {
                    // fma is not yet available in `core`
                    if #[cfg(intrinsics_enabled)] {
-                        core::intrinsics::$fma_intrinsic(self, y, z)
+                        // FIXME(msrv,bench): once our benchmark rustc version is above the
+                        // 2022-09-23 nightly, this can be removed.
+                        #[allow(unused_unsafe)]
+                        unsafe { core::intrinsics::$fma_intrinsic(self, y, z) }
                    } else {
                        super::super::$fma_fn(self, y, z)
                    }
--- a/library/compiler-builtins/libm/src/math/support/int_traits.rs
+++ b/library/compiler-builtins/libm/src/math/support/int_traits.rs
@ -1,5 +1,8 @@
 use core::{cmp, fmt, ops};

+mod narrowing_div;
+pub use narrowing_div::NarrowingDiv;
+
 /// Minimal integer implementations needed on all integer types, including wide integers.
 #[allow(dead_code)] // Some constants are only used with tests
 pub trait MinInt:
--- a/library/compiler-builtins/libm/src/math/support/int_traits/narrowing_div.rs
+++ b/library/compiler-builtins/libm/src/math/support/int_traits/narrowing_div.rs
@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: MIT OR Apache-2.0 */
+use crate::support::{CastInto, DInt, HInt, Int, MinInt, u256};
+
+/// Trait for unsigned division of a double-wide integer
+/// when the quotient doesn't overflow.
+///
+/// This is the inverse of widening multiplication:
+///  - for any `x` and nonzero `y`: `x.widen_mul(y).checked_narrowing_div_rem(y) == Some((x, 0))`,
+///  - and for any `r in 0..y`: `x.carrying_mul(y, r).checked_narrowing_div_rem(y) == Some((x, r))`.
+#[allow(dead_code)]
+pub trait NarrowingDiv: DInt + MinInt<Unsigned = Self> {
+    /// Computes `(self / n, self % n)`.
+    ///
+    /// # Safety
+    /// The caller must ensure that `self.hi() < n`, or equivalently,
+    /// that the quotient does not overflow.
+    unsafe fn unchecked_narrowing_div_rem(self, n: Self::H) -> (Self::H, Self::H);
+
+    /// Returns `Some((self / n, self % n))` when `self.hi() < n`.
+    fn checked_narrowing_div_rem(self, n: Self::H) -> Option<(Self::H, Self::H)> {
+        if self.hi() < n {
+            Some(unsafe { self.unchecked_narrowing_div_rem(n) })
+        } else {
+            None
+        }
+    }
+}
+
+// For primitive types we can just use the standard
+// division operators in the double-wide type.
+macro_rules! impl_narrowing_div_primitive {
+    ($D:ident) => {
+        impl NarrowingDiv for $D {
+            unsafe fn unchecked_narrowing_div_rem(self, n: Self::H) -> (Self::H, Self::H) {
+                if self.hi() >= n {
+                    unsafe { core::hint::unreachable_unchecked() }
+                }
+                ((self / n.widen()).cast(), (self % n.widen()).cast())
+            }
+        }
+    };
+}
+
+// Extend division from `u2N / uN` to `u4N / u2N`
+// This is not the most efficient algorithm, but it is
+// relatively simple.
+macro_rules! impl_narrowing_div_recurse {
+    ($D:ident) => {
+        impl NarrowingDiv for $D {
+            unsafe fn unchecked_narrowing_div_rem(self, n: Self::H) -> (Self::H, Self::H) {
+                if self.hi() >= n {
+                    unsafe { core::hint::unreachable_unchecked() }
+                }
+
+                // Normalize the divisor by shifting the most significant one
+                // to the leading position. `n != 0` is implied by `self.hi() < n`
+                let lz = n.leading_zeros();
+                let a = self << lz;
+                let b = n << lz;
+
+                let ah = a.hi();
+                let (a0, a1) = a.lo().lo_hi();
+                // SAFETY: For both calls, `b.leading_zeros() == 0` by the above shift.
+                // SAFETY: `ah < b` follows from `self.hi() < n`
+                let (q1, r) = unsafe { div_three_digits_by_two(a1, ah, b) };
+                // SAFETY: `r < b` is given as the postcondition of the previous call
+                let (q0, r) = unsafe { div_three_digits_by_two(a0, r, b) };
+
+                // Undo the earlier normalization for the remainder
+                (Self::H::from_lo_hi(q0, q1), r >> lz)
+            }
+        }
+    };
+}
+
+impl_narrowing_div_primitive!(u16);
+impl_narrowing_div_primitive!(u32);
+impl_narrowing_div_primitive!(u64);
+impl_narrowing_div_primitive!(u128);
+impl_narrowing_div_recurse!(u256);
+
+/// Implement `u3N / u2N`-division on top of `u2N / uN`-division.
+///
+/// Returns the quotient and remainder of `(a * R + a0) / n`,
+/// where `R = (1 << U::BITS)` is the digit size.
+///
+/// # Safety
+/// Requires that `n.leading_zeros() == 0` and `a < n`.
+unsafe fn div_three_digits_by_two<U>(a0: U, a: U::D, n: U::D) -> (U, U::D)
+where
+    U: HInt,
+    U::D: Int + NarrowingDiv,
+{
+    if n.leading_zeros() > 0 || a >= n {
+        unsafe { core::hint::unreachable_unchecked() }
+    }
+
+    // n = n1R + n0
+    let (n0, n1) = n.lo_hi();
+    // a = a2R + a1
+    let (a1, a2) = a.lo_hi();
+
+    let mut q;
+    let mut r;
+    let mut wrap;
+    // `a < n` is guaranteed by the caller, but `a2 == n1 && a1 < n0` is possible
+    if let Some((q0, r1)) = a.checked_narrowing_div_rem(n1) {
+        q = q0;
+        // a = qn1 + r1, where 0 <= r1 < n1
+
+        // Include the remainder with the low bits:
+        // r = a0 + r1R
+        r = U::D::from_lo_hi(a0, r1);
+
+        // Subtract the contribution of the divisor low bits with the estimated quotient
+        let d = q.widen_mul(n0);
+        (r, wrap) = r.overflowing_sub(d);
+
+        // Since `q` is the quotient of dividing with a slightly smaller divisor,
+        // it may be an overapproximation, but is never too small, and similarly,
+        // `r` is now either the correct remainder ...
+        if !wrap {
+            return (q, r);
+        }
+        // ... or the remainder went "negative" (by as much as `d = qn0 < RR`)
+        // and we have to adjust.
+        q -= U::ONE;
+    } else {
+        debug_assert!(a2 == n1 && a1 < n0);
+        // Otherwise, `a2 == n1`, and the estimated quotient would be
+        // `R + (a1 / n1)`, but the correct quotient can't overflow.
+        // We'll start from `q = R = (1 << U::BITS)`,
+        // so `r = aR + a0 - qn = (a - n)R + a0`
+        r = U::D::from_lo_hi(a0, a1.wrapping_sub(n0));
+        // Since `a < n`, the first decrement is always needed:
+        q = U::MAX; /* R - 1 */
+    }
+
+    (r, wrap) = r.overflowing_add(n);
+    if wrap {
+        return (q, r);
+    }
+
+    // If the remainder still didn't wrap, we need another step.
+    q -= U::ONE;
+    (r, wrap) = r.overflowing_add(n);
+    // Since `n >= RR/2`, at least one of the two `r += n` must have wrapped.
+    debug_assert!(wrap, "estimated quotient should be off by at most two");
+    (q, r)
+}
+
+#[cfg(test)]
+mod test {
+    use super::{HInt, NarrowingDiv};
+
+    #[test]
+    fn inverse_mul() {
+        for x in 0..=u8::MAX {
+            for y in 1..=u8::MAX {
+                let xy = x.widen_mul(y);
+                assert_eq!(xy.checked_narrowing_div_rem(y), Some((x, 0)));
+                assert_eq!(
+                    (xy + (y - 1) as u16).checked_narrowing_div_rem(y),
+                    Some((x, y - 1))
+                );
+                if y > 1 {
+                    assert_eq!((xy + 1).checked_narrowing_div_rem(y), Some((x, 1)));
+                    assert_eq!(
+                        (xy + (y - 2) as u16).checked_narrowing_div_rem(y),
+                        Some((x, y - 2))
+                    );
+                }
+            }
+        }
+    }
+}
--- a/library/compiler-builtins/libm/src/math/support/mod.rs
+++ b/library/compiler-builtins/libm/src/math/support/mod.rs
@ -28,7 +28,8 @@ pub use hex_float::hf16;
 pub use hex_float::hf128;
 #[allow(unused_imports)]
 pub use hex_float::{hf32, hf64};
-pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
+#[allow(unused_imports)]
+pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt, NarrowingDiv};

 /// Hint to the compiler that the current path is cold.
 pub fn cold_path() {
--- a/library/compiler-builtins/rust-version
+++ b/library/compiler-builtins/rust-version
@ -1 +1 @@
-d36f964125163c2e698de5559efefb8217b8b7f0
+47cd7120d9b4e1b64eb27c87522a07888197fae8