From 88dcaf20b5400e023024daae6a3c927a32fa36ab Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Fri, 18 Apr 2025 19:19:24 +0000
Subject: [PATCH] Mark generic functions `#[inline]`

Benchmarks for [1] seemed to indicate that repository organization for
some reason had an effect on performance, even though the exact same
rustc commands were running (though some in a different order). After
investigating more, it appears that dependencies may have an effect on
inlining thresholds for generic functions.

It is surprising that this happens; we more or less expect that public
functions will be standalone, but everything they call will be inlined.
To help ensure this, mark all generic functions `#[inline]` if they
should be merged into the public function.

Zulip discussion at [2].

[1]: https://github.com/rust-lang/libm/pull/533
[2]: https://rust-lang.zulipchat.com/#narrow/channel/182449-t-compiler.2Fhelp/topic/Dependencies.20affecting.20codegen/with/513079387
---
 library/compiler-builtins/libm/src/math/fma.rs                 | 1 +
 library/compiler-builtins/libm/src/math/fma_wide.rs            | 1 +
 library/compiler-builtins/libm/src/math/generic/ceil.rs        | 2 ++
 library/compiler-builtins/libm/src/math/generic/copysign.rs    | 1 +
 library/compiler-builtins/libm/src/math/generic/fabs.rs        | 1 +
 library/compiler-builtins/libm/src/math/generic/fdim.rs        | 1 +
 library/compiler-builtins/libm/src/math/generic/floor.rs       | 2 ++
 library/compiler-builtins/libm/src/math/generic/fmax.rs        | 2 +-
 library/compiler-builtins/libm/src/math/generic/fmaximum.rs    | 1 +
 .../compiler-builtins/libm/src/math/generic/fmaximum_num.rs    | 1 +
 library/compiler-builtins/libm/src/math/generic/fmin.rs        | 1 +
 library/compiler-builtins/libm/src/math/generic/fminimum.rs    | 1 +
 .../compiler-builtins/libm/src/math/generic/fminimum_num.rs    | 1 +
 library/compiler-builtins/libm/src/math/generic/fmod.rs        | 2 +-
 library/compiler-builtins/libm/src/math/generic/mod.rs         | 3 +++
 library/compiler-builtins/libm/src/math/generic/rint.rs        | 1 +
 library/compiler-builtins/libm/src/math/generic/round.rs       | 1 +
 library/compiler-builtins/libm/src/math/generic/scalbn.rs      | 1 +
 library/compiler-builtins/libm/src/math/generic/sqrt.rs        | 2 ++
 library/compiler-builtins/libm/src/math/generic/trunc.rs       | 2 ++
 library/compiler-builtins/libm/src/math/roundeven.rs           | 1 +
 21 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs
index 789b0836afbf..e0b3347acf85 100644
--- a/library/compiler-builtins/libm/src/math/fma.rs
+++ b/library/compiler-builtins/libm/src/math/fma.rs
@@ -29,6 +29,7 @@ pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
 
 /// Fused multiply-add that works when there is not a larger float size available. Computes
 /// `(x * y) + z`.
+#[inline]
 pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
 where
     F: Float,
diff --git a/library/compiler-builtins/libm/src/math/fma_wide.rs b/library/compiler-builtins/libm/src/math/fma_wide.rs
index 8e908a14f214..08b78b022645 100644
--- a/library/compiler-builtins/libm/src/math/fma_wide.rs
+++ b/library/compiler-builtins/libm/src/math/fma_wide.rs
@@ -28,6 +28,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
 
 /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
 /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
+#[inline]
 pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
 where
     F: Float + HFloat<D = B>,
diff --git a/library/compiler-builtins/libm/src/math/generic/ceil.rs b/library/compiler-builtins/libm/src/math/generic/ceil.rs
index bf7e1d8e2100..5c5bb47638fd 100644
--- a/library/compiler-builtins/libm/src/math/generic/ceil.rs
+++ b/library/compiler-builtins/libm/src/math/generic/ceil.rs
@@ -10,10 +10,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn ceil<F: Float>(x: F) -> F {
     ceil_status(x).val
 }
 
+#[inline]
 pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
diff --git a/library/compiler-builtins/libm/src/math/generic/copysign.rs b/library/compiler-builtins/libm/src/math/generic/copysign.rs
index 04864a359056..a61af22f04ab 100644
--- a/library/compiler-builtins/libm/src/math/generic/copysign.rs
+++ b/library/compiler-builtins/libm/src/math/generic/copysign.rs
@@ -1,6 +1,7 @@
 use super::super::Float;
 
 /// Copy the sign of `y` to `x`.
+#[inline]
 pub fn copysign<F: Float>(x: F, y: F) -> F {
     let mut ux = x.to_bits();
     let uy = y.to_bits();
diff --git a/library/compiler-builtins/libm/src/math/generic/fabs.rs b/library/compiler-builtins/libm/src/math/generic/fabs.rs
index 75b473107c67..0fa0edf9b876 100644
--- a/library/compiler-builtins/libm/src/math/generic/fabs.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fabs.rs
@@ -1,6 +1,7 @@
 use super::super::Float;
 
 /// Absolute value.
+#[inline]
 pub fn fabs<F: Float>(x: F) -> F {
     let abs_mask = !F::SIGN_MASK;
     F::from_bits(x.to_bits() & abs_mask)
diff --git a/library/compiler-builtins/libm/src/math/generic/fdim.rs b/library/compiler-builtins/libm/src/math/generic/fdim.rs
index bf971cd7d18c..a63007b191cc 100644
--- a/library/compiler-builtins/libm/src/math/generic/fdim.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fdim.rs
@@ -1,5 +1,6 @@
 use super::super::Float;
 
+#[inline]
 pub fn fdim<F: Float>(x: F, y: F) -> F {
     if x <= y { F::ZERO } else { x - y }
 }
diff --git a/library/compiler-builtins/libm/src/math/generic/floor.rs b/library/compiler-builtins/libm/src/math/generic/floor.rs
index 7799551644f3..2438046254fc 100644
--- a/library/compiler-builtins/libm/src/math/generic/floor.rs
+++ b/library/compiler-builtins/libm/src/math/generic/floor.rs
@@ -10,10 +10,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn floor<F: Float>(x: F) -> F {
     floor_status(x).val
 }
 
+#[inline]
 pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
 
diff --git a/library/compiler-builtins/libm/src/math/generic/fmax.rs b/library/compiler-builtins/libm/src/math/generic/fmax.rs
index 29a031100571..bf3f847e89bb 100644
--- a/library/compiler-builtins/libm/src/math/generic/fmax.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fmax.rs
@@ -16,7 +16,7 @@
 
 use super::super::Float;
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[inline]
 pub fn fmax<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() || x < y { y } else { x };
     // Canonicalize
diff --git a/library/compiler-builtins/libm/src/math/generic/fmaximum.rs b/library/compiler-builtins/libm/src/math/generic/fmaximum.rs
index 9e8d1739f678..387055af29c2 100644
--- a/library/compiler-builtins/libm/src/math/generic/fmaximum.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fmaximum.rs
@@ -11,6 +11,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmaximum<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() {
         x
diff --git a/library/compiler-builtins/libm/src/math/generic/fmaximum_num.rs b/library/compiler-builtins/libm/src/math/generic/fmaximum_num.rs
index 756ef5d9f6de..f7efdde80ea7 100644
--- a/library/compiler-builtins/libm/src/math/generic/fmaximum_num.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fmaximum_num.rs
@@ -13,6 +13,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
     let res =
         if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
diff --git a/library/compiler-builtins/libm/src/math/generic/fmin.rs b/library/compiler-builtins/libm/src/math/generic/fmin.rs
index 69fbf85a194e..cd3caeee4f20 100644
--- a/library/compiler-builtins/libm/src/math/generic/fmin.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fmin.rs
@@ -16,6 +16,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fmin<F: Float>(x: F, y: F) -> F {
     let res = if y.is_nan() || x < y { x } else { y };
     // Canonicalize
diff --git a/library/compiler-builtins/libm/src/math/generic/fminimum.rs b/library/compiler-builtins/libm/src/math/generic/fminimum.rs
index ee5493880eb4..4ddb36455064 100644
--- a/library/compiler-builtins/libm/src/math/generic/fminimum.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fminimum.rs
@@ -11,6 +11,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fminimum<F: Float>(x: F, y: F) -> F {
     let res = if x.is_nan() {
         x
diff --git a/library/compiler-builtins/libm/src/math/generic/fminimum_num.rs b/library/compiler-builtins/libm/src/math/generic/fminimum_num.rs
index 966618328242..441c204a9219 100644
--- a/library/compiler-builtins/libm/src/math/generic/fminimum_num.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fminimum_num.rs
@@ -13,6 +13,7 @@
 
 use super::super::Float;
 
+#[inline]
 pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
     let res =
         if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {
diff --git a/library/compiler-builtins/libm/src/math/generic/fmod.rs b/library/compiler-builtins/libm/src/math/generic/fmod.rs
index cd23350ea3a6..6414bbd25081 100644
--- a/library/compiler-builtins/libm/src/math/generic/fmod.rs
+++ b/library/compiler-builtins/libm/src/math/generic/fmod.rs
@@ -3,7 +3,7 @@
 
 use super::super::{CastFrom, Float, Int, MinInt};
 
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+#[inline]
 pub fn fmod<F: Float>(x: F, y: F) -> F {
     let zero = F::Int::ZERO;
     let one = F::Int::ONE;
diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs
index 9be185f809f1..35846351a6e8 100644
--- a/library/compiler-builtins/libm/src/math/generic/mod.rs
+++ b/library/compiler-builtins/libm/src/math/generic/mod.rs
@@ -1,3 +1,6 @@
+// Note: generic functions are marked `#[inline]` because, even though generic functions are
+// typically inlined, this does not seem to always be the case.
+
 mod ceil;
 mod copysign;
 mod fabs;
diff --git a/library/compiler-builtins/libm/src/math/generic/rint.rs b/library/compiler-builtins/libm/src/math/generic/rint.rs
index 45d2f3138665..9cdeb1185a86 100644
--- a/library/compiler-builtins/libm/src/math/generic/rint.rs
+++ b/library/compiler-builtins/libm/src/math/generic/rint.rs
@@ -6,6 +6,7 @@ use super::super::support::{FpResult, Round};
 
 /// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
 /// applicable.
+#[inline]
 pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
     let toint = F::ONE / F::EPSILON;
     let e = x.ex();
diff --git a/library/compiler-builtins/libm/src/math/generic/round.rs b/library/compiler-builtins/libm/src/math/generic/round.rs
index 8b51381880cc..01314ac70c27 100644
--- a/library/compiler-builtins/libm/src/math/generic/round.rs
+++ b/library/compiler-builtins/libm/src/math/generic/round.rs
@@ -1,6 +1,7 @@
 use super::super::{Float, MinInt};
 use super::{copysign, trunc};
 
+#[inline]
 pub fn round<F: Float>(x: F) -> F {
     let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5
     let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25
diff --git a/library/compiler-builtins/libm/src/math/generic/scalbn.rs b/library/compiler-builtins/libm/src/math/generic/scalbn.rs
index b2696e5cc9f5..a45db1b4a024 100644
--- a/library/compiler-builtins/libm/src/math/generic/scalbn.rs
+++ b/library/compiler-builtins/libm/src/math/generic/scalbn.rs
@@ -16,6 +16,7 @@ use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
 /// >
 /// > If the calculation does not overflow or underflow, the returned value is exact and
 /// > independent of the current rounding direction mode.
+#[inline]
 pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
 where
     u32: CastInto<F::Int>,
diff --git a/library/compiler-builtins/libm/src/math/generic/sqrt.rs b/library/compiler-builtins/libm/src/math/generic/sqrt.rs
index 5918025bc678..ec9ff22df208 100644
--- a/library/compiler-builtins/libm/src/math/generic/sqrt.rs
+++ b/library/compiler-builtins/libm/src/math/generic/sqrt.rs
@@ -44,6 +44,7 @@
 use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
 use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
 
+#[inline]
 pub fn sqrt<F>(x: F) -> F
 where
     F: Float + SqrtHelper,
@@ -57,6 +58,7 @@ where
     sqrt_round(x, Round::Nearest).val
 }
 
+#[inline]
 pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
 where
     F: Float + SqrtHelper,
diff --git a/library/compiler-builtins/libm/src/math/generic/trunc.rs b/library/compiler-builtins/libm/src/math/generic/trunc.rs
index 0fb3fa5ad3b8..25414ecf426a 100644
--- a/library/compiler-builtins/libm/src/math/generic/trunc.rs
+++ b/library/compiler-builtins/libm/src/math/generic/trunc.rs
@@ -4,10 +4,12 @@
 use super::super::support::{FpResult, Status};
 use super::super::{Float, Int, IntTy, MinInt};
 
+#[inline]
 pub fn trunc<F: Float>(x: F) -> F {
     trunc_status(x).val
 }
 
+#[inline]
 pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
     let mut xi: F::Int = x.to_bits();
     let e: i32 = x.exp_unbiased();
diff --git a/library/compiler-builtins/libm/src/math/roundeven.rs b/library/compiler-builtins/libm/src/math/roundeven.rs
index ec1738285e62..6e621d7628f2 100644
--- a/library/compiler-builtins/libm/src/math/roundeven.rs
+++ b/library/compiler-builtins/libm/src/math/roundeven.rs
@@ -30,6 +30,7 @@ pub fn roundevenf128(x: f128) -> f128 {
     roundeven_impl(x)
 }
 
+#[inline]
 pub fn roundeven_impl<F: Float>(x: F) -> F {
     super::generic::rint_round(x, Round::Nearest).val
 }