Mark generic functions #[inline]

Benchmarks for [1] seemed to indicate that repository organization for
some reason had an effect on performance, even though the exact same
rustc commands were running (though some with a different order). After
investigating more, it appears that dependencies may have an effect on
inlining thresholds for generic functions.

It is surprising that this happens; we more or less expect that public
functions will be standalone but everything they call will be inlined.
To help ensure this, mark all generic functions `#[inline]` if they
should be merged into the public function.

Zulip discussion at [2].

[1]: https://github.com/rust-lang/libm/pull/533
[2]: https://rust-lang.zulipchat.com/#narrow/channel/182449-t-compiler.2Fhelp/topic/Dependencies.20affecting.20codegen/with/513079387
This commit is contained in:
Trevor Gross 2025-04-18 19:19:24 +00:00 committed by Trevor Gross
parent 3cecf22e0c
commit 88dcaf20b5
21 changed files with 27 additions and 2 deletions

View file

@ -29,6 +29,7 @@ pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
/// Fused multiply-add that works when there is not a larger float size available. Computes
/// `(x * y) + z`.
#[inline]
pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
where
F: Float,

View file

@ -28,6 +28,7 @@ pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
#[inline]
pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
where
F: Float + HFloat<D = B>,

View file

@ -10,10 +10,12 @@
use super::super::support::{FpResult, Status};
use super::super::{Float, Int, IntTy, MinInt};
/// Generic `ceil`: delegates to `ceil_status` and returns only the `val` field of its
/// result, dropping whatever else the `FpResult` carries.
#[inline]
pub fn ceil<F: Float>(x: F) -> F {
    let outcome = ceil_status(x);
    outcome.val
}
#[inline]
pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
let zero = IntTy::<F>::ZERO;

View file

@ -1,6 +1,7 @@
use super::super::Float;
/// Copy the sign of `y` to `x`.
#[inline]
pub fn copysign<F: Float>(x: F, y: F) -> F {
let mut ux = x.to_bits();
let uy = y.to_bits();

View file

@ -1,6 +1,7 @@
use super::super::Float;
/// Absolute value.
#[inline]
pub fn fabs<F: Float>(x: F) -> F {
let abs_mask = !F::SIGN_MASK;
F::from_bits(x.to_bits() & abs_mask)

View file

@ -1,5 +1,6 @@
use super::super::Float;
/// Generic `fdim`: yields `F::ZERO` when `x <= y` holds, and `x - y` in every other case
/// (that is, whenever the `<=` comparison evaluates to `false`).
#[inline]
pub fn fdim<F: Float>(x: F, y: F) -> F {
    // Guard on the same `<=` test so any operand pair for which it is false still subtracts.
    if x <= y {
        return F::ZERO;
    }
    x - y
}

View file

@ -10,10 +10,12 @@
use super::super::support::{FpResult, Status};
use super::super::{Float, Int, IntTy, MinInt};
/// Generic `floor`: delegates to `floor_status` and returns only the `val` field of its
/// result, dropping whatever else the `FpResult` carries.
#[inline]
pub fn floor<F: Float>(x: F) -> F {
    let outcome = floor_status(x);
    outcome.val
}
#[inline]
pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
let zero = IntTy::<F>::ZERO;

View file

@ -16,7 +16,7 @@
use super::super::Float;
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
#[inline]
pub fn fmax<F: Float>(x: F, y: F) -> F {
let res = if x.is_nan() || x < y { y } else { x };
// Canonicalize

View file

@ -11,6 +11,7 @@
use super::super::Float;
#[inline]
pub fn fmaximum<F: Float>(x: F, y: F) -> F {
let res = if x.is_nan() {
x

View file

@ -13,6 +13,7 @@
use super::super::Float;
#[inline]
pub fn fmaximum_num<F: Float>(x: F, y: F) -> F {
let res =
if x.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {

View file

@ -16,6 +16,7 @@
use super::super::Float;
#[inline]
pub fn fmin<F: Float>(x: F, y: F) -> F {
let res = if y.is_nan() || x < y { x } else { y };
// Canonicalize

View file

@ -11,6 +11,7 @@
use super::super::Float;
#[inline]
pub fn fminimum<F: Float>(x: F, y: F) -> F {
let res = if x.is_nan() {
x

View file

@ -13,6 +13,7 @@
use super::super::Float;
#[inline]
pub fn fminimum_num<F: Float>(x: F, y: F) -> F {
let res =
if y.is_nan() || x < y || (x.to_bits() == F::NEG_ZERO.to_bits() && y.is_sign_positive()) {

View file

@ -3,7 +3,7 @@
use super::super::{CastFrom, Float, Int, MinInt};
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
#[inline]
pub fn fmod<F: Float>(x: F, y: F) -> F {
let zero = F::Int::ZERO;
let one = F::Int::ONE;

View file

@ -1,3 +1,6 @@
// Note: generic functions are marked `#[inline]` because, even though generic functions are
// typically inlined, this does not seem to always be the case.
mod ceil;
mod copysign;
mod fabs;

View file

@ -6,6 +6,7 @@ use super::super::support::{FpResult, Round};
/// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
/// applicable.
#[inline]
pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
let toint = F::ONE / F::EPSILON;
let e = x.ex();

View file

@ -1,6 +1,7 @@
use super::super::{Float, MinInt};
use super::{copysign, trunc};
#[inline]
pub fn round<F: Float>(x: F) -> F {
let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5
let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25

View file

@ -16,6 +16,7 @@ use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
/// >
/// > If the calculation does not overflow or underflow, the returned value is exact and
/// > independent of the current rounding direction mode.
#[inline]
pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
where
u32: CastInto<F::Int>,

View file

@ -44,6 +44,7 @@
use super::super::support::{FpResult, IntTy, Round, Status, cold_path};
use super::super::{CastFrom, CastInto, DInt, Float, HInt, Int, MinInt};
#[inline]
pub fn sqrt<F>(x: F) -> F
where
F: Float + SqrtHelper,
@ -57,6 +58,7 @@ where
sqrt_round(x, Round::Nearest).val
}
#[inline]
pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
where
F: Float + SqrtHelper,

View file

@ -4,10 +4,12 @@
use super::super::support::{FpResult, Status};
use super::super::{Float, Int, IntTy, MinInt};
/// Generic `trunc`: delegates to `trunc_status` and returns only the `val` field of its
/// result, dropping whatever else the `FpResult` carries.
#[inline]
pub fn trunc<F: Float>(x: F) -> F {
    let outcome = trunc_status(x);
    outcome.val
}
#[inline]
pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
let mut xi: F::Int = x.to_bits();
let e: i32 = x.exp_unbiased();

View file

@ -30,6 +30,7 @@ pub fn roundevenf128(x: f128) -> f128 {
roundeven_impl(x)
}
/// Generic `roundeven` body: calls `super::generic::rint_round` with `Round::Nearest` and
/// returns only the `val` field of the result.
#[inline]
pub fn roundeven_impl<F: Float>(x: F) -> F {
    let rounded = super::generic::rint_round(x, Round::Nearest);
    rounded.val
}