Auto merge of #145910 - saethlin:ignore-intrinsic-calls, r=cjgillot

Ignore intrinsic calls in cross-crate-inlining cost model

I noticed in a side project that a function which just compares two `[u64; 2]` values for equality is not cross-crate-inlinable. That was surprising to me because I didn't think that code contained a function call, but of course our array comparisons are lowered to an intrinsic. Intrinsic calls don't make a function no longer a leaf, so it makes sense to add this as an exception to the "only leaves" cross-crate-inline heuristic.

This is the useful compare link: https://perf.rust-lang.org/compare.html?start=7cb1a81145a739c4fd858abe3c624ce8e6e5f9cd&end=c3f0a64dbf9fba4722dacf8e39d2fe00069c995e&stat=instructions%3Au because it disables CGU merging in both commits, so effects that cause changes in the sysroot to perturb partitioning downstream are excluded. Perturbations to what is and isn't cross-crate-inlinable in the sysroot have chaotic effects on what items are in which CGUs after merging. It looks like before this PR by sheer luck some of the CGUs dirtied by the patch in eza incr-unchanged happened to be merged together, and with this PR they are not.

The perf runs on this PR point to a nice runtime performance improvement.
This commit is contained in:
bors 2025-09-08 03:03:21 +00:00
commit a09fbe2c83
6 changed files with 25 additions and 1 deletions

View file

@ -135,7 +135,16 @@ impl<'tcx> Visitor<'tcx> for CostChecker<'_, 'tcx> {
} }
} }
} }
TerminatorKind::Call { unwind, .. } => { TerminatorKind::Call { ref func, unwind, .. } => {
// We track calls because they make our function not a leaf (and in theory, the
// number of calls indicates how likely this function is to perturb other CGUs).
// But intrinsics don't have a body that gets assigned to a CGU, so they are
// ignored.
if let Some((fn_def_id, _)) = func.const_fn_def()
&& self.tcx.has_attr(fn_def_id, sym::rustc_intrinsic)
{
return;
}
self.calls += 1; self.calls += 1;
if let UnwindAction::Cleanup(_) = unwind { if let UnwindAction::Cleanup(_) = unwind {
self.landing_pads += 1; self.landing_pads += 1;

View file

@ -9,6 +9,7 @@
// CHECK-LABEL: use_bp // CHECK-LABEL: use_bp
// aarch64: brk #0xf000 // aarch64: brk #0xf000
// x86_64: int3 // x86_64: int3
#[inline(never)]
pub fn use_bp() { pub fn use_bp() {
core::arch::breakpoint(); core::arch::breakpoint();
} }

View file

@ -16,6 +16,7 @@ use std::simd::*;
// It would emit about an extra fadd, depending on the architecture. // It would emit about an extra fadd, depending on the architecture.
// CHECK-LABEL: reduce_fadd_negative_zero // CHECK-LABEL: reduce_fadd_negative_zero
#[inline(never)]
pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 { pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
// x86_64: addps // x86_64: addps
// x86_64-NEXT: movshdup // x86_64-NEXT: movshdup

View file

@ -18,3 +18,8 @@ pub fn stem_fn() -> String {
fn inner() -> String { fn inner() -> String {
String::from("test") String::from("test")
} }
// This function's optimized MIR contains a call, but it is to an intrinsic.
// Compares the two referenced `[u64; 2]` arrays with `==` and returns the result.
// The body is deliberately just the array comparison: that comparison is what
// produces the intrinsic call this test relies on, so do not rewrite it.
pub fn leaf_with_intrinsic(a: &[u64; 2], b: &[u64; 2]) -> bool {
a == b
}

View file

@ -18,3 +18,10 @@ pub fn stem_outer() -> String {
// CHECK: call {{.*}}stem_fn // CHECK: call {{.*}}stem_fn
leaf::stem_fn() leaf::stem_fn()
} }
// Check that we inline functions that call intrinsics
// This wrapper only forwards its two array references to
// `leaf::leaf_with_intrinsic` and returns that result.
#[no_mangle]
pub fn leaf_with_intrinsic_outer(a: &[u64; 2], b: &[u64; 2]) -> bool {
// The FileCheck directive below requires that no call to the callee remains
// in the generated output, i.e. that the callee was inlined into this function.
// CHECK-NOT: call {{.*}}leaf_with_intrinsic
leaf::leaf_with_intrinsic(a, b)
}

View file

@ -32,6 +32,7 @@ pub static tested_symbol: [u8; 6] = *b"foobar";
// INTERPOSABLE: @{{.*}}default_visibility{{.*}}tested_symbol{{.*}} = constant // INTERPOSABLE: @{{.*}}default_visibility{{.*}}tested_symbol{{.*}} = constant
// DEFAULT: @{{.*}}default_visibility{{.*}}tested_symbol{{.*}} = constant // DEFAULT: @{{.*}}default_visibility{{.*}}tested_symbol{{.*}} = constant
#[inline(never)]
pub fn do_memcmp(left: &[u8], right: &[u8]) -> i32 { pub fn do_memcmp(left: &[u8], right: &[u8]) -> i32 {
left.cmp(right) as i32 left.cmp(right) as i32
} }