Auto merge of #150605 - RalfJung:fallback-intrinsic-skip, r=mati865

skip codegen for intrinsics with big fallback bodies if backend does not need them

This hopefully fixes the perf regression from https://github.com/rust-lang/rust/pull/148478. I only added the intrinsics with big fallback bodies to the list; it doesn't seem worth the effort of going through the entire list.

Fixes https://github.com/rust-lang/rust/issues/149945
Cc @scottmcm @bjorn3
This commit is contained in:
bors 2026-02-04 17:12:58 +00:00
commit db3e99bbab
6 changed files with 25 additions and 9 deletions

View file

@ -43,7 +43,7 @@ use rustc_middle::ty::TyCtxt;
use rustc_middle::util::Providers;
use rustc_session::Session;
use rustc_session::config::{OptLevel, OutputFilenames, PrintKind, PrintRequest};
use rustc_span::Symbol;
use rustc_span::{Symbol, sym};
use rustc_target::spec::{RelocModel, TlsModel};
use crate::llvm::ToLlvmBool;
@ -344,6 +344,10 @@ impl CodegenBackend for LlvmCodegenBackend {
target_config(sess)
}
fn replaced_intrinsics(&self) -> Vec<Symbol> {
    // Intrinsics this backend reports as replaced by its own implementation.
    Vec::from([
        sym::unchecked_funnel_shl,
        sym::unchecked_funnel_shr,
        sym::carrying_mul_add,
    ])
}
fn codegen_crate<'tcx>(&self, tcx: TyCtxt<'tcx>) -> Box<dyn Any> {
Box::new(rustc_codegen_ssa::base::codegen_crate(
LlvmCodegenBackend(()),

View file

@ -74,6 +74,12 @@ pub trait CodegenBackend {
fn print_version(&self) {}
/// Names of the intrinsics that this backend is guaranteed to replace
/// with its own implementation.
///
/// The fallback bodies of the listed intrinsics do not need to be
/// monomorphized. The default implementation lists no intrinsics.
fn replaced_intrinsics(&self) -> Vec<Symbol> {
    Vec::new()
}
/// Value printed by `--print=backend-has-zstd`.
///
/// Used by compiletest to determine whether tests involving zstd compression

View file

@ -474,6 +474,7 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
);
codegen_backend.init(&sess);
sess.replaced_intrinsics = FxHashSet::from_iter(codegen_backend.replaced_intrinsics());
let cfg = parse_cfg(sess.dcx(), config.crate_cfg);
let mut cfg = config::build_configuration(&sess, cfg);

View file

@ -1002,11 +1002,12 @@ fn visit_instance_use<'tcx>(
if tcx.should_codegen_locally(panic_instance) {
output.push(create_fn_mono_item(tcx, panic_instance, source));
}
} else if !intrinsic.must_be_overridden {
} else if !intrinsic.must_be_overridden
&& !tcx.sess.replaced_intrinsics.contains(&intrinsic.name)
{
// Codegen the fallback body of intrinsics with fallback bodies.
// We explicitly skip this otherwise to ensure we get a linker error
// if anyone tries to call this intrinsic and the codegen backend did not
// override the implementation.
// We have to skip this otherwise as there's no body to codegen.
// We also skip intrinsics the backend handles, to reduce monomorphizations.
let instance = ty::Instance::new_raw(instance.def_id(), instance.args);
if tcx.should_codegen_locally(instance) {
output.push(create_fn_mono_item(tcx, instance, source));

View file

@ -8,7 +8,7 @@ use std::{env, io};
use rand::{RngCore, rng};
use rustc_data_structures::base_n::{CASE_INSENSITIVE, ToBaseN};
use rustc_data_structures::flock;
use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet};
use rustc_data_structures::profiling::{SelfProfiler, SelfProfilerRef};
use rustc_data_structures::sync::{DynSend, DynSync, Lock, MappedReadGuard, ReadGuard, RwLock};
use rustc_errors::annotate_snippet_emitter_writer::AnnotateSnippetEmitter;
@ -154,6 +154,10 @@ pub struct Session {
/// preserved with a flag like `-C save-temps`, since these files may be
/// hard linked.
pub invocation_temp: Option<String>,
/// The names of intrinsics that the current codegen backend replaces
/// with its own implementations.
pub replaced_intrinsics: FxHashSet<Symbol>,
}
#[derive(Clone, Copy)]
@ -1092,6 +1096,7 @@ pub fn build_session(
target_filesearch,
host_filesearch,
invocation_temp,
replaced_intrinsics: FxHashSet::default(), // filled by `run_compiler`
};
validate_commandline_args_with_session_available(&sess);

View file

@ -11,10 +11,9 @@
use std::intrinsics::{carrying_mul_add, fallback};
// The fallbacks are emitted even when they're never used, but optimize out.
// The fallbacks should not be emitted.
// RAW: wide_mul_u128
// OPT-NOT: wide_mul_u128
// NOT: wide_mul_u128
// CHECK-LABEL: @cma_u8
#[no_mangle]