miri: use allocator_shim_contents codegen helper

This commit is contained in:
Ralf Jung 2025-10-15 10:24:22 +02:00
parent 28d0a4a205
commit 804af99457
7 changed files with 218 additions and 206 deletions

View file

@ -31,10 +31,23 @@ pub enum AllocatorTy {
Usize,
}
/// Some allocator methods are known to the compiler: they act more like
/// intrinsics/language primitives than library-defined functions.
/// FIXME: ideally this would be derived from attributes like `#[rustc_allocator]`,
/// so we don't have two sources of truth.
#[derive(Copy, Clone, Debug)]
pub enum SpecialAllocatorMethod {
Alloc,
AllocZeroed,
Dealloc,
Realloc,
}
/// A method that will be codegened in the allocator shim.
#[derive(Copy, Clone)]
pub struct AllocatorMethod {
pub name: Symbol,
pub special: Option<SpecialAllocatorMethod>,
pub inputs: &'static [AllocatorMethodInput],
pub output: AllocatorTy,
}
@ -47,11 +60,13 @@ pub struct AllocatorMethodInput {
pub static ALLOCATOR_METHODS: &[AllocatorMethod] = &[
AllocatorMethod {
name: sym::alloc,
special: Some(SpecialAllocatorMethod::Alloc),
inputs: &[AllocatorMethodInput { name: "layout", ty: AllocatorTy::Layout }],
output: AllocatorTy::ResultPtr,
},
AllocatorMethod {
name: sym::dealloc,
special: Some(SpecialAllocatorMethod::Dealloc),
inputs: &[
AllocatorMethodInput { name: "ptr", ty: AllocatorTy::Ptr },
AllocatorMethodInput { name: "layout", ty: AllocatorTy::Layout },
@ -60,6 +75,7 @@ pub static ALLOCATOR_METHODS: &[AllocatorMethod] = &[
},
AllocatorMethod {
name: sym::realloc,
special: Some(SpecialAllocatorMethod::Realloc),
inputs: &[
AllocatorMethodInput { name: "ptr", ty: AllocatorTy::Ptr },
AllocatorMethodInput { name: "layout", ty: AllocatorTy::Layout },
@ -69,6 +85,7 @@ pub static ALLOCATOR_METHODS: &[AllocatorMethod] = &[
},
AllocatorMethod {
name: sym::alloc_zeroed,
special: Some(SpecialAllocatorMethod::AllocZeroed),
inputs: &[AllocatorMethodInput { name: "layout", ty: AllocatorTy::Layout }],
output: AllocatorTy::ResultPtr,
},

View file

@ -1,13 +1,13 @@
use libc::c_uint;
use rustc_ast::expand::allocator::{
AllocatorMethod, AllocatorTy, NO_ALLOC_SHIM_IS_UNSTABLE, default_fn_name, global_fn_name,
AllocatorMethod, AllocatorTy, NO_ALLOC_SHIM_IS_UNSTABLE, SpecialAllocatorMethod,
default_fn_name, global_fn_name,
};
use rustc_codegen_ssa::traits::BaseTypeCodegenMethods as _;
use rustc_middle::bug;
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
use rustc_middle::ty::TyCtxt;
use rustc_session::config::{DebugInfo, OomStrategy};
use rustc_span::sym;
use rustc_symbol_mangling::mangle_internal_symbol;
use crate::attributes::llfn_attrs_from_instance;
@ -65,12 +65,12 @@ pub(crate) unsafe fn codegen(
let from_name = mangle_internal_symbol(tcx, &global_fn_name(method.name));
let to_name = mangle_internal_symbol(tcx, &default_fn_name(method.name));
let alloc_attr_flag = match method.name {
sym::alloc => CodegenFnAttrFlags::ALLOCATOR,
sym::dealloc => CodegenFnAttrFlags::DEALLOCATOR,
sym::realloc => CodegenFnAttrFlags::REALLOCATOR,
sym::alloc_zeroed => CodegenFnAttrFlags::ALLOCATOR_ZEROED,
_ => CodegenFnAttrFlags::empty(),
let alloc_attr_flag = match method.special {
Some(SpecialAllocatorMethod::Alloc) => CodegenFnAttrFlags::ALLOCATOR,
Some(SpecialAllocatorMethod::Dealloc) => CodegenFnAttrFlags::DEALLOCATOR,
Some(SpecialAllocatorMethod::Realloc) => CodegenFnAttrFlags::REALLOCATOR,
Some(SpecialAllocatorMethod::AllocZeroed) => CodegenFnAttrFlags::ALLOCATOR_ZEROED,
None => CodegenFnAttrFlags::empty(),
};
let mut attrs = CodegenFnAttrs::new();

View file

@ -669,6 +669,7 @@ pub fn allocator_shim_contents(tcx: TyCtxt<'_>, kind: AllocatorKind) -> Vec<Allo
if tcx.alloc_error_handler_kind(()).unwrap() == AllocatorKind::Default {
methods.push(AllocatorMethod {
name: ALLOC_ERROR_HANDLER,
special: None,
inputs: &[],
output: AllocatorTy::Never,
});

View file

@ -53,6 +53,7 @@
extern crate rustc_abi;
extern crate rustc_apfloat;
extern crate rustc_ast;
extern crate rustc_codegen_ssa;
extern crate rustc_const_eval;
extern crate rustc_data_structures;
extern crate rustc_errors;

View file

@ -12,6 +12,8 @@ use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use rustc_abi::{Align, ExternAbi, Size};
use rustc_apfloat::{Float, FloatConvert};
use rustc_ast::expand::allocator::{self, SpecialAllocatorMethod};
use rustc_data_structures::either::Either;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
#[allow(unused)]
use rustc_data_structures::static_assert_size;
@ -27,6 +29,7 @@ use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
use rustc_session::config::InliningThreshold;
use rustc_span::def_id::{CrateNum, DefId};
use rustc_span::{Span, SpanData, Symbol};
use rustc_symbol_mangling::mangle_internal_symbol;
use rustc_target::callconv::FnAbi;
use crate::alloc_addresses::EvalContextExt;
@ -652,6 +655,10 @@ pub struct MiriMachine<'tcx> {
pub(crate) pthread_rwlock_sanity: Cell<bool>,
pub(crate) pthread_condvar_sanity: Cell<bool>,
/// (Foreign) symbols that are synthesized as part of the allocator shim: the key indicates the
/// name of the symbol being synthesized; the value indicates whether this should invoke some
/// other symbol or whether this has special allocator semantics.
pub(crate) allocator_shim_symbols: FxHashMap<Symbol, Either<Symbol, SpecialAllocatorMethod>>,
/// Cache for `mangle_internal_symbol`.
pub(crate) mangle_internal_symbol_cache: FxHashMap<&'static str, String>,
@ -819,6 +826,7 @@ impl<'tcx> MiriMachine<'tcx> {
pthread_mutex_sanity: Cell::new(false),
pthread_rwlock_sanity: Cell::new(false),
pthread_condvar_sanity: Cell::new(false),
allocator_shim_symbols: Self::allocator_shim_symbols(tcx),
mangle_internal_symbol_cache: Default::default(),
force_intrinsic_fallback: config.force_intrinsic_fallback,
float_nondet: config.float_nondet,
@ -827,6 +835,36 @@ impl<'tcx> MiriMachine<'tcx> {
}
}
fn allocator_shim_symbols(
tcx: TyCtxt<'tcx>,
) -> FxHashMap<Symbol, Either<Symbol, SpecialAllocatorMethod>> {
use rustc_codegen_ssa::base::allocator_shim_contents;
// codegen uses `allocator_kind_for_codegen` here, but that's only needed to deal with
// dylibs which we do not support.
let Some(kind) = tcx.allocator_kind(()) else {
return Default::default();
};
let methods = allocator_shim_contents(tcx, kind);
let mut symbols = FxHashMap::default();
for method in methods {
let from_name = Symbol::intern(&mangle_internal_symbol(
tcx,
&allocator::global_fn_name(method.name),
));
let to = match method.special {
Some(special) => Either::Right(special),
None =>
Either::Left(Symbol::intern(&mangle_internal_symbol(
tcx,
&allocator::default_fn_name(method.name),
))),
};
symbols.try_insert(from_name, to).unwrap();
}
symbols
}
pub(crate) fn late_init(
ecx: &mut MiriInterpCx<'tcx>,
config: &MiriConfig,
@ -992,6 +1030,7 @@ impl VisitProvenance for MiriMachine<'_> {
pthread_mutex_sanity: _,
pthread_rwlock_sanity: _,
pthread_condvar_sanity: _,
allocator_shim_symbols: _,
mangle_internal_symbol_cache: _,
force_intrinsic_fallback: _,
float_nondet: _,

View file

@ -1,5 +1,8 @@
use rustc_abi::{Align, Size};
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_abi::{Align, AlignFromBytesError, CanonAbi, Size};
use rustc_ast::expand::allocator::SpecialAllocatorMethod;
use rustc_middle::ty::Ty;
use rustc_span::Symbol;
use rustc_target::callconv::FnAbi;
use crate::*;
@ -54,30 +57,100 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
Align::from_bytes(prev_power_of_two(size)).unwrap()
}
/// Emulates calling the internal __rust_* allocator functions
fn emulate_allocator(
/// Check some basic requirements for this allocation request:
/// non-zero size, power-of-two alignment.
fn check_rust_alloc_request(&self, size: u64, align: u64) -> InterpResult<'tcx> {
let this = self.eval_context_ref();
if size == 0 {
throw_ub_format!("creating allocation with size 0");
}
if size > this.max_size_of_val().bytes() {
throw_ub_format!("creating an allocation larger than half the address space");
}
if let Err(e) = Align::from_bytes(align) {
match e {
AlignFromBytesError::TooLarge(_) => {
throw_unsup_format!(
"creating allocation with alignment {align} exceeding rustc's maximum \
supported value"
);
}
AlignFromBytesError::NotPowerOfTwo(_) => {
throw_ub_format!("creating allocation with non-power-of-two alignment {align}");
}
}
}
interp_ok(())
}
fn rust_special_allocator_method(
&mut self,
default: impl FnOnce(&mut MiriInterpCx<'tcx>) -> InterpResult<'tcx>,
) -> InterpResult<'tcx, EmulateItemResult> {
method: SpecialAllocatorMethod,
link_name: Symbol,
abi: &FnAbi<'tcx, Ty<'tcx>>,
args: &[OpTy<'tcx>],
dest: &PlaceTy<'tcx>,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
let Some(allocator_kind) = this.tcx.allocator_kind(()) else {
// in real code, this symbol does not exist without an allocator
return interp_ok(EmulateItemResult::NotSupported);
};
match method {
SpecialAllocatorMethod::Alloc | SpecialAllocatorMethod::AllocZeroed => {
let [size, align] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let size = this.read_target_usize(size)?;
let align = this.read_target_usize(align)?;
match allocator_kind {
AllocatorKind::Global => {
// When `#[global_allocator]` is used, `__rust_*` is defined by the macro expansion
// of this attribute. As such we have to call an exported Rust function,
// and not execute any Miri shim. Somewhat unintuitively doing so is done
// by returning `NotSupported`, which triggers the `lookup_exported_symbol`
// fallback case in `emulate_foreign_item`.
interp_ok(EmulateItemResult::NotSupported)
this.check_rust_alloc_request(size, align)?;
let ptr = this.allocate_ptr(
Size::from_bytes(size),
Align::from_bytes(align).unwrap(),
MiriMemoryKind::Rust.into(),
if matches!(method, SpecialAllocatorMethod::AllocZeroed) {
AllocInit::Zero
} else {
AllocInit::Uninit
},
)?;
this.write_pointer(ptr, dest)
}
AllocatorKind::Default => {
default(this)?;
interp_ok(EmulateItemResult::NeedsReturn)
SpecialAllocatorMethod::Dealloc => {
let [ptr, old_size, align] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let ptr = this.read_pointer(ptr)?;
let old_size = this.read_target_usize(old_size)?;
let align = this.read_target_usize(align)?;
// No need to check old_size/align; we anyway check that they match the allocation.
this.deallocate_ptr(
ptr,
Some((Size::from_bytes(old_size), Align::from_bytes(align).unwrap())),
MiriMemoryKind::Rust.into(),
)
}
SpecialAllocatorMethod::Realloc => {
let [ptr, old_size, align, new_size] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let ptr = this.read_pointer(ptr)?;
let old_size = this.read_target_usize(old_size)?;
let align = this.read_target_usize(align)?;
let new_size = this.read_target_usize(new_size)?;
// No need to check old_size; we anyway check that they match the allocation.
this.check_rust_alloc_request(new_size, align)?;
let align = Align::from_bytes(align).unwrap();
let new_ptr = this.reallocate_ptr(
ptr,
Some((Size::from_bytes(old_size), align)),
Size::from_bytes(new_size),
align,
MiriMemoryKind::Rust.into(),
AllocInit::Uninit,
)?;
this.write_pointer(new_ptr, dest)
}
}
}

View file

@ -2,8 +2,9 @@ use std::collections::hash_map::Entry;
use std::io::Write;
use std::path::Path;
use rustc_abi::{Align, AlignFromBytesError, CanonAbi, Size};
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_abi::{Align, CanonAbi, Size};
use rustc_ast::expand::allocator::NO_ALLOC_SHIM_IS_UNSTABLE;
use rustc_data_structures::either::Either;
use rustc_hir::attrs::Linkage;
use rustc_hir::def::DefKind;
use rustc_hir::def_id::CrateNum;
@ -11,6 +12,7 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::mir::interpret::AllocInit;
use rustc_middle::ty::{Instance, Ty};
use rustc_middle::{mir, ty};
use rustc_session::config::OomStrategy;
use rustc_span::Symbol;
use rustc_target::callconv::FnAbi;
@ -50,31 +52,21 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
) -> InterpResult<'tcx, Option<(&'tcx mir::Body<'tcx>, ty::Instance<'tcx>)>> {
let this = self.eval_context_mut();
// Some shims forward to other MIR bodies.
match link_name.as_str() {
// This allocator function has forwarding shims synthesized during normal codegen
// (see `allocator_shim_contents`); this is where we emulate that behavior.
// FIXME should use global_fn_name, but mangle_internal_symbol requires a static str.
name if name == this.mangle_internal_symbol("__rust_alloc_error_handler") => {
// Forward to the right symbol that implements this function.
let Some(handler_kind) = this.tcx.alloc_error_handler_kind(()) else {
// in real code, this symbol does not exist without an allocator
throw_unsup_format!(
"`__rust_alloc_error_handler` cannot be called when no alloc error handler is set"
);
};
if handler_kind == AllocatorKind::Default {
let name =
Symbol::intern(this.mangle_internal_symbol("__rdl_alloc_error_handler"));
// Handle allocator shim.
if let Some(shim) = this.machine.allocator_shim_symbols.get(&link_name) {
match *shim {
Either::Left(other_fn) => {
let handler = this
.lookup_exported_symbol(name)?
.lookup_exported_symbol(other_fn)?
.expect("missing alloc error handler symbol");
return interp_ok(Some(handler));
}
// Fall through to the `lookup_exported_symbol` below which should find
// a `__rust_alloc_error_handler`.
Either::Right(special) => {
this.rust_special_allocator_method(special, link_name, abi, args, dest)?;
this.return_to_block(ret)?;
return interp_ok(None);
}
}
_ => {}
}
// FIXME: avoid allocating memory
@ -254,33 +246,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Check some basic requirements for this allocation request:
/// non-zero size, power-of-two alignment.
fn check_rustc_alloc_request(&self, size: u64, align: u64) -> InterpResult<'tcx> {
let this = self.eval_context_ref();
if size == 0 {
throw_ub_format!("creating allocation with size 0");
}
if size > this.max_size_of_val().bytes() {
throw_ub_format!("creating an allocation larger than half the address space");
}
if let Err(e) = Align::from_bytes(align) {
match e {
AlignFromBytesError::TooLarge(_) => {
throw_unsup_format!(
"creating allocation with alignment {align} exceeding rustc's maximum \
supported value"
);
}
AlignFromBytesError::NotPowerOfTwo(_) => {
throw_ub_format!("creating allocation with non-power-of-two alignment {align}");
}
}
}
interp_ok(())
}
fn emulate_foreign_item_inner(
&mut self,
link_name: Symbol,
@ -340,7 +305,51 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Here we dispatch all the shims for foreign functions. If you have a platform specific
// shim, add it to the corresponding submodule.
match link_name.as_str() {
// Magic functions Rust emits (and not as part of the allocator shim).
name if name == this.mangle_internal_symbol(NO_ALLOC_SHIM_IS_UNSTABLE) => {
// This is a no-op shim that only exists to prevent making the allocator shims
// instantly stable.
let [] = this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
}
name if name == this.mangle_internal_symbol(OomStrategy::SYMBOL) => {
// Gets the value of the `oom` option.
let [] = this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let val = this.tcx.sess.opts.unstable_opts.oom.should_panic();
this.write_int(val, dest)?;
}
// Miri-specific extern functions
"miri_alloc" => {
let [size, align] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let size = this.read_target_usize(size)?;
let align = this.read_target_usize(align)?;
this.check_rust_alloc_request(size, align)?;
let ptr = this.allocate_ptr(
Size::from_bytes(size),
Align::from_bytes(align).unwrap(),
MiriMemoryKind::Miri.into(),
AllocInit::Uninit,
)?;
this.write_pointer(ptr, dest)?;
}
"miri_dealloc" => {
let [ptr, old_size, align] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let ptr = this.read_pointer(ptr)?;
let old_size = this.read_target_usize(old_size)?;
let align = this.read_target_usize(align)?;
// No need to check old_size/align; we anyway check that they match the allocation.
this.deallocate_ptr(
ptr,
Some((Size::from_bytes(old_size), Align::from_bytes(align).unwrap())),
MiriMemoryKind::Miri.into(),
)?;
}
"miri_start_unwind" => {
let [payload] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
@ -492,7 +501,6 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
}
}
// GenMC mode: Assume statements block the current thread when their condition is false.
"miri_genmc_assume" => {
let [condition] =
@ -579,133 +587,6 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
}
// Rust allocation
name if name == this.mangle_internal_symbol("__rust_alloc") || name == "miri_alloc" => {
let default = |ecx: &mut MiriInterpCx<'tcx>| {
// Only call `check_shim` when `#[global_allocator]` isn't used. When that
// macro is used, we act like no shim exists, so that the exported function can run.
let [size, align] =
ecx.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let size = ecx.read_target_usize(size)?;
let align = ecx.read_target_usize(align)?;
ecx.check_rustc_alloc_request(size, align)?;
let memory_kind = match link_name.as_str() {
"miri_alloc" => MiriMemoryKind::Miri,
_ => MiriMemoryKind::Rust,
};
let ptr = ecx.allocate_ptr(
Size::from_bytes(size),
Align::from_bytes(align).unwrap(),
memory_kind.into(),
AllocInit::Uninit,
)?;
ecx.write_pointer(ptr, dest)
};
match link_name.as_str() {
"miri_alloc" => {
default(this)?;
return interp_ok(EmulateItemResult::NeedsReturn);
}
_ => return this.emulate_allocator(default),
}
}
name if name == this.mangle_internal_symbol("__rust_alloc_zeroed") => {
return this.emulate_allocator(|this| {
// See the comment for `__rust_alloc` why `check_shim` is only called in the
// default case.
let [size, align] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let size = this.read_target_usize(size)?;
let align = this.read_target_usize(align)?;
this.check_rustc_alloc_request(size, align)?;
let ptr = this.allocate_ptr(
Size::from_bytes(size),
Align::from_bytes(align).unwrap(),
MiriMemoryKind::Rust.into(),
AllocInit::Zero,
)?;
this.write_pointer(ptr, dest)
});
}
name if name == this.mangle_internal_symbol("__rust_dealloc")
|| name == "miri_dealloc" =>
{
let default = |ecx: &mut MiriInterpCx<'tcx>| {
// See the comment for `__rust_alloc` why `check_shim` is only called in the
// default case.
let [ptr, old_size, align] =
ecx.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let ptr = ecx.read_pointer(ptr)?;
let old_size = ecx.read_target_usize(old_size)?;
let align = ecx.read_target_usize(align)?;
let memory_kind = match link_name.as_str() {
"miri_dealloc" => MiriMemoryKind::Miri,
_ => MiriMemoryKind::Rust,
};
// No need to check old_size/align; we anyway check that they match the allocation.
ecx.deallocate_ptr(
ptr,
Some((Size::from_bytes(old_size), Align::from_bytes(align).unwrap())),
memory_kind.into(),
)
};
match link_name.as_str() {
"miri_dealloc" => {
default(this)?;
return interp_ok(EmulateItemResult::NeedsReturn);
}
_ => return this.emulate_allocator(default),
}
}
name if name == this.mangle_internal_symbol("__rust_realloc") => {
return this.emulate_allocator(|this| {
// See the comment for `__rust_alloc` why `check_shim` is only called in the
// default case.
let [ptr, old_size, align, new_size] =
this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let ptr = this.read_pointer(ptr)?;
let old_size = this.read_target_usize(old_size)?;
let align = this.read_target_usize(align)?;
let new_size = this.read_target_usize(new_size)?;
// No need to check old_size; we anyway check that they match the allocation.
this.check_rustc_alloc_request(new_size, align)?;
let align = Align::from_bytes(align).unwrap();
let new_ptr = this.reallocate_ptr(
ptr,
Some((Size::from_bytes(old_size), align)),
Size::from_bytes(new_size),
align,
MiriMemoryKind::Rust.into(),
AllocInit::Uninit,
)?;
this.write_pointer(new_ptr, dest)
});
}
name if name == this.mangle_internal_symbol("__rust_no_alloc_shim_is_unstable_v2") => {
// This is a no-op shim that only exists to prevent making the allocator shims instantly stable.
let [] = this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
}
name if name
== this.mangle_internal_symbol("__rust_alloc_error_handler_should_panic_v2") =>
{
// Gets the value of the `oom` option.
let [] = this.check_shim_sig_lenient(abi, CanonAbi::Rust, link_name, args)?;
let val = this.tcx.sess.opts.unstable_opts.oom.should_panic();
this.write_int(val, dest)?;
}
// C memory handling functions
"memcmp" => {
let [left, right, n] =