From 1d9efbbd8f618b1618840c2ab677feed52eac138 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 26 Jul 2020 11:11:17 +0200 Subject: [PATCH] Miri: replace canonical_alloc_id mechanism by extern_static_alloc_id which is called only when a pointer is 'imported' into the machine --- src/librustc_middle/mir/interpret/error.rs | 8 +-- src/librustc_mir/interpret/eval_context.rs | 15 +++-- src/librustc_mir/interpret/machine.rs | 69 +++++++--------------- src/librustc_mir/interpret/memory.rs | 59 ++++++++++-------- src/librustc_mir/interpret/operand.rs | 18 +++--- src/librustc_mir/interpret/place.rs | 2 +- src/librustc_mir/interpret/step.rs | 4 +- 7 files changed, 78 insertions(+), 97 deletions(-) diff --git a/src/librustc_middle/mir/interpret/error.rs b/src/librustc_middle/mir/interpret/error.rs index abbbbf7fbd6a..ff6284e75db8 100644 --- a/src/librustc_middle/mir/interpret/error.rs +++ b/src/librustc_middle/mir/interpret/error.rs @@ -502,8 +502,6 @@ impl fmt::Display for UndefinedBehaviorInfo<'_> { pub enum UnsupportedOpInfo { /// Free-form case. Only for errors that are never caught! Unsupported(String), - /// Accessing an unsupported foreign static. - ReadForeignStatic(DefId), /// Could not find MIR for a function. NoMirFor(DefId), /// Encountered a pointer where we needed raw bytes. @@ -515,6 +513,8 @@ pub enum UnsupportedOpInfo { ReadBytesAsPointer, /// Accessing thread local statics ThreadLocalStatic(DefId), + /// Accessing an unsupported extern static. + ReadExternStatic(DefId), } impl fmt::Display for UnsupportedOpInfo { @@ -522,8 +522,8 @@ impl fmt::Display for UnsupportedOpInfo { use UnsupportedOpInfo::*; match self { Unsupported(ref msg) => write!(f, "{}", msg), - ReadForeignStatic(did) => { - write!(f, "cannot read from foreign (extern) static ({:?})", did) + ReadExternStatic(did) => { + write!(f, "cannot read from extern static ({:?})", did) } NoMirFor(did) => write!(f, "no MIR body is available for {:?}", did), ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes",), diff --git a/src/librustc_mir/interpret/eval_context.rs b/src/librustc_mir/interpret/eval_context.rs index ba462ec35eac..0f580b308142 100644 --- a/src/librustc_mir/interpret/eval_context.rs +++ b/src/librustc_mir/interpret/eval_context.rs @@ -323,14 +323,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { } /// Call this to turn untagged "global" pointers (obtained via `tcx`) into - /// the *canonical* machine pointer to the allocation. Must never be used - /// for any other pointers! + /// the machine pointer to the allocation. Must never be used + /// for any other pointers, nor for TLS statics. /// - /// This represents a *direct* access to that memory, as opposed to access - /// through a pointer that was created by the program. + /// Using the resulting pointer represents a *direct* access to that memory + /// (e.g. by directly using a `static`), + /// as opposed to access through a pointer that was created by the program. + /// + /// This function can fail only if `ptr` points to an `extern static`. #[inline(always)] - pub fn tag_global_base_pointer(&self, ptr: Pointer) -> Pointer { - self.memory.tag_global_base_pointer(ptr) + pub fn global_base_pointer(&self, ptr: Pointer) -> InterpResult<'tcx, Pointer> { + self.memory.global_base_pointer(ptr) } #[inline(always)] diff --git a/src/librustc_mir/interpret/machine.rs b/src/librustc_mir/interpret/machine.rs index ec1c93c81657..634f3c96e6c9 100644 --- a/src/librustc_mir/interpret/machine.rs +++ b/src/librustc_mir/interpret/machine.rs @@ -238,45 +238,30 @@ pub trait Machine<'mir, 'tcx>: Sized { Ok(()) } - /// Called for *every* memory access to determine the real ID of the given allocation. - /// This provides a way for the machine to "redirect" certain allocations as it sees fit. - /// - /// This is used by Miri to redirect extern statics to real allocations. - /// - /// This function must be idempotent. - #[inline] - fn canonical_alloc_id(_mem: &Memory<'mir, 'tcx, Self>, id: AllocId) -> AllocId { - id + /// Return the `AllocId` for the given thread-local static in the current thread. + fn thread_local_static_alloc_id( + _ecx: &mut InterpCx<'mir, 'tcx, Self>, + def_id: DefId, + ) -> InterpResult<'tcx, AllocId> { + throw_unsup!(ThreadLocalStatic(def_id)) } - /// Called when converting a `ty::Const` to an operand (in - /// `eval_const_to_op`). - /// - /// Miri uses this callback for creating per thread allocations for thread - /// locals. In Rust, one way of creating a thread local is by marking a - /// static with `#[thread_local]`. On supported platforms this gets - /// translated to a LLVM thread local for which LLVM automatically ensures - /// that each thread gets its own copy. Since LLVM automatically handles - /// thread locals, the Rust compiler just treats thread local statics as - /// regular statics even though accessing a thread local static should be an - /// effectful computation that depends on the current thread. The long term - /// plan is to change MIR to make accesses to thread locals explicit - /// (https://github.com/rust-lang/rust/issues/70685). While the issue 70685 - /// is not fixed, our current workaround in Miri is to use this function to - /// make per-thread copies of thread locals. Please note that we cannot make - /// these copies in `canonical_alloc_id` because that is too late: for - /// example, if one created a pointer in thread `t1` to a thread local and - /// sent it to another thread `t2`, resolving the access in - /// `canonical_alloc_id` would result in pointer pointing to `t2`'s thread - /// local and not `t1` as it should. - #[inline] - fn adjust_global_const( - _ecx: &InterpCx<'mir, 'tcx, Self>, - val: mir::interpret::ConstValue<'tcx>, - ) -> InterpResult<'tcx, mir::interpret::ConstValue<'tcx>> { - Ok(val) + /// Return the `AllocId` backing the given `extern static`. + fn extern_static_alloc_id( + mem: &Memory<'mir, 'tcx, Self>, + def_id: DefId, + ) -> InterpResult<'tcx, AllocId> { + // Use the `AllocId` associated with the `DefId`. Any actual *access* will fail. + Ok(mem.tcx.create_static_alloc(def_id)) } + /// Return the "base" tag for the given *global* allocation: the one that is used for direct + /// accesses to this static/const/fn allocation. If `id` is not a global allocation, + /// this will return an unusable tag (i.e., accesses will be UB)! + /// + /// Called on the id returned by `thread_local_static_alloc_id` and `extern_static_alloc_id`, if needed. + fn tag_global_base_pointer(memory_extra: &Self::MemoryExtra, id: AllocId) -> Self::PointerTag; + /// Called to initialize the "extra" state of an allocation and make the pointers /// it contains (in relocations) tagged. The way we construct allocations is /// to always first construct it without extra and then add the extra. @@ -309,13 +294,6 @@ pub trait Machine<'mir, 'tcx>: Sized { Ok(()) } - /// Return the "base" tag for the given *global* allocation: the one that is used for direct - /// accesses to this static/const/fn allocation. If `id` is not a global allocation, - /// this will return an unusable tag (i.e., accesses will be UB)! - /// - /// Expects `id` to be already canonical, if needed. - fn tag_global_base_pointer(memory_extra: &Self::MemoryExtra, id: AllocId) -> Self::PointerTag; - /// Executes a retagging operation #[inline] fn retag( @@ -375,13 +353,6 @@ pub trait Machine<'mir, 'tcx>: Sized { _mem: &Memory<'mir, 'tcx, Self>, _ptr: Pointer, ) -> InterpResult<'tcx, u64>; - - fn thread_local_alloc_id( - _ecx: &mut InterpCx<'mir, 'tcx, Self>, - did: DefId, - ) -> InterpResult<'tcx, AllocId> { - throw_unsup!(ThreadLocalStatic(did)) - } } // A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines diff --git a/src/librustc_mir/interpret/memory.rs b/src/librustc_mir/interpret/memory.rs index 8af1a8ac608a..fce33eed66fe 100644 --- a/src/librustc_mir/interpret/memory.rs +++ b/src/librustc_mir/interpret/memory.rs @@ -137,15 +137,33 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { } /// Call this to turn untagged "global" pointers (obtained via `tcx`) into - /// the *canonical* machine pointer to the allocation. Must never be used - /// for any other pointers! + /// the machine pointer to the allocation. Must never be used + /// for any other pointers, nor for TLS statics. /// - /// This represents a *direct* access to that memory, as opposed to access - /// through a pointer that was created by the program. + /// Using the resulting pointer represents a *direct* access to that memory + /// (e.g. by directly using a `static`), + /// as opposed to access through a pointer that was created by the program. + /// + /// This function can fail only if `ptr` points to an `extern static`. #[inline] - pub fn tag_global_base_pointer(&self, ptr: Pointer) -> Pointer { - let id = M::canonical_alloc_id(self, ptr.alloc_id); - ptr.with_tag(M::tag_global_base_pointer(&self.extra, id)) + pub fn global_base_pointer(&self, mut ptr: Pointer) -> InterpResult<'tcx, Pointer> { + // We need to handle `extern static`. + let ptr = match self.tcx.get_global_alloc(ptr.alloc_id) { + Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => { + bug!("global memory cannot point to thread-local static") + } + Some(GlobalAlloc::Static(def_id)) if self.tcx.is_foreign_item(def_id) => { + ptr.alloc_id = M::extern_static_alloc_id(self, def_id)?; + ptr + } + _ => { + // No need to change the `AllocId`. + ptr + } + }; + // And we need to get the tag. + let tag = M::tag_global_base_pointer(&self.extra, ptr.alloc_id); + Ok(ptr.with_tag(tag)) } pub fn create_fn_alloc( @@ -162,7 +180,9 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { id } }; - self.tag_global_base_pointer(Pointer::from(id)) + // Functions are global allocations, so make sure we get the right base pointer. + // We know this is not an `extern static` so this cannmot fail. + self.global_base_pointer(Pointer::from(id)).unwrap() } pub fn allocate( @@ -195,6 +215,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { M::GLOBAL_KIND.map(MemoryKind::Machine), "dynamically allocating global memory" ); + // This is a new allocation, not a new global one, so no `global_base_ptr`. let (alloc, tag) = M::init_allocation_extra(&self.extra, id, Cow::Owned(alloc), Some(kind)); self.alloc_map.insert(id, (kind, alloc.into_owned())); Pointer::from(id).with_tag(tag) @@ -437,6 +458,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { Some(GlobalAlloc::Function(..)) => throw_ub!(DerefFunctionPointer(id)), None => throw_ub!(PointerUseAfterFree(id)), Some(GlobalAlloc::Static(def_id)) => { + assert!(tcx.is_static(def_id)); assert!(!tcx.is_thread_local_static(def_id)); // Notice that every static has two `AllocId` that will resolve to the same // thing here: one maps to `GlobalAlloc::Static`, this is the "lazy" ID, @@ -448,24 +470,15 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { // The `GlobalAlloc::Memory` branch here is still reachable though; when a static // contains a reference to memory that was created during its evaluation (i.e., not // to another static), those inner references only exist in "resolved" form. - // - // Assumes `id` is already canonical. if tcx.is_foreign_item(def_id) { - trace!("get_global_alloc: foreign item {:?}", def_id); - throw_unsup!(ReadForeignStatic(def_id)) + throw_unsup!(ReadExternStatic(def_id)); } trace!("get_global_alloc: Need to compute {:?}", def_id); let instance = Instance::mono(tcx, def_id); let gid = GlobalId { instance, promoted: None }; // Use the raw query here to break validation cycles. Later uses of the static // will call the full query anyway. - let raw_const = - tcx.const_eval_raw(ty::ParamEnv::reveal_all().and(gid)).map_err(|err| { - // no need to report anything, the const_eval call takes care of that - // for statics - assert!(tcx.is_static(def_id)); - err - })?; + let raw_const = tcx.const_eval_raw(ty::ParamEnv::reveal_all().and(gid))?; // Make sure we use the ID of the resolved memory, not the lazy one! let id = raw_const.alloc_id; let allocation = tcx.global_alloc(id).unwrap_memory(); @@ -482,6 +495,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { alloc, M::GLOBAL_KIND.map(MemoryKind::Machine), ); + // Sanity check that this is the same pointer we would have gotten via `global_base_pointer`. debug_assert_eq!(tag, M::tag_global_base_pointer(memory_extra, id)); Ok(alloc) } @@ -492,7 +506,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { &self, id: AllocId, ) -> InterpResult<'tcx, &Allocation> { - let id = M::canonical_alloc_id(self, id); // The error type of the inner closure here is somewhat funny. We have two // ways of "erroring": An actual error, or because we got a reference from // `get_global_alloc` that we can actually use directly without inserting anything anywhere. @@ -529,7 +542,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { &mut self, id: AllocId, ) -> InterpResult<'tcx, &mut Allocation> { - let id = M::canonical_alloc_id(self, id); let tcx = self.tcx; let memory_extra = &self.extra; let a = self.alloc_map.get_mut_or(id, || { @@ -568,7 +580,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { id: AllocId, liveness: AllocCheck, ) -> InterpResult<'static, (Size, Align)> { - let id = M::canonical_alloc_id(self, id); // # Regular allocations // Don't use `self.get_raw` here as that will // a) cause cycles in case `id` refers to a static @@ -621,7 +632,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { } } - /// Assumes `id` is already canonical. fn get_fn_alloc(&self, id: AllocId) -> Option> { trace!("reading fn ptr: {}", id); if let Some(extra) = self.extra_fn_ptr_map.get(&id) { @@ -642,8 +652,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> { if ptr.offset.bytes() != 0 { throw_ub!(InvalidFunctionPointer(ptr.erase_tag())) } - let id = M::canonical_alloc_id(self, ptr.alloc_id); - self.get_fn_alloc(id).ok_or_else(|| err_ub!(InvalidFunctionPointer(ptr.erase_tag())).into()) + self.get_fn_alloc(ptr.alloc_id).ok_or_else(|| err_ub!(InvalidFunctionPointer(ptr.erase_tag())).into()) } pub fn mark_immutable(&mut self, id: AllocId) -> InterpResult<'tcx> { diff --git a/src/librustc_mir/interpret/operand.rs b/src/librustc_mir/interpret/operand.rs index face72d70cea..bb058476823e 100644 --- a/src/librustc_mir/interpret/operand.rs +++ b/src/librustc_mir/interpret/operand.rs @@ -541,9 +541,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { val: &ty::Const<'tcx>, layout: Option>, ) -> InterpResult<'tcx, OpTy<'tcx, M::PointerTag>> { - let tag_scalar = |scalar| match scalar { - Scalar::Ptr(ptr) => Scalar::Ptr(self.tag_global_base_pointer(ptr)), - Scalar::Raw { data, size } => Scalar::Raw { data, size }, + let tag_scalar = |scalar| -> InterpResult<'tcx, _> { + Ok(match scalar { + Scalar::Ptr(ptr) => Scalar::Ptr(self.global_base_pointer(ptr)?), + Scalar::Raw { data, size } => Scalar::Raw { data, size }, + }) }; // Early-return cases. let val_val = match val.val { @@ -570,10 +572,6 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { } ty::ConstKind::Value(val_val) => val_val, }; - // This call allows the machine to create fresh allocation ids for - // thread-local statics (see the `adjust_global_const` function - // documentation). - let val_val = M::adjust_global_const(self, val_val)?; // Other cases need layout. let layout = from_known_layout(self.tcx, self.param_env, layout, || self.layout_of(val.ty))?; @@ -582,10 +580,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { let id = self.tcx.create_memory_alloc(alloc); // We rely on mutability being set correctly in that allocation to prevent writes // where none should happen. - let ptr = self.tag_global_base_pointer(Pointer::new(id, offset)); + let ptr = self.global_base_pointer(Pointer::new(id, offset))?; Operand::Indirect(MemPlace::from_ptr(ptr, layout.align.abi)) } - ConstValue::Scalar(x) => Operand::Immediate(tag_scalar(x).into()), + ConstValue::Scalar(x) => Operand::Immediate(tag_scalar(x)?.into()), ConstValue::Slice { data, start, end } => { // We rely on mutability being set correctly in `data` to prevent writes // where none should happen. @@ -594,7 +592,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { Size::from_bytes(start), // offset: `start` ); Operand::Immediate(Immediate::new_slice( - self.tag_global_base_pointer(ptr).into(), + self.global_base_pointer(ptr)?.into(), u64::try_from(end.checked_sub(start).unwrap()).unwrap(), // len: `end - start` self, )) diff --git a/src/librustc_mir/interpret/place.rs b/src/librustc_mir/interpret/place.rs index 270be9860645..76f0e82db96f 100644 --- a/src/librustc_mir/interpret/place.rs +++ b/src/librustc_mir/interpret/place.rs @@ -1126,7 +1126,7 @@ where ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::PointerTag>> { // This must be an allocation in `tcx` let _ = self.tcx.global_alloc(raw.alloc_id); - let ptr = self.tag_global_base_pointer(Pointer::from(raw.alloc_id)); + let ptr = self.global_base_pointer(Pointer::from(raw.alloc_id))?; let layout = self.layout_of(raw.ty)?; Ok(MPlaceTy::from_aligned_ptr(ptr, layout)) } diff --git a/src/librustc_mir/interpret/step.rs b/src/librustc_mir/interpret/step.rs index 18f9bbd2e315..3d1e3eccc614 100644 --- a/src/librustc_mir/interpret/step.rs +++ b/src/librustc_mir/interpret/step.rs @@ -141,8 +141,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { use rustc_middle::mir::Rvalue::*; match *rvalue { ThreadLocalRef(did) => { - let id = M::thread_local_alloc_id(self, did)?; - let val = Scalar::Ptr(self.tag_global_base_pointer(id.into())); + let id = M::thread_local_static_alloc_id(self, did)?; + let val = self.global_base_pointer(id.into())?; self.write_scalar(val, dest)?; }