Fix MaybeUninit codegen using GVN

This commit is contained in:
Ben Kimock 2025-10-16 23:28:24 -04:00
parent 77761f314d
commit 1a4852c5fe
11 changed files with 112 additions and 17 deletions

View file

@ -24,6 +24,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
) {
match *rvalue {
mir::Rvalue::Use(ref operand) => {
if let mir::Operand::Constant(const_op) = operand {
let val = self.eval_mir_constant(&const_op);
if val.all_bytes_uninit(self.cx.tcx()) {
return;
}
}
let cg_operand = self.codegen_operand(bx, operand);
// Crucially, we do *not* use `OperandValue::Ref` for types with
// `BackendRepr::Scalar | BackendRepr::ScalarPair`. This ensures we match the MIR

View file

@ -522,6 +522,10 @@ impl<'tcx, Prov: Provenance> OpTy<'tcx, Prov> {
pub(super) fn op(&self) -> &Operand<Prov> {
&self.op
}
pub fn is_immediate_uninit(&self) -> bool {
matches!(self.op, Operand::Immediate(Immediate::Uninit))
}
}
impl<'tcx, Prov: Provenance> Projectable<'tcx, Prov> for OpTy<'tcx, Prov> {

View file

@ -1872,6 +1872,13 @@ fn pretty_print_const_value_tcx<'tcx>(
return Ok(());
}
// Printing [MaybeUninit<u8>::uninit(); N] or any other aggregate where all fields are uninit
// becomes very verbose. This special case makes the dump terse and clear.
if ct.all_bytes_uninit(tcx) {
fmt.write_str("<uninit>")?;
return Ok(());
}
let u8_type = tcx.types.u8;
match (ct, ty.kind()) {
// Byte/string slices, printed as (byte) string literals.

View file

@ -570,9 +570,19 @@ impl<'body, 'a, 'tcx> VnState<'body, 'a, 'tcx> {
_ if ty.is_zst() => ImmTy::uninit(ty).into(),
Opaque(_) => return None,
// Do not bother evaluating repeat expressions. This would uselessly consume memory.
Repeat(..) => return None,
// In general, evaluating repeat expressions just consumes a lot of memory.
// But in the special case that the element is just Immediate::Uninit, we can evaluate
// it without extra memory! If we don't propagate uninit values like this, LLVM can get
// very confused: https://github.com/rust-lang/rust/issues/139355
Repeat(value, _count) => {
let value = self.eval_to_const(value)?;
if value.is_immediate_uninit() {
ImmTy::uninit(ty).into()
} else {
return None;
}
}
Constant { ref value, disambiguator: _ } => {
self.ecx.eval_mir_constant(value, DUMMY_SP, None).discard_err()?
}
@ -608,8 +618,12 @@ impl<'body, 'a, 'tcx> VnState<'body, 'a, 'tcx> {
}
Union(active_field, field) => {
let field = self.eval_to_const(field)?;
if matches!(ty.backend_repr, BackendRepr::Scalar(..) | BackendRepr::ScalarPair(..))
{
if field.layout.layout.is_zst() {
ImmTy::from_immediate(Immediate::Uninit, ty).into()
} else if matches!(
ty.backend_repr,
BackendRepr::Scalar(..) | BackendRepr::ScalarPair(..)
) {
let dest = self.ecx.allocate(ty, MemoryKind::Stack).discard_err()?;
let field_dest = self.ecx.project_field(&dest, active_field).discard_err()?;
self.ecx.copy_op(field, &field_dest).discard_err()?;
@ -1705,7 +1719,11 @@ fn op_to_prop_const<'tcx>(
// Do not synthetize too large constants. Codegen will just memcpy them, which we'd like to
// avoid.
if !matches!(op.layout.backend_repr, BackendRepr::Scalar(..) | BackendRepr::ScalarPair(..)) {
// But we *do* want to synthesize any size constant if it is entirely uninit because that
// benefits codegen, which has special handling for them.
if !op.is_immediate_uninit()
&& !matches!(op.layout.backend_repr, BackendRepr::Scalar(..) | BackendRepr::ScalarPair(..))
{
return None;
}

View file

@ -0,0 +1,15 @@
// This is a regression test for https://github.com/rust-lang/rust/issues/139355
//@ compile-flags: -Copt-level=3
#![crate_type = "lib"]
use std::mem::MaybeUninit;
#[no_mangle]
pub fn create_uninit_array() -> [[MaybeUninit<u8>; 4]; 200] {
// CHECK-LABEL: create_uninit_array
// CHECK-NEXT: start:
// CHECK-NEXT: ret void
[[MaybeUninit::<u8>::uninit(); 4]; 200]
}

View file

@ -11,21 +11,19 @@ pub struct PartiallyUninit {
y: MaybeUninit<[u8; 10]>,
}
// CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant [10 x i8] undef
// CHECK: [[PARTIALLY_UNINIT:@.*]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4
// This shouldn't contain undef, since it contains more chunks
// than the default value of uninit_const_chunk_threshold.
// CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant [32768 x i8] c"{{.+}}", align 4
// CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant [16384 x i8] undef
// CHECK-LABEL: @fully_uninit
#[no_mangle]
pub const fn fully_uninit() -> MaybeUninit<[u8; 10]> {
const M: MaybeUninit<[u8; 10]> = MaybeUninit::uninit();
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %_0, ptr align 1 {{.*}}[[FULLY_UNINIT]]{{.*}}, i{{(32|64)}} 10, i1 false)
// returning uninit doesn't need to do anything to the return place at all
// CHECK-NEXT: start:
// CHECK-NEXT: ret void
M
}
@ -49,6 +47,8 @@ pub const fn uninit_padding_huge() -> [(u32, u8); 4096] {
#[no_mangle]
pub const fn fully_uninit_huge() -> MaybeUninit<[u32; 4096]> {
const F: MaybeUninit<[u32; 4096]> = MaybeUninit::uninit();
// CHECK: call void @llvm.memcpy.{{.+}}(ptr align 4 %_0, ptr align 4 {{.*}}[[FULLY_UNINIT_HUGE]]{{.*}}, i{{(32|64)}} 16384, i1 false)
// returning uninit doesn't need to do anything to the return place at all
// CHECK-NEXT: start:
// CHECK-NEXT: ret void
F
}

View file

@ -0,0 +1,10 @@
//@ compile-flags: -O
use std::mem::MaybeUninit;
// EMIT_MIR maybe_uninit.u8_array.GVN.diff
pub fn u8_array() -> [MaybeUninit<u8>; 8] {
// CHECK: fn u8_array(
// CHECK: _0 = const <uninit>;
[MaybeUninit::uninit(); 8]
}

View file

@ -0,0 +1,28 @@
- // MIR for `u8_array` before GVN
+ // MIR for `u8_array` after GVN
fn u8_array() -> [MaybeUninit<u8>; 8] {
let mut _0: [std::mem::MaybeUninit<u8>; 8];
let mut _1: std::mem::MaybeUninit<u8>;
scope 1 (inlined MaybeUninit::<u8>::uninit) {
}
bb0: {
StorageLive(_1);
- _1 = MaybeUninit::<u8> { uninit: const () };
- _0 = [move _1; 8];
+ _1 = const <uninit>;
+ _0 = const <uninit>;
StorageDead(_1);
return;
}
+ }
+
+ ALLOC0 (size: 8, align: 1) {
+ __ __ __ __ __ __ __ __ │ ░░░░░░░░
+ }
+
+ ALLOC1 (size: 1, align: 1) {
+ __ │ ░
}

View file

@ -43,8 +43,7 @@ pub unsafe fn invalid_bool() -> bool {
// EMIT_MIR transmute.undef_union_as_integer.GVN.diff
pub unsafe fn undef_union_as_integer() -> u32 {
// CHECK-LABEL: fn undef_union_as_integer(
// CHECK: _1 = const Union32
// CHECK: _0 = const {{.*}}: u32;
// CHECK: _0 = const <uninit>;
union Union32 {
value: u32,
unit: (),

View file

@ -12,16 +12,20 @@
- _2 = ();
- _1 = Union32 { value: move _2 };
+ _2 = const ();
+ _1 = const Union32 {{ value: Indirect { alloc_id: ALLOC0, offset: Size(0 bytes) }: u32, unit: () }};
+ _1 = const <uninit>;
StorageDead(_2);
- _0 = move _1 as u32 (Transmute);
+ _0 = const Indirect { alloc_id: ALLOC0, offset: Size(0 bytes) }: u32;
+ _0 = const <uninit>;
StorageDead(_1);
return;
}
+ }
+
+ ALLOC0 (size: 4, align: 4) {
+ __ __ __ __ │ ░░░░
+ }
+
+ ALLOC1 (size: 4, align: 4) {
+ __ __ __ __ │ ░░░░
}

View file

@ -12,16 +12,20 @@
- _2 = ();
- _1 = Union32 { value: move _2 };
+ _2 = const ();
+ _1 = const Union32 {{ value: Indirect { alloc_id: ALLOC0, offset: Size(0 bytes) }: u32, unit: () }};
+ _1 = const <uninit>;
StorageDead(_2);
- _0 = move _1 as u32 (Transmute);
+ _0 = const Indirect { alloc_id: ALLOC0, offset: Size(0 bytes) }: u32;
+ _0 = const <uninit>;
StorageDead(_1);
return;
}
+ }
+
+ ALLOC0 (size: 4, align: 4) {
+ __ __ __ __ │ ░░░░
+ }
+
+ ALLOC1 (size: 4, align: 4) {
+ __ __ __ __ │ ░░░░
}