Rollup merge of #139340 - beetrees:riscv-float-struct-abi, r=workingjubilee

Fix RISC-V C function ABI when passing/returning structs containing floats

RISC-V passes structs containing only one or two floats (or a float and integer pair) in registers, as long as each individual float/integer fits in a single corresponding register (see [the ABI specification](https://github.com/riscv-non-isa/riscv-elf-psabi-doc/releases/download/v1.0/riscv-abi.pdf) for details). Before this PR, Rust would not check the offset of the second float/integer, instead assuming it was at the standard offset implied by its natural alignment. However, since the offset can be changed by `#[repr(align(N))]` and `#[repr(packed)]`, this caused miscompilations (see #115609). To fix this, this PR introduces a `rest_offset` field to `CastTarget` that can be used to explicitly specify the offset at which the `rest` part of the cast is located.
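
To see why the offset matters, here is a minimal sketch (hypothetical types, mirroring the tests added below) of how these `repr` attributes move the second field away from its natural offset on a typical 64-bit target, which is exactly the offset the ABI lowering must now track:

```rust
use core::mem::offset_of;

#[repr(C)]
struct Regular(f32, f64); // second field at its natural offset

#[repr(C, packed)]
struct Packed(f32, f64); // packing removes the padding before the f64

#[repr(C, align(64))]
struct AlignedF64(f64);

#[repr(C)]
struct Padded(u8, AlignedF64); // over-alignment pushes the second field out

fn main() {
    assert_eq!(offset_of!(Regular, 1), 8); // natural f64 alignment
    assert_eq!(offset_of!(Packed, 1), 4); // immediately after the f32
    assert_eq!(offset_of!(Padded, 1), 64); // at the 64-byte alignment boundary
}
```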

While fixing this, I discovered another bug: the size of the cast target was being used as the size of the MIR return place (when the function used a `PassMode::Cast` return type). However, the cast target is allowed to be smaller than the actual type, causing a miscompilation. This PR fixes the issue by using the larger of the type's size and the cast target's size as the size of the MIR return place, ensuring all reads and writes are in bounds.
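
As a concrete (hypothetical) illustration of the mismatch: a 64-byte-aligned wrapper around an `f64` is returned as a single `double` under the FP calling convention, so the cast target is 8 bytes while the type occupies 64. Sizing the return place by the cast target alone would let a copy of the full struct write out of bounds:

```rust
use core::mem::size_of;

#[repr(C, align(64))]
struct Aligned(f64); // returned in a single FP register on riscv64gc

fn main() {
    // The cast target (one f64) is smaller than the in-memory type, so the
    // MIR return place must be sized by the larger of the two values.
    assert_eq!(size_of::<f64>(), 8);
    assert_eq!(size_of::<Aligned>(), 64);
}
```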

Fixes the RISC-V part of #115609.

cc target maintainers of `riscv64gc-unknown-linux-gnu`: `@kito-cheng` `@michaelmaitland` `@robin-randhawa-sifive` `@topperc`

r? `@workingjubilee`
Jakub Beránek 2025-06-16 19:54:32 +02:00 committed by GitHub
commit 4479d42d60
13 changed files with 537 additions and 134 deletions

View file

@@ -40,7 +40,18 @@ fn apply_attrs_to_abi_param(param: AbiParam, arg_attrs: ArgAttributes) -> AbiPar
}
}
fn cast_target_to_abi_params(cast: &CastTarget) -> SmallVec<[AbiParam; 2]> {
fn cast_target_to_abi_params(cast: &CastTarget) -> SmallVec<[(Size, AbiParam); 2]> {
if let Some(offset_from_start) = cast.rest_offset {
assert!(cast.prefix[1..].iter().all(|p| p.is_none()));
assert_eq!(cast.rest.unit.size, cast.rest.total);
let first = cast.prefix[0].unwrap();
let second = cast.rest.unit;
return smallvec![
(Size::ZERO, reg_to_abi_param(first)),
(offset_from_start, reg_to_abi_param(second))
];
}
let (rest_count, rem_bytes) = if cast.rest.unit.size.bytes() == 0 {
(0, 0)
} else {
@@ -55,25 +66,32 @@ fn cast_target_to_abi_params(cast: &CastTarget) -> SmallVec<[AbiParam; 2]> {
// different types in Cranelift IR. Instead a single array of primitive types is used.
// Create list of fields in the main structure
let mut args = cast
let args = cast
.prefix
.iter()
.flatten()
.map(|&reg| reg_to_abi_param(reg))
.chain((0..rest_count).map(|_| reg_to_abi_param(cast.rest.unit)))
.collect::<SmallVec<_>>();
.chain((0..rest_count).map(|_| reg_to_abi_param(cast.rest.unit)));
let mut res = SmallVec::new();
let mut offset = Size::ZERO;
for arg in args {
res.push((offset, arg));
offset += Size::from_bytes(arg.value_type.bytes());
}
// Append final integer
if rem_bytes != 0 {
// Only integers can be really split further.
assert_eq!(cast.rest.unit.kind, RegKind::Integer);
args.push(reg_to_abi_param(Reg {
kind: RegKind::Integer,
size: Size::from_bytes(rem_bytes),
}));
res.push((
offset,
reg_to_abi_param(Reg { kind: RegKind::Integer, size: Size::from_bytes(rem_bytes) }),
));
}
args
res
}
impl<'tcx> ArgAbiExt<'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
@@ -104,7 +122,7 @@ impl<'tcx> ArgAbiExt<'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
},
PassMode::Cast { ref cast, pad_i32 } => {
assert!(!pad_i32, "padding support not yet implemented");
cast_target_to_abi_params(cast)
cast_target_to_abi_params(cast).into_iter().map(|(_, param)| param).collect()
}
PassMode::Indirect { attrs, meta_attrs: None, on_stack } => {
if on_stack {
@@ -160,9 +178,10 @@ impl<'tcx> ArgAbiExt<'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
}
_ => unreachable!("{:?}", self.layout.backend_repr),
},
PassMode::Cast { ref cast, .. } => {
(None, cast_target_to_abi_params(cast).into_iter().collect())
}
PassMode::Cast { ref cast, .. } => (
None,
cast_target_to_abi_params(cast).into_iter().map(|(_, param)| param).collect(),
),
PassMode::Indirect { attrs, meta_attrs: None, on_stack } => {
assert!(!on_stack);
(
@@ -187,12 +206,14 @@ pub(super) fn to_casted_value<'tcx>(
) -> SmallVec<[Value; 2]> {
let (ptr, meta) = arg.force_stack(fx);
assert!(meta.is_none());
let mut offset = 0;
cast_target_to_abi_params(cast)
.into_iter()
.map(|param| {
let val = ptr.offset_i64(fx, offset).load(fx, param.value_type, MemFlags::new());
offset += i64::from(param.value_type.bytes());
.map(|(offset, param)| {
let val = ptr.offset_i64(fx, offset.bytes() as i64).load(
fx,
param.value_type,
MemFlags::new(),
);
val
})
.collect()
@@ -205,7 +226,7 @@ pub(super) fn from_casted_value<'tcx>(
cast: &CastTarget,
) -> CValue<'tcx> {
let abi_params = cast_target_to_abi_params(cast);
let abi_param_size: u32 = abi_params.iter().map(|param| param.value_type.bytes()).sum();
let abi_param_size: u32 = abi_params.iter().map(|(_, param)| param.value_type.bytes()).sum();
let layout_size = u32::try_from(layout.size.bytes()).unwrap();
let ptr = fx.create_stack_slot(
// Stack slot size may be bigger for example `[u8; 3]` which is packed into an `i32`.
@@ -214,16 +235,13 @@ pub(super) fn from_casted_value<'tcx>(
std::cmp::max(abi_param_size, layout_size),
u32::try_from(layout.align.abi.bytes()).unwrap(),
);
let mut offset = 0;
let mut block_params_iter = block_params.iter().copied();
for param in abi_params {
let val = ptr.offset_i64(fx, offset).store(
for (offset, _) in abi_params {
ptr.offset_i64(fx, offset.bytes() as i64).store(
fx,
block_params_iter.next().unwrap(),
MemFlags::new(),
);
offset += i64::from(param.value_type.bytes());
val
)
}
assert_eq!(block_params_iter.next(), None, "Leftover block param");
CValue::by_ref(ptr, layout)

View file

@@ -626,7 +626,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
bx.lifetime_start(llscratch, scratch_size);
// ... where we first store the value...
bx.store(val, llscratch, scratch_align);
rustc_codegen_ssa::mir::store_cast(bx, cast, val, llscratch, scratch_align);
// ... and then memcpy it to the intended destination.
bx.memcpy(

View file

@@ -229,7 +229,7 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
let llscratch = bx.alloca(scratch_size, scratch_align);
bx.lifetime_start(llscratch, scratch_size);
// ...store the value...
bx.store(val, llscratch, scratch_align);
rustc_codegen_ssa::mir::store_cast(bx, cast, val, llscratch, scratch_align);
// ... and then memcpy it to the intended destination.
bx.memcpy(
dst.val.llval,

View file

@@ -1,6 +1,6 @@
use std::cmp;
use rustc_abi::{BackendRepr, ExternAbi, HasDataLayout, Reg, Size, WrappingRange};
use rustc_abi::{Align, BackendRepr, ExternAbi, HasDataLayout, Reg, Size, WrappingRange};
use rustc_ast as ast;
use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
use rustc_data_structures::packed::Pu128;
@@ -13,7 +13,7 @@ use rustc_middle::{bug, span_bug};
use rustc_session::config::OptLevel;
use rustc_span::Span;
use rustc_span::source_map::Spanned;
use rustc_target::callconv::{ArgAbi, FnAbi, PassMode};
use rustc_target::callconv::{ArgAbi, CastTarget, FnAbi, PassMode};
use tracing::{debug, info};
use super::operand::OperandRef;
@@ -558,8 +558,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
}
ZeroSized => bug!("ZST return value shouldn't be in PassMode::Cast"),
};
let ty = bx.cast_backend_type(cast_ty);
bx.load(ty, llslot, self.fn_abi.ret.layout.align.abi)
load_cast(bx, cast_ty, llslot, self.fn_abi.ret.layout.align.abi)
}
};
bx.ret(llval);
@@ -1618,8 +1617,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
MemFlags::empty(),
);
// ...and then load it with the ABI type.
let cast_ty = bx.cast_backend_type(cast);
llval = bx.load(cast_ty, llscratch, scratch_align);
llval = load_cast(bx, cast, llscratch, scratch_align);
bx.lifetime_end(llscratch, scratch_size);
} else {
// We can't use `PlaceRef::load` here because the argument
@@ -1969,3 +1967,47 @@ enum ReturnDest<'tcx, V> {
/// Store a direct return value to an operand local place.
DirectOperand(mir::Local),
}
fn load_cast<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
cast: &CastTarget,
ptr: Bx::Value,
align: Align,
) -> Bx::Value {
let cast_ty = bx.cast_backend_type(cast);
if let Some(offset_from_start) = cast.rest_offset {
assert!(cast.prefix[1..].iter().all(|p| p.is_none()));
assert_eq!(cast.rest.unit.size, cast.rest.total);
let first_ty = bx.reg_backend_type(&cast.prefix[0].unwrap());
let second_ty = bx.reg_backend_type(&cast.rest.unit);
let first = bx.load(first_ty, ptr, align);
let second_ptr = bx.inbounds_ptradd(ptr, bx.const_usize(offset_from_start.bytes()));
let second = bx.load(second_ty, second_ptr, align.restrict_for_offset(offset_from_start));
let res = bx.cx().const_poison(cast_ty);
let res = bx.insert_value(res, first, 0);
bx.insert_value(res, second, 1)
} else {
bx.load(cast_ty, ptr, align)
}
}
pub fn store_cast<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
bx: &mut Bx,
cast: &CastTarget,
value: Bx::Value,
ptr: Bx::Value,
align: Align,
) {
if let Some(offset_from_start) = cast.rest_offset {
assert!(cast.prefix[1..].iter().all(|p| p.is_none()));
assert_eq!(cast.rest.unit.size, cast.rest.total);
assert!(cast.prefix[0].is_some());
let first = bx.extract_value(value, 0);
let second = bx.extract_value(value, 1);
bx.store(first, ptr, align);
let second_ptr = bx.inbounds_ptradd(ptr, bx.const_usize(offset_from_start.bytes()));
bx.store(second, second_ptr, align.restrict_for_offset(offset_from_start));
} else {
bx.store(value, ptr, align);
};
}

View file

@@ -26,6 +26,7 @@ pub mod place;
mod rvalue;
mod statement;
pub use self::block::store_cast;
use self::debuginfo::{FunctionDebugContext, PerLocalVarDebugInfo};
use self::operand::{OperandRef, OperandValue};
use self::place::PlaceRef;
@@ -259,7 +260,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
}
PassMode::Cast { ref cast, .. } => {
debug!("alloc: {:?} (return place) -> place", local);
let size = cast.size(&start_bx);
let size = cast.size(&start_bx).max(layout.size);
return LocalRef::Place(PlaceRef::alloca_size(&mut start_bx, size, layout));
}
_ => {}

View file

@@ -2,9 +2,7 @@ use rustc_abi::{
BackendRepr, FieldsShape, Float, HasDataLayout, Primitive, Reg, Size, TyAbiInterface,
};
use crate::callconv::{
ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, CastTarget, FnAbi, PassMode, Uniform,
};
use crate::callconv::{ArgAbi, ArgExtension, CastTarget, FnAbi, PassMode, Uniform};
fn extend_integer_width_mips<Ty>(arg: &mut ArgAbi<'_, Ty>, bits: u64) {
// Always sign extend u32 values on 64-bit mips
@@ -140,16 +138,7 @@ where
// Extract first 8 chunks as the prefix
let rest_size = size - Size::from_bytes(8) * prefix_index as u64;
arg.cast_to(CastTarget {
prefix,
rest: Uniform::new(Reg::i64(), rest_size),
attrs: ArgAttributes {
regular: ArgAttribute::default(),
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
},
});
arg.cast_to(CastTarget::prefixed(prefix, Uniform::new(Reg::i64(), rest_size)));
}
pub(crate) fn compute_abi_info<'a, Ty, C>(cx: &C, fn_abi: &mut FnAbi<'a, Ty>)

View file

@@ -197,6 +197,17 @@ impl ArgAttributes {
}
}
impl From<ArgAttribute> for ArgAttributes {
fn from(value: ArgAttribute) -> Self {
Self {
regular: value,
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
}
}
}
/// An argument passed entirely in registers with the
/// same kind (e.g., HFA / HVA on PPC64 and AArch64).
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, HashStable_Generic)]
@@ -251,6 +262,9 @@ impl Uniform {
#[derive(Clone, PartialEq, Eq, Hash, Debug, HashStable_Generic)]
pub struct CastTarget {
pub prefix: [Option<Reg>; 8],
/// The offset of `rest` from the start of the value. Currently only implemented for a `Reg`
/// pair created by the `offset_pair` method.
pub rest_offset: Option<Size>,
pub rest: Uniform,
pub attrs: ArgAttributes,
}
@@ -263,42 +277,45 @@ impl From<Reg> for CastTarget {
impl From<Uniform> for CastTarget {
fn from(uniform: Uniform) -> CastTarget {
CastTarget {
prefix: [None; 8],
rest: uniform,
attrs: ArgAttributes {
regular: ArgAttribute::default(),
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
},
}
Self::prefixed([None; 8], uniform)
}
}
impl CastTarget {
pub fn pair(a: Reg, b: Reg) -> CastTarget {
CastTarget {
pub fn prefixed(prefix: [Option<Reg>; 8], rest: Uniform) -> Self {
Self { prefix, rest_offset: None, rest, attrs: ArgAttributes::new() }
}
pub fn offset_pair(a: Reg, offset_from_start: Size, b: Reg) -> Self {
Self {
prefix: [Some(a), None, None, None, None, None, None, None],
rest: Uniform::from(b),
attrs: ArgAttributes {
regular: ArgAttribute::default(),
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
},
rest_offset: Some(offset_from_start),
rest: b.into(),
attrs: ArgAttributes::new(),
}
}
pub fn with_attrs(mut self, attrs: ArgAttributes) -> Self {
self.attrs = attrs;
self
}
pub fn pair(a: Reg, b: Reg) -> CastTarget {
Self::prefixed([Some(a), None, None, None, None, None, None, None], Uniform::from(b))
}
/// When you only access the range containing valid data, you can use this unaligned size;
/// otherwise, use the safer `size` method.
pub fn unaligned_size<C: HasDataLayout>(&self, _cx: &C) -> Size {
// Prefix arguments are passed in specific designated registers
let prefix_size = self
.prefix
.iter()
.filter_map(|x| x.map(|reg| reg.size))
.fold(Size::ZERO, |acc, size| acc + size);
let prefix_size = if let Some(offset_from_start) = self.rest_offset {
offset_from_start
} else {
self.prefix
.iter()
.filter_map(|x| x.map(|reg| reg.size))
.fold(Size::ZERO, |acc, size| acc + size)
};
// Remaining arguments are passed in chunks of the unit size
let rest_size =
self.rest.unit.size * self.rest.total.bytes().div_ceil(self.rest.unit.size.bytes());
@@ -322,9 +339,22 @@ impl CastTarget {
/// Checks if these two `CastTarget` are equal enough to be considered "the same for all
/// function call ABIs".
pub fn eq_abi(&self, other: &Self) -> bool {
let CastTarget { prefix: prefix_l, rest: rest_l, attrs: attrs_l } = self;
let CastTarget { prefix: prefix_r, rest: rest_r, attrs: attrs_r } = other;
prefix_l == prefix_r && rest_l == rest_r && attrs_l.eq_abi(attrs_r)
let CastTarget {
prefix: prefix_l,
rest_offset: rest_offset_l,
rest: rest_l,
attrs: attrs_l,
} = self;
let CastTarget {
prefix: prefix_r,
rest_offset: rest_offset_r,
rest: rest_r,
attrs: attrs_r,
} = other;
prefix_l == prefix_r
&& rest_offset_l == rest_offset_r
&& rest_l == rest_r
&& attrs_l.eq_abi(attrs_r)
}
}

View file

@@ -1,6 +1,6 @@
use rustc_abi::{HasDataLayout, Reg, Size, TyAbiInterface};
use super::{ArgAttribute, ArgAttributes, ArgExtension, CastTarget};
use super::CastTarget;
use crate::callconv::{ArgAbi, FnAbi, Uniform};
fn classify_ret<Ty>(ret: &mut ArgAbi<'_, Ty>) {
@@ -34,16 +34,10 @@ fn classify_aggregate<Ty>(arg: &mut ArgAbi<'_, Ty>) {
};
if align_bytes == size.bytes() {
arg.cast_to(CastTarget {
prefix: [Some(reg), None, None, None, None, None, None, None],
rest: Uniform::new(Reg::i8(), Size::from_bytes(0)),
attrs: ArgAttributes {
regular: ArgAttribute::default(),
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
},
});
arg.cast_to(CastTarget::prefixed(
[Some(reg), None, None, None, None, None, None, None],
Uniform::new(Reg::i8(), Size::ZERO),
));
} else {
arg.cast_to(Uniform::new(reg, size));
}
@@ -78,11 +72,10 @@ where
};
if arg.layout.size.bytes() / align_bytes == 1 {
// Make sure we pass the struct as an array at the LLVM IR level and not as a single integer.
arg.cast_to(CastTarget {
prefix: [Some(unit), None, None, None, None, None, None, None],
rest: Uniform::new(unit, Size::ZERO),
attrs: ArgAttributes::new(),
});
arg.cast_to(CastTarget::prefixed(
[Some(unit), None, None, None, None, None, None, None],
Uniform::new(unit, Size::ZERO),
));
} else {
arg.cast_to(Uniform::new(unit, arg.layout.size));
}

View file

@@ -14,16 +14,16 @@ use crate::spec::HasTargetSpec;
#[derive(Copy, Clone)]
enum RegPassKind {
Float(Reg),
Integer(Reg),
Float { offset_from_start: Size, ty: Reg },
Integer { offset_from_start: Size, ty: Reg },
Unknown,
}
#[derive(Copy, Clone)]
enum FloatConv {
FloatPair(Reg, Reg),
FloatPair { first_ty: Reg, second_ty_offset_from_start: Size, second_ty: Reg },
Float(Reg),
MixedPair(Reg, Reg),
MixedPair { first_ty: Reg, second_ty_offset_from_start: Size, second_ty: Reg },
}
#[derive(Copy, Clone)]
@@ -43,6 +43,7 @@ fn should_use_fp_conv_helper<'a, Ty, C>(
flen: u64,
field1_kind: &mut RegPassKind,
field2_kind: &mut RegPassKind,
offset_from_start: Size,
) -> Result<(), CannotUseFpConv>
where
Ty: TyAbiInterface<'a, C> + Copy,
@@ -55,16 +56,16 @@ where
}
match (*field1_kind, *field2_kind) {
(RegPassKind::Unknown, _) => {
*field1_kind = RegPassKind::Integer(Reg {
kind: RegKind::Integer,
size: arg_layout.size,
});
*field1_kind = RegPassKind::Integer {
offset_from_start,
ty: Reg { kind: RegKind::Integer, size: arg_layout.size },
};
}
(RegPassKind::Float(_), RegPassKind::Unknown) => {
*field2_kind = RegPassKind::Integer(Reg {
kind: RegKind::Integer,
size: arg_layout.size,
});
(RegPassKind::Float { .. }, RegPassKind::Unknown) => {
*field2_kind = RegPassKind::Integer {
offset_from_start,
ty: Reg { kind: RegKind::Integer, size: arg_layout.size },
};
}
_ => return Err(CannotUseFpConv),
}
@@ -75,12 +76,16 @@ where
}
match (*field1_kind, *field2_kind) {
(RegPassKind::Unknown, _) => {
*field1_kind =
RegPassKind::Float(Reg { kind: RegKind::Float, size: arg_layout.size });
*field1_kind = RegPassKind::Float {
offset_from_start,
ty: Reg { kind: RegKind::Float, size: arg_layout.size },
};
}
(_, RegPassKind::Unknown) => {
*field2_kind =
RegPassKind::Float(Reg { kind: RegKind::Float, size: arg_layout.size });
*field2_kind = RegPassKind::Float {
offset_from_start,
ty: Reg { kind: RegKind::Float, size: arg_layout.size },
};
}
_ => return Err(CannotUseFpConv),
}
@@ -102,13 +107,14 @@ where
flen,
field1_kind,
field2_kind,
offset_from_start,
);
}
return Err(CannotUseFpConv);
}
}
FieldsShape::Array { count, .. } => {
for _ in 0..count {
for i in 0..count {
let elem_layout = arg_layout.field(cx, 0);
should_use_fp_conv_helper(
cx,
@ -117,6 +123,7 @@ where
flen,
field1_kind,
field2_kind,
offset_from_start + elem_layout.size * i,
)?;
}
}
@@ -127,7 +134,15 @@
}
for i in arg_layout.fields.index_by_increasing_offset() {
let field = arg_layout.field(cx, i);
should_use_fp_conv_helper(cx, &field, xlen, flen, field1_kind, field2_kind)?;
should_use_fp_conv_helper(
cx,
&field,
xlen,
flen,
field1_kind,
field2_kind,
offset_from_start + arg_layout.fields.offset(i),
)?;
}
}
},
@@ -146,14 +161,52 @@
{
let mut field1_kind = RegPassKind::Unknown;
let mut field2_kind = RegPassKind::Unknown;
if should_use_fp_conv_helper(cx, arg, xlen, flen, &mut field1_kind, &mut field2_kind).is_err() {
if should_use_fp_conv_helper(
cx,
arg,
xlen,
flen,
&mut field1_kind,
&mut field2_kind,
Size::ZERO,
)
.is_err()
{
return None;
}
match (field1_kind, field2_kind) {
(RegPassKind::Integer(l), RegPassKind::Float(r)) => Some(FloatConv::MixedPair(l, r)),
(RegPassKind::Float(l), RegPassKind::Integer(r)) => Some(FloatConv::MixedPair(l, r)),
(RegPassKind::Float(l), RegPassKind::Float(r)) => Some(FloatConv::FloatPair(l, r)),
(RegPassKind::Float(f), RegPassKind::Unknown) => Some(FloatConv::Float(f)),
(
RegPassKind::Integer { offset_from_start, .. }
| RegPassKind::Float { offset_from_start, .. },
_,
) if offset_from_start != Size::ZERO => {
panic!("type {:?} has a first field with non-zero offset {offset_from_start:?}", arg.ty)
}
(
RegPassKind::Integer { ty: first_ty, .. },
RegPassKind::Float { offset_from_start, ty: second_ty },
) => Some(FloatConv::MixedPair {
first_ty,
second_ty_offset_from_start: offset_from_start,
second_ty,
}),
(
RegPassKind::Float { ty: first_ty, .. },
RegPassKind::Integer { offset_from_start, ty: second_ty },
) => Some(FloatConv::MixedPair {
first_ty,
second_ty_offset_from_start: offset_from_start,
second_ty,
}),
(
RegPassKind::Float { ty: first_ty, .. },
RegPassKind::Float { offset_from_start, ty: second_ty },
) => Some(FloatConv::FloatPair {
first_ty,
second_ty_offset_from_start: offset_from_start,
second_ty,
}),
(RegPassKind::Float { ty, .. }, RegPassKind::Unknown) => Some(FloatConv::Float(ty)),
_ => None,
}
}
@@ -171,11 +224,19 @@
FloatConv::Float(f) => {
arg.cast_to(f);
}
FloatConv::FloatPair(l, r) => {
arg.cast_to(CastTarget::pair(l, r));
FloatConv::FloatPair { first_ty, second_ty_offset_from_start, second_ty } => {
arg.cast_to(CastTarget::offset_pair(
first_ty,
second_ty_offset_from_start,
second_ty,
));
}
FloatConv::MixedPair(l, r) => {
arg.cast_to(CastTarget::pair(l, r));
FloatConv::MixedPair { first_ty, second_ty_offset_from_start, second_ty } => {
arg.cast_to(CastTarget::offset_pair(
first_ty,
second_ty_offset_from_start,
second_ty,
));
}
}
return false;
@@ -239,15 +300,27 @@ fn classify_arg<'a, Ty, C>(
arg.cast_to(f);
return;
}
Some(FloatConv::FloatPair(l, r)) if *avail_fprs >= 2 => {
Some(FloatConv::FloatPair { first_ty, second_ty_offset_from_start, second_ty })
if *avail_fprs >= 2 =>
{
*avail_fprs -= 2;
arg.cast_to(CastTarget::pair(l, r));
arg.cast_to(CastTarget::offset_pair(
first_ty,
second_ty_offset_from_start,
second_ty,
));
return;
}
Some(FloatConv::MixedPair(l, r)) if *avail_fprs >= 1 && *avail_gprs >= 1 => {
Some(FloatConv::MixedPair { first_ty, second_ty_offset_from_start, second_ty })
if *avail_fprs >= 1 && *avail_gprs >= 1 =>
{
*avail_gprs -= 1;
*avail_fprs -= 1;
arg.cast_to(CastTarget::pair(l, r));
arg.cast_to(CastTarget::offset_pair(
first_ty,
second_ty_offset_from_start,
second_ty,
));
return;
}
_ => (),

View file

@@ -5,9 +5,7 @@ use rustc_abi::{
TyAndLayout,
};
use crate::callconv::{
ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, CastTarget, FnAbi, Uniform,
};
use crate::callconv::{ArgAbi, ArgAttribute, CastTarget, FnAbi, Uniform};
use crate::spec::HasTargetSpec;
#[derive(Clone, Debug)]
@@ -197,16 +195,10 @@ where
rest_size = rest_size - Reg::i32().size;
}
arg.cast_to(CastTarget {
prefix: data.prefix,
rest: Uniform::new(Reg::i64(), rest_size),
attrs: ArgAttributes {
regular: data.arg_attribute,
arg_ext: ArgExtension::None,
pointee_size: Size::ZERO,
pointee_align: None,
},
});
arg.cast_to(
CastTarget::prefixed(data.prefix, Uniform::new(Reg::i64(), rest_size))
.with_attrs(data.arg_attribute.into()),
);
return;
}
}

View file

@@ -0,0 +1,177 @@
//@ add-core-stubs
//@ assembly-output: emit-asm
//@ compile-flags: -Copt-level=3 --target riscv64gc-unknown-linux-gnu
//@ needs-llvm-components: riscv
#![feature(no_core, lang_items)]
#![no_std]
#![no_core]
#![crate_type = "lib"]
extern crate minicore;
use minicore::*;
#[repr(C, align(64))]
struct Aligned(f64);
#[repr(C)]
struct Padded(u8, Aligned);
#[repr(C, packed)]
struct Packed(u8, f32);
impl Copy for Aligned {}
impl Copy for Padded {}
impl Copy for Packed {}
extern "C" {
fn take_padded(x: Padded);
fn get_padded() -> Padded;
fn take_packed(x: Packed);
fn get_packed() -> Packed;
}
// CHECK-LABEL: pass_padded
#[unsafe(no_mangle)]
extern "C" fn pass_padded(out: &mut Padded, x: Padded) {
// CHECK: sb a1, 0(a0)
// CHECK-NEXT: fsd fa0, 64(a0)
// CHECK-NEXT: ret
*out = x;
}
// CHECK-LABEL: ret_padded
#[unsafe(no_mangle)]
extern "C" fn ret_padded(x: &Padded) -> Padded {
// CHECK: fld fa0, 64(a0)
// CHECK-NEXT: lbu a0, 0(a0)
// CHECK-NEXT: ret
*x
}
#[unsafe(no_mangle)]
extern "C" fn call_padded(x: &Padded) {
// CHECK: fld fa0, 64(a0)
// CHECK-NEXT: lbu a0, 0(a0)
// CHECK-NEXT: tail take_padded
unsafe {
take_padded(*x);
}
}
#[unsafe(no_mangle)]
extern "C" fn receive_padded(out: &mut Padded) {
// CHECK: addi sp, sp, -16
// CHECK-NEXT: .cfi_def_cfa_offset 16
// CHECK-NEXT: sd ra, [[#%d,RA_SPILL:]](sp)
// CHECK-NEXT: sd [[TEMP:.*]], [[#%d,TEMP_SPILL:]](sp)
// CHECK-NEXT: .cfi_offset ra, [[#%d,RA_SPILL - 16]]
// CHECK-NEXT: .cfi_offset [[TEMP]], [[#%d,TEMP_SPILL - 16]]
// CHECK-NEXT: mv [[TEMP]], a0
// CHECK-NEXT: call get_padded
// CHECK-NEXT: sb a0, 0([[TEMP]])
// CHECK-NEXT: fsd fa0, 64([[TEMP]])
// CHECK-NEXT: ld ra, [[#%d,RA_SPILL]](sp)
// CHECK-NEXT: ld [[TEMP]], [[#%d,TEMP_SPILL]](sp)
// CHECK: addi sp, sp, 16
// CHECK: ret
unsafe {
*out = get_padded();
}
}
// CHECK-LABEL: pass_packed
#[unsafe(no_mangle)]
extern "C" fn pass_packed(out: &mut Packed, x: Packed) {
// CHECK: addi sp, sp, -16
// CHECK-NEXT: .cfi_def_cfa_offset 16
// CHECK-NEXT: sb a1, 0(a0)
// CHECK-NEXT: fsw fa0, 8(sp)
// CHECK-NEXT: lw [[VALUE:.*]], 8(sp)
// CHECK-DAG: srli [[BYTE4:.*]], [[VALUE]], 24
// CHECK-DAG: srli [[BYTE3:.*]], [[VALUE]], 16
// CHECK-DAG: srli [[BYTE2:.*]], [[VALUE]], 8
// CHECK-DAG: sb [[VALUE]], 1(a0)
// CHECK-DAG: sb [[BYTE2]], 2(a0)
// CHECK-DAG: sb [[BYTE3]], 3(a0)
// CHECK-DAG: sb [[BYTE4]], 4(a0)
// CHECK-NEXT: addi sp, sp, 16
// CHECK: ret
*out = x;
}
// CHECK-LABEL: ret_packed
#[unsafe(no_mangle)]
extern "C" fn ret_packed(x: &Packed) -> Packed {
// CHECK: addi sp, sp, -16
// CHECK-NEXT: .cfi_def_cfa_offset 16
// CHECK-NEXT: lbu [[BYTE2:.*]], 2(a0)
// CHECK-NEXT: lbu [[BYTE1:.*]], 1(a0)
// CHECK-NEXT: lbu [[BYTE3:.*]], 3(a0)
// CHECK-NEXT: lbu [[BYTE4:.*]], 4(a0)
// CHECK-NEXT: slli [[SHIFTED2:.*]], [[BYTE2]], 8
// CHECK-NEXT: or [[BYTE12:.*]], [[SHIFTED2]], [[BYTE1]]
// CHECK-NEXT: slli [[SHIFTED3:.*]], [[BYTE3]], 16
// CHECK-NEXT: slli [[SHIFTED4:.*]], [[BYTE4]], 24
// CHECK-NEXT: or [[BYTE34:.*]], [[SHIFTED3]], [[SHIFTED4]]
// CHECK-NEXT: or [[VALUE:.*]], [[BYTE12]], [[BYTE34]]
// CHECK-NEXT: sw [[VALUE]], 8(sp)
// CHECK-NEXT: flw fa0, 8(sp)
// CHECK-NEXT: lbu a0, 0(a0)
// CHECK-NEXT: addi sp, sp, 16
// CHECK: ret
*x
}
#[unsafe(no_mangle)]
extern "C" fn call_packed(x: &Packed) {
// CHECK: addi sp, sp, -16
// CHECK-NEXT: .cfi_def_cfa_offset 16
// CHECK-NEXT: lbu [[BYTE2:.*]], 2(a0)
// CHECK-NEXT: lbu [[BYTE1:.*]], 1(a0)
// CHECK-NEXT: lbu [[BYTE3:.*]], 3(a0)
// CHECK-NEXT: lbu [[BYTE4:.*]], 4(a0)
// CHECK-NEXT: slli [[SHIFTED2:.*]], [[BYTE2]], 8
// CHECK-NEXT: or [[BYTE12:.*]], [[SHIFTED2]], [[BYTE1]]
// CHECK-NEXT: slli [[SHIFTED3:.*]], [[BYTE3]], 16
// CHECK-NEXT: slli [[SHIFTED4:.*]], [[BYTE4]], 24
// CHECK-NEXT: or [[BYTE34:.*]], [[SHIFTED3]], [[SHIFTED4]]
// CHECK-NEXT: or [[VALUE:.*]], [[BYTE12]], [[BYTE34]]
// CHECK-NEXT: sw [[VALUE]], 8(sp)
// CHECK-NEXT: flw fa0, 8(sp)
// CHECK-NEXT: lbu a0, 0(a0)
// CHECK-NEXT: addi sp, sp, 16
// CHECK: tail take_packed
unsafe {
take_packed(*x);
}
}
#[unsafe(no_mangle)]
extern "C" fn receive_packed(out: &mut Packed) {
// CHECK: addi sp, sp, -32
// CHECK-NEXT: .cfi_def_cfa_offset 32
// CHECK-NEXT: sd ra, [[#%d,RA_SPILL:]](sp)
// CHECK-NEXT: sd [[TEMP:.*]], [[#%d,TEMP_SPILL:]](sp)
// CHECK-NEXT: .cfi_offset ra, [[#%d,RA_SPILL - 32]]
// CHECK-NEXT: .cfi_offset [[TEMP]], [[#%d,TEMP_SPILL - 32]]
// CHECK-NEXT: mv [[TEMP]], a0
// CHECK-NEXT: call get_packed
// CHECK-NEXT: sb a0, 0([[TEMP]])
// CHECK-NEXT: fsw fa0, [[FLOAT_SPILL:.*]](sp)
// CHECK-NEXT: lw [[VALUE:.*]], [[FLOAT_SPILL]](sp)
// CHECK-DAG: srli [[BYTE4:.*]], [[VALUE]], 24
// CHECK-DAG: srli [[BYTE3:.*]], [[VALUE]], 16
// CHECK-DAG: srli [[BYTE2:.*]], [[VALUE]], 8
// CHECK-DAG: sb [[VALUE]], 1([[TEMP]])
// CHECK-DAG: sb [[BYTE2]], 2([[TEMP]])
// CHECK-DAG: sb [[BYTE3]], 3([[TEMP]])
// CHECK-DAG: sb [[BYTE4]], 4([[TEMP]])
// CHECK-NEXT: ld ra, [[#%d,RA_SPILL]](sp)
// CHECK-NEXT: ld [[TEMP]], [[#%d,TEMP_SPILL]](sp)
// CHECK: addi sp, sp, 32
// CHECK: ret
unsafe {
*out = get_packed();
}
}

View file

@@ -0,0 +1,44 @@
//@ add-core-stubs
//@ compile-flags: -Copt-level=0 -Cdebuginfo=0 --target riscv64gc-unknown-linux-gnu
//@ needs-llvm-components: riscv
#![feature(no_core, lang_items)]
#![no_std]
#![no_core]
#![crate_type = "lib"]
extern crate minicore;
use minicore::*;
#[repr(C, align(64))]
struct Aligned(f64);
#[repr(C, align(64))]
struct AlignedPair(f32, f64);
impl Copy for Aligned {}
impl Copy for AlignedPair {}
// CHECK-LABEL: define double @read_aligned
#[unsafe(no_mangle)]
pub extern "C" fn read_aligned(x: &Aligned) -> Aligned {
// CHECK: %[[TEMP:.*]] = alloca [64 x i8], align 64
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 64 %[[TEMP]], ptr align 64 %[[PTR:.*]], i64 64, i1 false)
// CHECK-NEXT: %[[RES:.*]] = load double, ptr %[[TEMP]], align 64
// CHECK-NEXT: ret double %[[RES]]
*x
}
// CHECK-LABEL: define { float, double } @read_aligned_pair
#[unsafe(no_mangle)]
pub extern "C" fn read_aligned_pair(x: &AlignedPair) -> AlignedPair {
// CHECK: %[[TEMP:.*]] = alloca [64 x i8], align 64
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 64 %[[TEMP]], ptr align 64 %[[PTR:.*]], i64 64, i1 false)
// CHECK-NEXT: %[[FIRST:.*]] = load float, ptr %[[TEMP]], align 64
// CHECK-NEXT: %[[SECOND_PTR:.*]] = getelementptr inbounds i8, ptr %[[TEMP]], i64 8
// CHECK-NEXT: %[[SECOND:.*]] = load double, ptr %[[SECOND_PTR]], align 8
// CHECK-NEXT: %[[RES1:.*]] = insertvalue { float, double } poison, float %[[FIRST]], 0
// CHECK-NEXT: %[[RES2:.*]] = insertvalue { float, double } %[[RES1]], double %[[SECOND]], 1
// CHECK-NEXT: ret { float, double } %[[RES2]]
*x
}

View file

@@ -0,0 +1,44 @@
//@ run-pass
use std::fmt::Debug;
use std::hint::black_box;
#[repr(C)]
#[derive(Copy, Clone, PartialEq, Debug, Default)]
struct Regular(f32, f64);
#[repr(C, packed)]
#[derive(Copy, Clone, PartialEq, Debug, Default)]
struct Packed(f32, f64);
#[repr(C, align(64))]
#[derive(Copy, Clone, PartialEq, Debug, Default)]
struct AlignedF32(f32);
#[repr(C)]
#[derive(Copy, Clone, PartialEq, Debug, Default)]
struct Aligned(f64, AlignedF32);
#[inline(never)]
extern "C" fn read<T: Copy>(x: &T) -> T {
*black_box(x)
}
#[inline(never)]
extern "C" fn write<T: Copy>(x: T, dest: &mut T) {
*dest = black_box(x)
}
#[track_caller]
fn check<T: Copy + PartialEq + Debug + Default>(x: T) {
assert_eq!(read(&x), x);
let mut out = T::default();
write(x, &mut out);
assert_eq!(out, x);
}
fn main() {
check(Regular(1.0, 2.0));
check(Packed(3.0, 4.0));
check(Aligned(5.0, AlignedF32(6.0)));
}