Auto merge of #141309 - RalfJung:x86-simd-abi, r=tgross35,nikic,workingjubilee
x86 (32/64): go back to passing SIMD vectors by-ptr Fixes https://github.com/rust-lang/rust/issues/139029 by partially reverting https://github.com/rust-lang/rust/pull/135408 and going back to passing SIMD vectors by-ptr on x86. Sadly, by-val confuses the LLVM inliner so much that it's not worth it... Also fixes https://github.com/rust-lang/rust/issues/141848 by no longer actually using vector registers with the "Rust" ABI. r? `@tgross35` Cc `@nikic` try-job: `test-various*` try-job: dist-i586-gnu-i586-i686-musl try-job: x86_64-gnu-nopt try-job: `x86_64-msvc*` try-job: `i686-msvc*`
This commit is contained in:
commit
ff223d35cd
5 changed files with 34 additions and 51 deletions
|
|
@ -35,8 +35,6 @@ fn do_check_simd_vector_abi<'tcx>(
|
|||
is_call: bool,
|
||||
loc: impl Fn() -> (Span, HirId),
|
||||
) {
|
||||
// We check this on all functions, including those using the "Rust" ABI.
|
||||
// For the "Rust" ABI it would be a bug if the lint ever triggered, but better safe than sorry.
|
||||
let feature_def = tcx.sess.target.features_for_correct_vector_abi();
|
||||
let codegen_attrs = tcx.codegen_fn_attrs(def_id);
|
||||
let have_feature = |feat: Symbol| {
|
||||
|
|
@ -123,8 +121,9 @@ fn do_check_wasm_abi<'tcx>(
|
|||
is_call: bool,
|
||||
loc: impl Fn() -> (Span, HirId),
|
||||
) {
|
||||
// Only proceed for `extern "C" fn` on wasm32-unknown-unknown (same check as what `adjust_for_foreign_abi` uses to call `compute_wasm_abi_info`),
|
||||
// and only proceed if `wasm_c_abi_opt` indicates we should emit the lint.
|
||||
// Only proceed for `extern "C" fn` on wasm32-unknown-unknown (same check as what
|
||||
// `adjust_for_foreign_abi` uses to call `compute_wasm_abi_info`), and only proceed if
|
||||
// `wasm_c_abi_opt` indicates we should emit the lint.
|
||||
if !(tcx.sess.target.arch == "wasm32"
|
||||
&& tcx.sess.target.os == "unknown"
|
||||
&& tcx.wasm_c_abi_opt() == WasmCAbi::Legacy { with_lint: true }
|
||||
|
|
@ -157,8 +156,15 @@ fn check_instance_abi<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) {
|
|||
else {
|
||||
// An error will be reported during codegen if we cannot determine the ABI of this
|
||||
// function.
|
||||
tcx.dcx().delayed_bug("ABI computation failure should lead to compilation failure");
|
||||
return;
|
||||
};
|
||||
// Unlike the call-site check, we do also check "Rust" ABI functions here.
|
||||
// This should never trigger, *except* if we start making use of vector registers
|
||||
// for the "Rust" ABI and the user disables those vector registers (which should trigger a
|
||||
// warning as that's clearly disabling a "required" target feature for this target).
|
||||
// Using such a function is where disabling the vector register actually can start leading
|
||||
// to soundness issues, so erroring here seems good.
|
||||
let loc = || {
|
||||
let def_id = instance.def_id();
|
||||
(
|
||||
|
|
@ -179,7 +185,8 @@ fn check_call_site_abi<'tcx>(
|
|||
loc: impl Fn() -> (Span, HirId) + Copy,
|
||||
) {
|
||||
if callee.fn_sig(tcx).abi().is_rustic_abi() {
|
||||
// we directly handle the soundness of Rust ABIs
|
||||
// We directly handle the soundness of Rust ABIs -- so let's skip the majority of
|
||||
// call sites to avoid a perf regression.
|
||||
return;
|
||||
}
|
||||
let typing_env = ty::TypingEnv::fully_monomorphized();
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use rustc_abi::{
|
|||
use rustc_macros::HashStable_Generic;
|
||||
|
||||
pub use crate::spec::AbiMap;
|
||||
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
|
||||
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
|
||||
|
||||
mod aarch64;
|
||||
mod amdgpu;
|
||||
|
|
@ -696,24 +696,6 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
_ => {}
|
||||
};
|
||||
|
||||
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
|
||||
// May only return `true` if the target will always pass those arguments the same way,
|
||||
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
|
||||
// target features are required to pass a SIMD value in registers must be listed in
|
||||
// the `abi_required_features` for the current target and ABI.
|
||||
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
|
||||
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
|
||||
// to 128-bit-sized vectors.
|
||||
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
|
||||
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
|
||||
// FIXME once https://github.com/bytecodealliance/wasmtime/issues/10254 is fixed
|
||||
// accept vectors up to 128bit rather than vectors of exactly 128bit.
|
||||
arg.layout.size.bits() == 128
|
||||
}
|
||||
// So far, we haven't implemented this logic for any other target.
|
||||
_ => false,
|
||||
};
|
||||
|
||||
for (arg_idx, arg) in self
|
||||
.args
|
||||
.iter_mut()
|
||||
|
|
@ -813,9 +795,10 @@ impl<'a, Ty> FnAbi<'a, Ty> {
|
|||
// target feature sets. Some more information about this
|
||||
// issue can be found in #44367.
|
||||
//
|
||||
// Note that the intrinsic ABI is exempt here as those are not
|
||||
// real functions anyway, and the backend expects very specific types.
|
||||
if spec.simd_types_indirect && !can_pass_simd_directly(arg) {
|
||||
// We *could* do better in some cases, e.g. on x86_64 targets where SSE2 is
|
||||
// required. However, it turns out that that makes LLVM worse at optimizing this
|
||||
// code, so we pass things indirectly even there. See #139029 for more on that.
|
||||
if spec.simd_types_indirect {
|
||||
arg.make_indirect();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,8 +27,9 @@ trait Copy {}
|
|||
#[repr(simd)]
|
||||
pub struct Sse([f32; 4]);
|
||||
|
||||
// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}})
|
||||
// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}})
|
||||
// FIXME: due to #139029 we are passing them all indirectly.
|
||||
// x86-64: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
|
||||
// x86-32: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
|
||||
// x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
|
||||
#[no_mangle]
|
||||
pub fn sse_id(x: Sse) -> Sse {
|
||||
|
|
|
|||
|
|
@ -1,14 +1,8 @@
|
|||
//
|
||||
//@ compile-flags: -C no-prepopulate-passes
|
||||
// LLVM IR isn't very portable and the one tested here depends on the ABI
|
||||
// which is different between x86 (where we use SSE registers) and others.
|
||||
// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support
|
||||
// taking the union of multiple `only` annotations.
|
||||
//@ revisions: x86-64 x86-32-sse2 other
|
||||
//@[x86-64] only-x86_64
|
||||
//@[x86-32-sse2] only-rustc_abi-x86-sse2
|
||||
//@[other] ignore-rustc_abi-x86-sse2
|
||||
//@[other] ignore-x86_64
|
||||
// 32bit MSVC does not align things properly so we suppress high alignment annotations (#112480)
|
||||
//@ ignore-i686-pc-windows-msvc
|
||||
//@ ignore-i686-pc-windows-gnu
|
||||
|
||||
#![crate_type = "lib"]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
|
@ -47,9 +41,7 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> {
|
|||
#[no_mangle]
|
||||
pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
|
||||
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
|
||||
// x86-32: ret <4 x float> %[[VAL:.+]]
|
||||
// x86-64: ret <4 x float> %[[VAL:.+]]
|
||||
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
unsafe { std::mem::transmute(x) }
|
||||
}
|
||||
|
||||
|
|
@ -64,8 +56,6 @@ pub fn build_array_t(x: [f32; 4]) -> T {
|
|||
#[no_mangle]
|
||||
pub fn build_array_transmute_t(x: [f32; 4]) -> T {
|
||||
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
|
||||
// x86-32: ret <4 x float> %[[VAL:.+]]
|
||||
// x86-64: ret <4 x float> %[[VAL:.+]]
|
||||
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
|
||||
unsafe { std::mem::transmute(x) }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,16 +30,18 @@ fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> {
|
|||
}
|
||||
}
|
||||
|
||||
// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}})
|
||||
// CHECK-LABEL: square_packed_full
|
||||
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
|
||||
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
|
||||
#[no_mangle]
|
||||
pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> {
|
||||
// The unoptimized version of this is not very interesting to check
|
||||
// since `load` does not get inlined.
|
||||
// opt3-NEXT: start:
|
||||
// opt3-NEXT: load <3 x float>
|
||||
// CHECK-NEXT: start
|
||||
// noopt: alloca [[RET_TYPE]], [[RET_ALIGN]]
|
||||
// CHECK: load <3 x float>
|
||||
let x = load(x);
|
||||
// opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
|
||||
// opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]]
|
||||
// CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
|
||||
// CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
|
||||
// CHECK-NEXT: ret void
|
||||
unsafe { intrinsics::simd_mul(x, x) }
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue