Merge pull request #1903 from folkertdev/s390x-llvm-21-fixes

`s390x` llvm 21 improvements
This commit is contained in:
Amanieu d'Antras 2025-08-21 20:31:06 +00:00 committed by GitHub
commit b2189b8ff6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -94,8 +94,6 @@ unsafe extern "unadjusted" {
#[link_name = "llvm.s390.vsrlb"] fn vsrlb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
#[link_name = "llvm.s390.vslb"] fn vslb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
#[link_name = "llvm.s390.vsldb"] fn vsldb(a: i8x16, b: i8x16, c: u32) -> i8x16;
#[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16;
#[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16;
#[link_name = "llvm.s390.verimb"] fn verimb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char, d: i32) -> vector_signed_char;
@ -169,13 +167,6 @@ unsafe extern "unadjusted" {
#[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>;
#[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
#[link_name = "llvm.s390.vuplb"] fn vuplb (a: vector_signed_char) -> vector_signed_short;
#[link_name = "llvm.s390.vuplhw"] fn vuplhw (a: vector_signed_short) -> vector_signed_int;
#[link_name = "llvm.s390.vuplf"] fn vuplf (a: vector_signed_int) -> vector_signed_long_long;
#[link_name = "llvm.s390.vupllb"] fn vupllb (a: vector_unsigned_char) -> vector_unsigned_short;
#[link_name = "llvm.s390.vupllh"] fn vupllh (a: vector_unsigned_short) -> vector_unsigned_int;
#[link_name = "llvm.s390.vupllf"] fn vupllf (a: vector_unsigned_int) -> vector_unsigned_long_long;
#[link_name = "llvm.s390.vavgb"] fn vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
#[link_name = "llvm.s390.vavgh"] fn vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short;
#[link_name = "llvm.s390.vavgf"] fn vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
@ -188,22 +179,6 @@ unsafe extern "unadjusted" {
#[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
#[link_name = "llvm.s390.vmeb"] fn vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short;
#[link_name = "llvm.s390.vmeh"] fn vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int;
#[link_name = "llvm.s390.vmef"] fn vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
#[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
#[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
#[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
#[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short;
#[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int;
#[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
#[link_name = "llvm.s390.vmlob"] fn vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short;
#[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int;
#[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
#[link_name = "llvm.s390.vmhb"] fn vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char;
#[link_name = "llvm.s390.vmhh"] fn vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short;
#[link_name = "llvm.s390.vmhf"] fn vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
@ -379,7 +354,20 @@ impl<const N: usize> ShuffleMask<N> {
ShuffleMask(mask)
}
const fn pack() -> Self {
/// Builds the mask `[0, 2, 4, ...]`: entry `k` holds the value `2 * k`,
/// for every `k` in `0..N`.
const fn even() -> Self {
    let mut lanes = [0; N];
    let mut slot = 0;
    let mut source = 0;
    // `const fn` cannot use iterators, so fill the array with a manual loop.
    while slot < N {
        lanes[slot] = source as u32;
        slot += 1;
        source += 2;
    }
    ShuffleMask(lanes)
}
const fn odd() -> Self {
let mut mask = [0; N];
let mut i = 1;
let mut index = 0;
@ -392,6 +380,10 @@ impl<const N: usize> ShuffleMask<N> {
ShuffleMask(mask)
}
/// Mask named `pack`: delegates to `Self::odd()`, so both constructors
/// produce exactly the same mask.
const fn pack() -> Self {
Self::odd()
}
const fn unpack_low() -> Self {
let mut mask = [0; N];
let mut i = 0;
@ -1201,10 +1193,8 @@ mod sealed {
test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] }
test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] }
// FIXME(llvm) llvm trunk already lowers roundeven to vfidb, but rust does not use it yet
// use https://godbolt.org/z/cWq95fexe to check, and enable the instruction test when it works
test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] }
test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] }
test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, "vector-enhancements-1" vfisb] }
test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, vfidb] }
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorRoundc {
@ -2362,6 +2352,9 @@ mod sealed {
unsafe fn vec_packs(self, b: Other) -> Self::Result;
}
// FIXME(llvm): https://github.com/llvm/llvm-project/issues/153655
// Other targets can use a min/max for the saturation + a truncation.
impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char }
impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char }
impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short }
@ -2583,8 +2576,14 @@ mod sealed {
unsafe fn vec_unpackl(self) -> Self::Result;
}
// FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like
// unpackh above. Tracked in https://github.com/llvm/llvm-project/issues/129576.
// NOTE: `vuplh` is used for "unpack logical high", hence `vuplhw`.
impl_vec_unpack!(unpack_low vuplb vector_signed_char i8x8 vector_signed_short 8);
impl_vec_unpack!(unpack_low vuplhw vector_signed_short i16x4 vector_signed_int 4);
impl_vec_unpack!(unpack_low vuplf vector_signed_int i32x2 vector_signed_long_long 2);
impl_vec_unpack!(unpack_low vupllb vector_unsigned_char u8x8 vector_unsigned_short 8);
impl_vec_unpack!(unpack_low vupllh vector_unsigned_short u16x4 vector_unsigned_int 4);
impl_vec_unpack!(unpack_low vupllf vector_unsigned_int u32x2 vector_unsigned_long_long 2);
impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplb (vector_signed_char) -> vector_signed_short}
impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int}
@ -2645,61 +2644,65 @@ mod sealed {
unsafe fn vec_mule(self, b: Self) -> Result;
}
// FIXME(llvm) sadly this does not yet work https://github.com/llvm/llvm-project/issues/129705
// #[target_feature(enable = "vector")]
// #[cfg_attr(test, assert_instr(vmleh))]
// unsafe fn vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int {
// let even_a: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>(
// a,
// a,
// const { ShuffleMask([0, 2, 4, 6]) },
// ));
//
// let even_b: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>(
// b,
// b,
// const { ShuffleMask([0, 2, 4, 6]) },
// ));
//
// simd_mul(even_a, even_b)
// }
// Generates an `unsafe fn $instr(a: $src, b: $src) -> $dst` that multiplies a
// subset of the elements of `a` and `b` after converting them to `$dst`.
//
// Parameters:
// - `$mask`: name of the `ShuffleMask` constructor that selects the elements
//   (invoked as `ShuffleMask::<$width>::$mask()`).
// - `$instr`: name of the generated function, also the instruction passed to
//   `assert_instr` under `cfg(test)`.
// - `$src`: type of both inputs.
// - `$shuffled`: type produced by the shuffle, before conversion.
// - `$dst`: return type; the shuffled elements are converted to it with `simd_as`.
// - `$width`: length of the shuffle mask.
macro_rules! impl_vec_mul_even_odd {
($mask:ident $instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => {
#[inline]
#[target_feature(enable = "vector")]
#[cfg_attr(test, assert_instr($instr))]
unsafe fn $instr(a: $src, b: $src) -> $dst {
// Select the masked elements of `a` and convert them to the result type.
let elems_a: $dst = simd_as(simd_shuffle::<_, _, $shuffled>(
a,
a, // this argument is ignored entirely.
const { ShuffleMask::<$width>::$mask() },
));
test_impl! { vec_vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmeb, vmeb ] }
test_impl! { vec_vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmeh, vmeh ] }
test_impl! { vec_vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmef, vmef ] }
// Same selection and conversion for `b`.
let elems_b: $dst = simd_as(simd_shuffle::<_, _, $shuffled>(
b,
b, // this argument is ignored entirely.
const { ShuffleMask::<$width>::$mask() },
));
test_impl! { vec_vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmleb, vmleb ] }
test_impl! { vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmleh, vmleh ] }
test_impl! { vec_vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlef, vmlef ] }
// Multiply the two sets of converted elements with `simd_mul`.
simd_mul(elems_a, elems_b)
}
};
}
impl_mul!([VectorMule vec_mule] vec_vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short );
impl_mul!([VectorMule vec_mule] vec_vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int);
impl_mul!([VectorMule vec_mule] vec_vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
impl_vec_mul_even_odd! { even vmeb vector_signed_char i8x8 vector_signed_short 8 }
impl_vec_mul_even_odd! { even vmeh vector_signed_short i16x4 vector_signed_int 4 }
impl_vec_mul_even_odd! { even vmef vector_signed_int i32x2 vector_signed_long_long 2 }
impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
impl_vec_mul_even_odd! { even vmleb vector_unsigned_char u8x8 vector_unsigned_short 8 }
impl_vec_mul_even_odd! { even vmleh vector_unsigned_short u16x4 vector_unsigned_int 4 }
impl_vec_mul_even_odd! { even vmlef vector_unsigned_int u32x2 vector_unsigned_long_long 2 }
impl_mul!([VectorMule vec_mule] vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short );
impl_mul!([VectorMule vec_mule] vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int);
impl_mul!([VectorMule vec_mule] vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
impl_mul!([VectorMule vec_mule] vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
impl_mul!([VectorMule vec_mule] vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
impl_mul!([VectorMule vec_mule] vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorMulo<Result> {
unsafe fn vec_mulo(self, b: Self) -> Result;
}
test_impl! { vec_vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmob, vmob ] }
test_impl! { vec_vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmoh, vmoh ] }
test_impl! { vec_vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmof, vmof ] }
impl_vec_mul_even_odd! { odd vmob vector_signed_char i8x8 vector_signed_short 8 }
impl_vec_mul_even_odd! { odd vmoh vector_signed_short i16x4 vector_signed_int 4 }
impl_vec_mul_even_odd! { odd vmof vector_signed_int i32x2 vector_signed_long_long 2 }
test_impl! { vec_vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmlob, vmlob ] }
test_impl! { vec_vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmloh, vmloh ] }
test_impl! { vec_vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlof, vmlof ] }
impl_vec_mul_even_odd! { odd vmlob vector_unsigned_char u8x8 vector_unsigned_short 8 }
impl_vec_mul_even_odd! { odd vmloh vector_unsigned_short u16x4 vector_unsigned_int 4 }
impl_vec_mul_even_odd! { odd vmlof vector_unsigned_int u32x2 vector_unsigned_long_long 2 }
impl_mul!([VectorMulo vec_mulo] vec_vmob (vector_signed_char, vector_signed_char) -> vector_signed_short );
impl_mul!([VectorMulo vec_mulo] vec_vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int);
impl_mul!([VectorMulo vec_mulo] vec_vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
impl_mul!([VectorMulo vec_mulo] vmob (vector_signed_char, vector_signed_char) -> vector_signed_short );
impl_mul!([VectorMulo vec_mulo] vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int);
impl_mul!([VectorMulo vec_mulo] vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long );
impl_mul!([VectorMulo vec_mulo] vec_vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
impl_mul!([VectorMulo vec_mulo] vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short );
impl_mul!([VectorMulo vec_mulo] vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int);
impl_mul!([VectorMulo vec_mulo] vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long );
#[unstable(feature = "stdarch_s390x", issue = "135681")]
pub trait VectorMulh<Result> {
@ -3322,8 +3325,7 @@ mod sealed {
#[inline]
#[target_feature(enable = "vector")]
// FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
// #[cfg_attr(test, assert_instr(vsegb))]
#[cfg_attr(test, assert_instr(vsegb))]
pub unsafe fn vsegb(a: vector_signed_char) -> vector_signed_long_long {
simd_as(simd_shuffle::<_, _, i8x2>(
a,
@ -3334,8 +3336,7 @@ mod sealed {
#[inline]
#[target_feature(enable = "vector")]
// FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
// #[cfg_attr(test, assert_instr(vsegh))]
#[cfg_attr(test, assert_instr(vsegh))]
pub unsafe fn vsegh(a: vector_signed_short) -> vector_signed_long_long {
simd_as(simd_shuffle::<_, _, i16x2>(
a,
@ -3346,8 +3347,7 @@ mod sealed {
#[inline]
#[target_feature(enable = "vector")]
// FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899
// #[cfg_attr(test, assert_instr(vsegf))]
#[cfg_attr(test, assert_instr(vsegf))]
pub unsafe fn vsegf(a: vector_signed_int) -> vector_signed_long_long {
simd_as(simd_shuffle::<_, _, i32x2>(
a,
@ -3485,10 +3485,44 @@ mod sealed {
unsafe fn vec_sldb<const C: u32>(self, b: Self) -> Self;
}
// FIXME(llvm) https://github.com/llvm/llvm-project/issues/129955
// ideally we could implement this in terms of llvm.fshl.i128
// #[link_name = "llvm.fshl.i128"] fn fshl_i128(a: u128, b: u128, c: u128) -> u128;
// transmute(fshl_i128(transmute(a), transmute(b), const { C * 8 } ))
// Codegen check for `vec_sld`: calls it with the constant 13; under `cfg(test)`
// the `assert_instr` attribute names `vsldb` as the expected instruction.
#[inline]
#[target_feature(enable = "vector")]
#[cfg_attr(test, assert_instr(vsldb))]
unsafe fn test_vec_sld(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
a.vec_sld::<13>(b)
}
// Codegen check for `vec_sldw`: calls it with the constant 3; under `cfg(test)`
// the `assert_instr` attribute names `vsldb` as the expected instruction.
#[inline]
#[target_feature(enable = "vector")]
#[cfg_attr(test, assert_instr(vsldb))]
unsafe fn test_vec_sldw(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
a.vec_sldw::<3>(b)
}
// Codegen check for `vec_sldb`: calls it with the constant 7; under `cfg(test)`
// the `assert_instr` attribute names `vsld` as the expected instruction.
// Requires the `vector-enhancements-2` target feature.
#[inline]
#[target_feature(enable = "vector-enhancements-2")]
#[cfg_attr(test, assert_instr(vsld))]
unsafe fn test_vec_sldb(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
a.vec_sldb::<7>(b)
}
// Codegen check for `vec_srdb`: calls it with the constant 7; under `cfg(test)`
// the `assert_instr` attribute names `vsrd` as the expected instruction.
// Requires the `vector-enhancements-2` target feature.
#[inline]
#[target_feature(enable = "vector-enhancements-2")]
#[cfg_attr(test, assert_instr(vsrd))]
unsafe fn test_vec_srdb(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
a.vec_srdb::<7>(b)
}
/// Funnel shift left on `u128` values.
///
/// Each argument is reinterpreted as a one-lane `u128` SIMD vector, passed to
/// `simd_funnel_shl`, and the single-lane result is reinterpreted back as `u128`.
///
/// NOTE(review): per the `simd_funnel_shl` intrinsic documentation, `a` is
/// expected to be the most significant half of the concatenation and `c` must be
/// smaller than the element bit width (128) — confirm against the intrinsic docs.
unsafe fn funnel_shl_u128(a: u128, b: u128, c: u128) -> u128 {
    // The SIMD intrinsic only accepts vector types, so wrap each scalar in a
    // vector with exactly one lane.
    #[repr(simd)]
    struct Single([u128; 1]);

    let first: Single = transmute(a);
    let second: Single = transmute(b);
    let amount: Single = transmute(c);

    let shifted = simd_funnel_shl::<Single>(first, second, amount);
    transmute(shifted)
}
macro_rules! impl_vec_sld {
($($ty:ident)*) => {
@ -3499,21 +3533,21 @@ mod sealed {
#[target_feature(enable = "vector")]
unsafe fn vec_sld<const C: u32>(self, b: Self) -> Self {
static_assert_uimm_bits!(C, 4);
transmute(vsldb(transmute(self), transmute(b), C))
transmute(funnel_shl_u128(transmute(self), transmute(b), const { C as u128 * 8 }))
}
#[inline]
#[target_feature(enable = "vector")]
unsafe fn vec_sldw<const C: u32>(self, b: Self) -> Self {
static_assert_uimm_bits!(C, 2);
transmute(vsldb(transmute(self), transmute(b), const { 4 * C }))
transmute(funnel_shl_u128(transmute(self), transmute(b), const { C as u128 * 4 * 8 }))
}
#[inline]
#[target_feature(enable = "vector-enhancements-2")]
unsafe fn vec_sldb<const C: u32>(self, b: Self) -> Self {
static_assert_uimm_bits!(C, 3);
transmute(vsld(transmute(self), transmute(b), C))
transmute(funnel_shl_u128(transmute(self), transmute(b), const { C as u128 }))
}
}
@ -3524,6 +3558,11 @@ mod sealed {
unsafe fn vec_srdb<const C: u32>(self, b: Self) -> Self {
static_assert_uimm_bits!(C, 3);
transmute(vsrd(transmute(self), transmute(b), C))
// FIXME(llvm): https://github.com/llvm/llvm-project/issues/129955#issuecomment-3207488190
// LLVM currently rewrites `fshr` to `fshl`, and the logic in the s390x
// backend cannot deal with that yet.
// #[link_name = "llvm.fshr.i128"] fn fshr_i128(a: u128, b: u128, c: u128) -> u128;
// transmute(fshr_i128(transmute(self), transmute(b), const { C as u128 }))
}
}
)*
@ -4679,11 +4718,9 @@ pub unsafe fn vec_subc_u128(
a: vector_unsigned_char,
b: vector_unsigned_char,
) -> vector_unsigned_char {
// FIXME(llvm) sadly this does not work https://github.com/llvm/llvm-project/issues/129608
// let a: u128 = transmute(a);
// let b: u128 = transmute(b);
// transmute(!a.overflowing_sub(b).1 as u128)
transmute(vscbiq(transmute(a), transmute(b)))
let a: u128 = transmute(a);
let b: u128 = transmute(b);
transmute(!a.overflowing_sub(b).1 as u128)
}
/// Vector Add Compute Carryout unsigned 128-bits
@ -4715,7 +4752,7 @@ pub unsafe fn vec_adde_u128(
let a: u128 = transmute(a);
let b: u128 = transmute(b);
let c: u128 = transmute(c);
// FIXME(llvm) sadly this does not work
// FIXME(llvm) https://github.com/llvm/llvm-project/pull/153557
// let (d, _carry) = a.carrying_add(b, c & 1 != 0);
// transmute(d)
transmute(vacq(a, b, c))