From 34629d4d20b799f52a3b1f57d7897566c2a2f8fb Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Sat, 1 Mar 2025 15:47:37 +0100 Subject: [PATCH] add `vec_pack`, `vec_packs` and `vec_packsu` --- .../crates/core_arch/src/s390x/vector.rs | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/library/stdarch/crates/core_arch/src/s390x/vector.rs b/library/stdarch/crates/core_arch/src/s390x/vector.rs index 7931accf3046..2d5e696b0474 100644 --- a/library/stdarch/crates/core_arch/src/s390x/vector.rs +++ b/library/stdarch/crates/core_arch/src/s390x/vector.rs @@ -151,6 +151,14 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.lcbb"] fn lcbb(a: *const u8, b: u32) -> u32; #[link_name = "llvm.s390.vlbb"] fn vlbb(a: *const u8, b: u32) -> MaybeUninit; + + #[link_name = "llvm.s390.vpksh"] fn vpksh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char; + #[link_name = "llvm.s390.vpksf"] fn vpksf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short; + #[link_name = "llvm.s390.vpksg"] fn vpksg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_int; + + #[link_name = "llvm.s390.vpklsh"] fn vpklsh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char; + #[link_name = "llvm.s390.vpklsf"] fn vpklsf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short; + #[link_name = "llvm.s390.vpklsg"] fn vpklsg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_unsigned_int; } impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 } @@ -203,6 +211,19 @@ impl ShuffleMask { } ShuffleMask(mask) } + + const fn pack() -> Self { + let mut mask = [0; N]; + let mut i = 1; + let mut index = 0; + while index < N { + mask[index] = i as u32; + + i += 2; + index += 1; + } + ShuffleMask(mask) + } } const fn genmask() -> [u8; 16] { @@ -2009,6 +2030,114 @@ mod sealed { vector_unsigned_long_long([a, b]) } } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn pack(a: T, b: T) -> T { + simd_shuffle(a, b, const { ShuffleMask::::pack() }) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkh))] + unsafe fn vpkh(a: i16x8, b: i16x8) -> i8x16 { + let a: i8x16 = transmute(a); + let b: i8x16 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<16>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkf))] + unsafe fn vpkf(a: i32x4, b: i32x4) -> i16x8 { + let a: i16x8 = transmute(a); + let b: i16x8 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<8>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkg))] + unsafe fn vpkg(a: i64x2, b: i64x2) -> i32x4 { + let a: i32x4 = transmute(a); + let b: i32x4 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<4>::pack() }) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPack { + type Result; + unsafe fn vec_pack(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_bool_short, vector_bool_short) -> vector_bool_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_bool_int, vector_bool_int) -> vector_bool_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_bool_long_long, vector_bool_long_long) -> vector_bool_int } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPacks { + type Result; + unsafe fn vec_packs(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPacks vec_packs] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacks vec_packs] vpksg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! { [VectorPacks vec_packs] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } + + #[unstable(feature = "stdarch_powerpc", issue = "111145")] + pub trait VectorPacksu { + type Result; + unsafe fn vec_packsu(self, b: Other) -> Self::Result; + } + + unsafe fn simd_smax(a: T, b: T) -> T { + simd_select::(simd_gt::(a, b), a, b) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsh))] + unsafe fn vpacksuh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char { + vpklsh( + simd_smax(a, vector_signed_short([0; 8])), + simd_smax(b, vector_signed_short([0; 8])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsf))] + unsafe fn vpacksuf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short { + vpklsf( + simd_smax(a, vector_signed_int([0; 4])), + simd_smax(b, vector_signed_int([0; 4])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsg))] + unsafe fn vpacksug( + a: vector_signed_long_long, + b: vector_signed_long_long, + ) -> vector_unsigned_int { + vpklsg( + simd_smax(a, vector_signed_long_long([0; 2])), + simd_smax(b, vector_signed_long_long([0; 2])), + ) + } + + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuh (vector_signed_short, vector_signed_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuf (vector_signed_int, vector_signed_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksug (vector_signed_long_long, vector_signed_long_long) -> vector_unsigned_int } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } } /// Load Count to Block Boundary @@ -2544,6 +2673,30 @@ where a.vec_mergeh(b) } +/// Vector Pack +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_pack, U>(a: T, b: U) -> T::Result { + a.vec_pack(b) +} + +/// Vector Pack Saturated +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_packs, U>(a: T, b: U) -> T::Result { + a.vec_packs(b) +} + +/// Vector Pack Saturated Unsigned +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_packsu, U>(a: T, b: U) -> T::Result { + a.vec_packsu(b) +} + /// Merges the least significant ("low") halves of two vectors. #[inline] #[target_feature(enable = "vector")] @@ -2952,6 +3105,11 @@ mod tests { assert_eq!(ShuffleMask::<4>::merge_high().0, [0, 4, 1, 5]); } + #[test] + fn pack_mask() { + assert_eq!(ShuffleMask::<4>::pack().0, [1, 3, 5, 7]); + } + #[test] fn test_vec_mask() { assert_eq!( @@ -3718,4 +3876,28 @@ mod tests { assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[56..].as_ptr()) }, 8); assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[48..].as_ptr()) }, 16); } + + test_vec_2! { test_vec_pack, vec_pack, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, -1, 0, 48, -48, -1, 0, 57, -57, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packs, vec_packs, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, 127, -128, 127, -128, 127, -128, 127, -128, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packsu_signed, vec_packsu, i16x8, i16x8 -> u8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, 0, 42, 255, 0, 255, 0, 255, 0, 255, 0, 0, 1, 0, 42] + } + + test_vec_2! { test_vec_packsu_unsigned, vec_packsu, u16x8, u16x8 -> u8x16, + [65535, 32768, 1234, 5678, 16, 8, 4, 2], + [30000, 25000, 20000, 15000, 31, 63, 127, 255], + [255, 255, 255, 255, 16, 8, 4, 2, 255, 255, 255, 255, 31, 63, 127, 255] + } }