simplify simd_ty, simd_m_ty macros: do not repeat the element type N times

This commit is contained in:
Ralf Jung 2024-02-17 13:35:37 +01:00 committed by Amanieu d'Antras
parent c064146098
commit 92a957da6d
5 changed files with 111 additions and 534 deletions

View file

@ -3736,7 +3736,7 @@ mod tests {
let b = i8x16::new(
17, 18, 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 29, 29, 30, 31,
);
let r = i8x16(1, 5, 9, 13, 17, 21, 25, 29, 35, 39, 41, 45, 49, 53, 58, 61);
let r = i8x16::new(1, 5, 9, 13, 17, 21, 25, 29, 35, 39, 41, 45, 49, 53, 58, 61);
let e: i8x16 = transmute(vpaddq_u8(transmute(a), transmute(b)));
assert_eq!(r, e);
}

View file

@ -3,28 +3,23 @@
#![allow(non_camel_case_types)]
macro_rules! simd_ty {
($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => {
($id:ident [$ety:ident]: $($elem_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct $id($(pub $elem_ty),*);
pub(crate) struct $id { $(pub $elem_name: $ety),* }
#[allow(clippy::use_self)]
impl $id {
// Lane-wise constructor: takes one argument per `$elem_name` and stores each one,
// in order, as the corresponding lane of the vector.
// NOTE(review): this listing is a diff with the +/- markers stripped, so the
// tuple-struct form (`$elem_ty` parameters, `$id(...)`) and the named-field form
// (`$ety` parameters, `$id { ... }`) both appear below. They are alternatives,
// not two definitions; confirm against the commit which one is kept.
#[inline(always)]
pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self {
$id($($elem_name),*)
pub(crate) const fn new($($elem_name: $ety),*) -> Self {
$id { $($elem_name),* }
}
// FIXME: Workaround rust@60637
// Builds a vector in which every lane holds the same `value`.
// NOTE(review): two bodies are shown because the diff markers were stripped.
// The first expands one `{ struct $elem_name; value }` block per lane into a
// tuple-struct call; the second assigns `value` to each named field directly.
#[inline(always)]
pub(crate) const fn splat(value: $ety) -> Self {
$id($({
// We want this to be repeated for each element.
// So we need to use `elem_name` in a `$(...)`.
// But we don't actually need that name for anything so we use a dummy struct.
#[allow(non_camel_case_types, dead_code)]
struct $elem_name;
value
}),*)
$id { $(
$elem_name: value
),* }
}
/// Extract the element at position `index`.
@ -47,10 +42,10 @@ macro_rules! simd_ty {
}
macro_rules! simd_m_ty {
($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => {
($id:ident [$ety:ident]: $($elem_name:ident),*) => {
#[repr(simd)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct $id($(pub $elem_ty),*);
pub(crate) struct $id { $(pub $elem_name: $ety),* }
#[allow(clippy::use_self)]
impl $id {
@ -61,17 +56,15 @@ macro_rules! simd_m_ty {
// Mask constructor: takes one `bool` per lane and passes each through
// `Self::bool_to_internal` before storing it in that lane.
// NOTE(review): `bool_to_internal` is defined outside the visible hunk; the two
// body lines below are the tuple-struct and named-field sides of the same diff.
#[inline(always)]
pub(crate) const fn new($($elem_name: bool),*) -> Self {
$id($(Self::bool_to_internal($elem_name)),*)
$id { $($elem_name: Self::bool_to_internal($elem_name)),* }
}
// FIXME: Workaround rust@60637
// Builds a mask in which every lane holds `Self::bool_to_internal(value)`.
// NOTE(review): two bodies are shown because the diff markers were stripped.
// The first uses a dummy `struct $elem_name;` purely to drive the `$(...)`
// repetition; the second names each field, so no dummy item is needed.
#[inline(always)]
pub(crate) const fn splat(value: bool) -> Self {
$id($({
#[allow(non_camel_case_types, dead_code)]
struct $elem_name;
Self::bool_to_internal(value)
}),*)
$id { $(
$elem_name: Self::bool_to_internal(value)
),* }
}
}
}
@ -79,28 +72,22 @@ macro_rules! simd_m_ty {
// 16-bit wide types:
simd_ty!(u8x2[u8]: u8, u8 | x0, x1);
simd_ty!(i8x2[i8]: i8, i8 | x0, x1);
simd_ty!(u8x2[u8]: x0, x1);
simd_ty!(i8x2[i8]: x0, x1);
// 32-bit wide types:
simd_ty!(u8x4[u8]: u8, u8, u8, u8 | x0, x1, x2, x3);
simd_ty!(u16x2[u16]: u16, u16 | x0, x1);
simd_ty!(u8x4[u8]: x0, x1, x2, x3);
simd_ty!(u16x2[u16]: x0, x1);
simd_ty!(i8x4[i8]: i8, i8, i8, i8 | x0, x1, x2, x3);
simd_ty!(i16x2[i16]: i16, i16 | x0, x1);
simd_ty!(i8x4[i8]: x0, x1, x2, x3);
simd_ty!(i16x2[i16]: x0, x1);
// 64-bit wide types:
simd_ty!(
u8x8[u8]: u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8 | x0,
u8x8[u8]:
x0,
x1,
x2,
x3,
@ -109,19 +96,13 @@ simd_ty!(
x6,
x7
);
// Remaining unsigned 64-bit wide types. Each invocation appears twice: once with
// the element type repeated per lane before `|`, once with lane names only.
// NOTE(review): the single-lane `u64x1` names its only lane `x1`, whereas the
// multi-lane types start at `x0`. In the named-field form of the macro this
// identifier becomes the struct field name; confirm `x1` is intended.
simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3);
simd_ty!(u32x2[u32]: u32, u32 | x0, x1);
simd_ty!(u64x1[u64]: u64 | x1);
simd_ty!(u16x4[u16]: x0, x1, x2, x3);
simd_ty!(u32x2[u32]: x0, x1);
simd_ty!(u64x1[u64]: x1);
simd_ty!(
i8x8[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
i8x8[i8]:
x0,
x1,
x2,
x3,
@ -130,32 +111,18 @@ simd_ty!(
x6,
x7
);
// Remaining signed and floating-point 64-bit wide types. Each invocation appears
// twice: once with the element type repeated per lane before `|`, once with
// lane names only.
// NOTE(review): the single-lane `i64x1` and `f64x1` name their only lane `x1`,
// whereas the multi-lane types start at `x0`. In the named-field form of the
// macro this identifier becomes the struct field name; confirm `x1` is intended.
simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3);
simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
simd_ty!(i64x1[i64]: i64 | x1);
simd_ty!(i16x4[i16]: x0, x1, x2, x3);
simd_ty!(i32x2[i32]: x0, x1);
simd_ty!(i64x1[i64]: x1);
simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
simd_ty!(f64x1[f64]: f64 | x1);
simd_ty!(f32x2[f32]: x0, x1);
simd_ty!(f64x1[f64]: x1);
// 128-bit wide types:
simd_ty!(
u8x16[u8]: u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8 | x0,
u8x16[u8]:
x0,
x1,
x2,
x3,
@ -173,14 +140,8 @@ simd_ty!(
x15
);
simd_ty!(
u16x8[u16]: u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16 | x0,
u16x8[u16]:
x0,
x1,
x2,
x3,
@ -189,26 +150,12 @@ simd_ty!(
x6,
x7
);
simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3);
simd_ty!(u64x2[u64]: u64, u64 | x0, x1);
simd_ty!(u32x4[u32]: x0, x1, x2, x3);
simd_ty!(u64x2[u64]: x0, x1);
simd_ty!(
i8x16[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
i8x16[i8]:
x0,
x1,
x2,
x3,
@ -226,14 +173,8 @@ simd_ty!(
x15
);
simd_ty!(
i16x8[i16]: i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16 | x0,
i16x8[i16]:
x0,
x1,
x2,
x3,
@ -242,30 +183,16 @@ simd_ty!(
x6,
x7
);
simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
simd_ty!(i64x2[i64]: i64, i64 | x0, x1);
simd_ty!(i32x4[i32]: x0, x1, x2, x3);
simd_ty!(i64x2[i64]: x0, x1);
simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3);
simd_ty!(f64x2[f64]: f64, f64 | x0, x1);
simd_ty!(f64x4[f64]: f64, f64, f64, f64 | x0, x1, x2, x3);
simd_ty!(f32x4[f32]: x0, x1, x2, x3);
simd_ty!(f64x2[f64]: x0, x1);
simd_ty!(f64x4[f64]: x0, x1, x2, x3);
simd_m_ty!(
m8x16[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
m8x16[i8]:
x0,
x1,
x2,
x3,
@ -283,14 +210,8 @@ simd_m_ty!(
x15
);
simd_m_ty!(
m16x8[i16]: i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16 | x0,
m16x8[i16]:
x0,
x1,
x2,
x3,
@ -299,44 +220,14 @@ simd_m_ty!(
x6,
x7
);
simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1);
simd_m_ty!(m32x4[i32]: x0, x1, x2, x3);
simd_m_ty!(m64x2[i64]: x0, x1);
// 256-bit wide types:
simd_ty!(
u8x32[u8]: u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8 | x0,
u8x32[u8]:
x0,
x1,
x2,
x3,
@ -370,22 +261,8 @@ simd_ty!(
x31
);
simd_ty!(
u16x16[u16]: u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16 | x0,
u16x16[u16]:
x0,
x1,
x2,
x3,
@ -403,14 +280,8 @@ simd_ty!(
x15
);
simd_ty!(
u32x8[u32]: u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32 | x0,
u32x8[u32]:
x0,
x1,
x2,
x3,
@ -419,41 +290,11 @@ simd_ty!(
x6,
x7
);
simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3);
simd_ty!(u64x4[u64]: x0, x1, x2, x3);
simd_ty!(
i8x32[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
i8x32[i8]:
x0,
x1,
x2,
x3,
@ -487,22 +328,8 @@ simd_ty!(
x31
);
simd_ty!(
i16x16[i16]: i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16 | x0,
i16x16[i16]:
x0,
x1,
x2,
x3,
@ -520,14 +347,8 @@ simd_ty!(
x15
);
simd_ty!(
i32x8[i32]: i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32 | x0,
i32x8[i32]:
x0,
x1,
x2,
x3,
@ -536,17 +357,11 @@ simd_ty!(
x6,
x7
);
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
simd_ty!(i64x4[i64]: x0, x1, x2, x3);
simd_ty!(
f32x8[f32]: f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32 | x0,
f32x8[f32]:
x0,
x1,
x2,
x3,
@ -559,70 +374,8 @@ simd_ty!(
// 512-bit wide types:
simd_ty!(
i8x64[i8]: i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8,
i8 | x0,
i8x64[i8]:
x0,
x1,
x2,
x3,
@ -689,70 +442,8 @@ simd_ty!(
);
simd_ty!(
u8x64[u8]: u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8,
u8 | x0,
u8x64[u8]:
x0,
x1,
x2,
x3,
@ -819,38 +510,8 @@ simd_ty!(
);
simd_ty!(
i16x32[i16]: i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16,
i16 | x0,
i16x32[i16]:
x0,
x1,
x2,
x3,
@ -885,38 +546,8 @@ simd_ty!(
);
simd_ty!(
u16x32[u16]: u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16,
u16 | x0,
u16x32[u16]:
x0,
x1,
x2,
x3,
@ -951,22 +582,8 @@ simd_ty!(
);
simd_ty!(
i32x16[i32]: i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32,
i32 | x0,
i32x16[i32]:
x0,
x1,
x2,
x3,
@ -985,22 +602,8 @@ simd_ty!(
);
simd_ty!(
u32x16[u32]: u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32,
u32 | x0,
u32x16[u32]:
x0,
x1,
x2,
x3,
@ -1019,22 +622,8 @@ simd_ty!(
);
simd_ty!(
f32x16[f32]: f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32,
f32 | x0,
f32x16[f32]:
x0,
x1,
x2,
x3,
@ -1053,14 +642,8 @@ simd_ty!(
);
simd_ty!(
i64x8[i64]: i64,
i64,
i64,
i64,
i64,
i64,
i64,
i64 | x0,
i64x8[i64]:
x0,
x1,
x2,
x3,
@ -1071,14 +654,8 @@ simd_ty!(
);
simd_ty!(
u64x8[u64]: u64,
u64,
u64,
u64,
u64,
u64,
u64,
u64 | x0,
u64x8[u64]:
x0,
x1,
x2,
x3,
@ -1089,14 +666,8 @@ simd_ty!(
);
simd_ty!(
f64x8[f64]: f64,
f64,
f64,
f64,
f64,
f64,
f64,
f64 | x0,
f64x8[f64]:
x0,
x1,
x2,
x3,

View file

@ -715,7 +715,7 @@ pub const fn i8x16(
a14: i8,
a15: i8,
) -> v128 {
simd::i8x16(
simd::i8x16::new(
a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15,
)
.v128()
@ -747,7 +747,7 @@ pub const fn u8x16(
a14: u8,
a15: u8,
) -> v128 {
simd::u8x16(
simd::u8x16::new(
a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15,
)
.v128()
@ -776,7 +776,7 @@ pub const fn u8x16(
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
pub const fn i16x8(a0: i16, a1: i16, a2: i16, a3: i16, a4: i16, a5: i16, a6: i16, a7: i16) -> v128 {
// Passes the eight arguments, in order, to `simd::i16x8` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::i16x8(a0, a1, a2, a3, a4, a5, a6, a7).v128()
simd::i16x8::new(a0, a1, a2, a3, a4, a5, a6, a7).v128()
}
/// Materializes a SIMD value from the provided operands.
@ -788,7 +788,7 @@ pub const fn i16x8(a0: i16, a1: i16, a2: i16, a3: i16, a4: i16, a5: i16, a6: i16
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
pub const fn u16x8(a0: u16, a1: u16, a2: u16, a3: u16, a4: u16, a5: u16, a6: u16, a7: u16) -> v128 {
// Passes the eight arguments, in order, to `simd::u16x8` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::u16x8(a0, a1, a2, a3, a4, a5, a6, a7).v128()
simd::u16x8::new(a0, a1, a2, a3, a4, a5, a6, a7).v128()
}
/// Materializes a SIMD value from the provided operands.
@ -801,7 +801,7 @@ pub const fn u16x8(a0: u16, a1: u16, a2: u16, a3: u16, a4: u16, a5: u16, a6: u16
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
pub const fn i32x4(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 {
// Passes the four arguments, in order, to `simd::i32x4` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::i32x4(a0, a1, a2, a3).v128()
simd::i32x4::new(a0, a1, a2, a3).v128()
}
/// Materializes a SIMD value from the provided operands.
@ -813,7 +813,7 @@ pub const fn i32x4(a0: i32, a1: i32, a2: i32, a3: i32) -> v128 {
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
pub const fn u32x4(a0: u32, a1: u32, a2: u32, a3: u32) -> v128 {
// Passes the four arguments, in order, to `simd::u32x4` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::u32x4(a0, a1, a2, a3).v128()
simd::u32x4::new(a0, a1, a2, a3).v128()
}
/// Materializes a SIMD value from the provided operands.
@ -826,7 +826,7 @@ pub const fn u32x4(a0: u32, a1: u32, a2: u32, a3: u32) -> v128 {
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
pub const fn i64x2(a0: i64, a1: i64) -> v128 {
// Passes the two arguments, in order, to `simd::i64x2` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::i64x2(a0, a1).v128()
simd::i64x2::new(a0, a1).v128()
}
/// Materializes a SIMD value from the provided operands.
@ -838,7 +838,7 @@ pub const fn i64x2(a0: i64, a1: i64) -> v128 {
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd", since = "1.54.0")]
pub const fn u64x2(a0: u64, a1: u64) -> v128 {
// Passes the two arguments, in order, to `simd::u64x2` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::u64x2(a0, a1).v128()
simd::u64x2::new(a0, a1).v128()
}
/// Materializes a SIMD value from the provided operands.
@ -851,7 +851,7 @@ pub const fn u64x2(a0: u64, a1: u64) -> v128 {
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")]
pub const fn f32x4(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 {
// Passes the four arguments, in order, to `simd::f32x4` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::f32x4(a0, a1, a2, a3).v128()
simd::f32x4::new(a0, a1, a2, a3).v128()
}
/// Materializes a SIMD value from the provided operands.
@ -864,7 +864,7 @@ pub const fn f32x4(a0: f32, a1: f32, a2: f32, a3: f32) -> v128 {
#[stable(feature = "wasm_simd", since = "1.54.0")]
#[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")]
pub const fn f64x2(a0: f64, a1: f64) -> v128 {
// Passes the two arguments, in order, to `simd::f64x2` and converts the
// result with `.v128()`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
simd::f64x2(a0, a1).v128()
simd::f64x2::new(a0, a1).v128()
}
/// Returns a new vector with lanes selected from the lanes of the two input
@ -2183,7 +2183,7 @@ pub fn f64x2_ge(a: v128, b: v128) -> v128 {
#[doc(alias("v128.not"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn v128_not(a: v128) -> v128 {
// XORs `a`, viewed as two `i64` lanes, with a vector whose lanes are `!0`
// (every bit set), which flips every bit of `a`.
// NOTE(review): both sides of the diff are shown (markers stripped). The two
// `unsafe` lines differ only in how the all-ones vector is constructed.
unsafe { simd_xor(a.as_i64x2(), simd::i64x2(!0, !0)).v128() }
unsafe { simd_xor(a.as_i64x2(), simd::i64x2::new(!0, !0)).v128() }
}
/// Performs a bitwise and of the two input 128-bit vectors, returning the
@ -2206,7 +2206,13 @@ pub fn v128_and(a: v128, b: v128) -> v128 {
#[doc(alias("v128.andnot"))]
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn v128_andnot(a: v128, b: v128) -> v128 {
// Computes `a & !b`: `b` is XORed with lanes of `-1` (every bit set in two's
// complement) to invert it, and the result is ANDed with `a`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// one-line `unsafe` block and the multi-line one are the same expression,
// differing only in how the all-ones vector is constructed.
unsafe { simd_and(a.as_i64x2(), simd_xor(b.as_i64x2(), simd::i64x2(-1, -1))).v128() }
unsafe {
simd_and(
a.as_i64x2(),
simd_xor(b.as_i64x2(), simd::i64x2::new(-1, -1)),
)
.v128()
}
}
/// Performs a bitwise or of the two input 128-bit vectors, returning the

View file

@ -16023,7 +16023,7 @@ pub unsafe fn _mm512_setr_epi32(
e1: i32,
e0: i32,
) -> __m512i {
let r = i32x16(
let r = i32x16::new(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
);
transmute(r)
@ -16101,7 +16101,7 @@ pub unsafe fn _mm512_set_epi8(
e1: i8,
e0: i8,
) -> __m512i {
let r = i8x64(
let r = i8x64::new(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37,
e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55,
@ -16150,7 +16150,7 @@ pub unsafe fn _mm512_set_epi16(
e1: i16,
e0: i16,
) -> __m512i {
let r = i16x32(
let r = i16x32::new(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
);

View file

@ -1211,7 +1211,7 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
// Reads one `i64` from `mem_addr` via `ptr::read_unaligned`, so the pointer
// needs no particular alignment. That value is the first `i64x2` element and
// 0 is the second; the pair is then transmuted to `__m128i`.
// NOTE(review): both sides of the diff are shown (markers stripped). The
// tuple-style call and the `::new` call are alternatives, not two statements.
transmute(i64x2(ptr::read_unaligned(mem_addr as *const i64), 0))
transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
}
/// Stores the lowest 32 bit float of `a` into memory.