Convert some SSE2 intrinsics to const generics (#1021)

This commit is contained in:
Rémy Rakic 2021-02-28 03:48:43 +01:00 committed by GitHub
parent 78ab9042cb
commit 5438f1fb7e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 87 additions and 219 deletions

View file

@ -1,5 +1,21 @@
//! Utility macros.
// Helper struct used to trigger const eval errors when a const generic immediate value is
// out of range.
pub(crate) struct ValidateConstImm8<const imm8: i32>();
impl<const imm8: i32> ValidateConstImm8<imm8> {
pub(crate) const VALID: () = {
let _ = 1 / ((imm8 >= 0 && imm8 <= 255) as usize);
};
}
#[allow(unused)]
macro_rules! static_assert_imm8 {
($imm:ident) => {
let _ = $crate::core_arch::macros::ValidateConstImm8::<$imm>::VALID;
};
}
#[allow(unused)]
macro_rules! static_assert {
($imm:ident : $ty:ty where $e:expr) => {
@ -320,48 +336,6 @@ macro_rules! constify_imm5 {
};
}
//immediate value: -16:15
#[allow(unused)]
macro_rules! constify_imm5 {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1_1111 {
0 => $expand!(0),
1 => $expand!(1),
2 => $expand!(2),
3 => $expand!(3),
4 => $expand!(4),
5 => $expand!(5),
6 => $expand!(6),
7 => $expand!(7),
8 => $expand!(8),
9 => $expand!(9),
10 => $expand!(10),
11 => $expand!(11),
12 => $expand!(12),
13 => $expand!(13),
14 => $expand!(14),
15 => $expand!(15),
16 => $expand!(16),
17 => $expand!(17),
18 => $expand!(18),
19 => $expand!(19),
20 => $expand!(20),
21 => $expand!(21),
22 => $expand!(22),
23 => $expand!(23),
24 => $expand!(24),
25 => $expand!(25),
26 => $expand!(26),
27 => $expand!(27),
28 => $expand!(28),
29 => $expand!(29),
30 => $expand!(30),
_ => $expand!(31),
}
};
}
//immediate value: 0:16
#[allow(unused)]
macro_rules! constify_imm4 {

View file

@ -5858,7 +5858,7 @@ pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __
pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srai_epi16(a, $imm8)
_mm_srai_epi16::<$imm8>(a)
};
}
let shf = constify_imm8_sae!(imm8, call);
@ -5875,7 +5875,7 @@ pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u
pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srai_epi16(a, $imm8)
_mm_srai_epi16::<$imm8>(a)
};
}
let shf = constify_imm8_sae!(imm8, call);
@ -7414,7 +7414,7 @@ pub unsafe fn _mm_mask_shufflelo_epi16(
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shufflelo_epi16(a, $imm8)
_mm_shufflelo_epi16::<$imm8>(a)
};
}
let shuffle = constify_imm8_sae!(imm8, call);
@ -7431,7 +7431,7 @@ pub unsafe fn _mm_mask_shufflelo_epi16(
pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shufflelo_epi16(a, $imm8)
_mm_shufflelo_epi16::<$imm8>(a)
};
}
let shuffle = constify_imm8_sae!(imm8, call);
@ -7592,7 +7592,7 @@ pub unsafe fn _mm_mask_shufflehi_epi16(
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shufflehi_epi16(a, $imm8)
_mm_shufflehi_epi16::<$imm8>(a)
};
}
let shuffle = constify_imm8_sae!(imm8, call);
@ -7609,7 +7609,7 @@ pub unsafe fn _mm_mask_shufflehi_epi16(
pub unsafe fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shufflehi_epi16(a, $imm8)
_mm_shufflehi_epi16::<$imm8>(a)
};
}
let shuffle = constify_imm8_sae!(imm8, call);

View file

@ -19238,7 +19238,7 @@ pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m
pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srai_epi32(a, $imm8)
_mm_srai_epi32::<$imm8>(a)
};
}
let shf = constify_imm8_sae!(imm8, call);
@ -19255,7 +19255,7 @@ pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u
pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srai_epi32(a, $imm8)
_mm_srai_epi32::<$imm8>(a)
};
}
let shf = constify_imm8_sae!(imm8, call);
@ -22495,7 +22495,7 @@ pub unsafe fn _mm_mask_shuffle_epi32(
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_epi32(a, $imm8)
_mm_shuffle_epi32::<$imm8>(a)
};
}
let r = constify_imm8_sae!(imm8, call);
@ -22512,7 +22512,7 @@ pub unsafe fn _mm_mask_shuffle_epi32(
pub unsafe fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i, imm8: _MM_PERM_ENUM) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_epi32(a, $imm8)
_mm_shuffle_epi32::<$imm8>(a)
};
}
let r = constify_imm8_sae!(imm8, call);

View file

@ -1010,7 +1010,7 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_ps<const mask: i32>(a: __m128, b: __m128) -> __m128 {
static_assert!(mask: i32 where mask >= 0 && mask <= 255);
static_assert_imm8!(mask);
simd_shuffle4(
a,
b,

View file

@ -594,16 +594,11 @@ pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, imm8 = 1))]
#[rustc_args_required_const(1)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i16x8();
macro_rules! call {
($imm8:expr) => {
transmute(psraiw(a, $imm8))
};
}
constify_imm8!(imm8, call)
pub unsafe fn _mm_srai_epi16<const imm8: i32>(a: __m128i) -> __m128i {
static_assert_imm8!(imm8);
transmute(psraiw(a.as_i16x8(), imm8))
}
/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
@ -625,16 +620,11 @@ pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, imm8 = 1))]
#[rustc_args_required_const(1)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
macro_rules! call {
($imm8:expr) => {
transmute(psraid(a, $imm8))
};
}
constify_imm8!(imm8, call)
pub unsafe fn _mm_srai_epi32<const imm8: i32>(a: __m128i) -> __m128i {
static_assert_imm8!(imm8);
transmute(psraid(a.as_i32x4(), imm8))
}
/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
@ -1461,60 +1451,21 @@ pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
#[rustc_args_required_const(1)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
// simd_shuffleX requires that its selector parameter be made up of
// constant values, but we can't enforce that here. In spirit, we need
// to write a `match` on all possible values of a byte, and for each value,
// hard-code the correct `simd_shuffleX` call using only constants. We
// then hope for LLVM to do the rest.
//
// Of course, that's... awful. So we try to use macros to do it for us.
let imm8 = (imm8 & 0xFF) as u8;
pub unsafe fn _mm_shuffle_epi32<const imm8: i32>(a: __m128i) -> __m128i {
static_assert_imm8!(imm8);
let a = a.as_i32x4();
macro_rules! shuffle_done {
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
simd_shuffle4(a, a, [$x01, $x23, $x45, $x67])
};
}
macro_rules! shuffle_x67 {
($x01:expr, $x23:expr, $x45:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle_done!($x01, $x23, $x45, 0),
0b01 => shuffle_done!($x01, $x23, $x45, 1),
0b10 => shuffle_done!($x01, $x23, $x45, 2),
_ => shuffle_done!($x01, $x23, $x45, 3),
}
};
}
macro_rules! shuffle_x45 {
($x01:expr, $x23:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle_x67!($x01, $x23, 0),
0b01 => shuffle_x67!($x01, $x23, 1),
0b10 => shuffle_x67!($x01, $x23, 2),
_ => shuffle_x67!($x01, $x23, 3),
}
};
}
macro_rules! shuffle_x23 {
($x01:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle_x45!($x01, 0),
0b01 => shuffle_x45!($x01, 1),
0b10 => shuffle_x45!($x01, 2),
_ => shuffle_x45!($x01, 3),
}
};
}
let x: i32x4 = match imm8 & 0b11 {
0b00 => shuffle_x23!(0),
0b01 => shuffle_x23!(1),
0b10 => shuffle_x23!(2),
_ => shuffle_x23!(3),
};
let x: i32x4 = simd_shuffle4(
a,
a,
[
imm8 as u32 & 0b11,
(imm8 as u32 >> 2) & 0b11,
(imm8 as u32 >> 4) & 0b11,
(imm8 as u32 >> 6) & 0b11,
],
);
transmute(x)
}
@ -1528,53 +1479,25 @@ pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
// See _mm_shuffle_epi32.
let imm8 = (imm8 & 0xFF) as u8;
pub unsafe fn _mm_shufflehi_epi16<const imm8: i32>(a: __m128i) -> __m128i {
static_assert_imm8!(imm8);
let a = a.as_i16x8();
macro_rules! shuffle_done {
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
simd_shuffle8(a, a, [0, 1, 2, 3, $x01 + 4, $x23 + 4, $x45 + 4, $x67 + 4])
};
}
macro_rules! shuffle_x67 {
($x01:expr, $x23:expr, $x45:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle_done!($x01, $x23, $x45, 0),
0b01 => shuffle_done!($x01, $x23, $x45, 1),
0b10 => shuffle_done!($x01, $x23, $x45, 2),
_ => shuffle_done!($x01, $x23, $x45, 3),
}
};
}
macro_rules! shuffle_x45 {
($x01:expr, $x23:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle_x67!($x01, $x23, 0),
0b01 => shuffle_x67!($x01, $x23, 1),
0b10 => shuffle_x67!($x01, $x23, 2),
_ => shuffle_x67!($x01, $x23, 3),
}
};
}
macro_rules! shuffle_x23 {
($x01:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle_x45!($x01, 0),
0b01 => shuffle_x45!($x01, 1),
0b10 => shuffle_x45!($x01, 2),
_ => shuffle_x45!($x01, 3),
}
};
}
let x: i16x8 = match imm8 & 0b11 {
0b00 => shuffle_x23!(0),
0b01 => shuffle_x23!(1),
0b10 => shuffle_x23!(2),
_ => shuffle_x23!(3),
};
let x: i16x8 = simd_shuffle8(
a,
a,
[
0,
1,
2,
3,
(imm8 as u32 & 0b11) + 4,
((imm8 as u32 >> 2) & 0b11) + 4,
((imm8 as u32 >> 4) & 0b11) + 4,
((imm8 as u32 >> 6) & 0b11) + 4,
],
);
transmute(x)
}
@ -1588,54 +1511,25 @@ pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i {
// See _mm_shuffle_epi32.
let imm8 = (imm8 & 0xFF) as u8;
pub unsafe fn _mm_shufflelo_epi16<const imm8: i32>(a: __m128i) -> __m128i {
static_assert_imm8!(imm8);
let a = a.as_i16x8();
macro_rules! shuffle_done {
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
simd_shuffle8(a, a, [$x01, $x23, $x45, $x67, 4, 5, 6, 7])
};
}
macro_rules! shuffle_x67 {
($x01:expr, $x23:expr, $x45:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle_done!($x01, $x23, $x45, 0),
0b01 => shuffle_done!($x01, $x23, $x45, 1),
0b10 => shuffle_done!($x01, $x23, $x45, 2),
_ => shuffle_done!($x01, $x23, $x45, 3),
}
};
}
macro_rules! shuffle_x45 {
($x01:expr, $x23:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle_x67!($x01, $x23, 0),
0b01 => shuffle_x67!($x01, $x23, 1),
0b10 => shuffle_x67!($x01, $x23, 2),
_ => shuffle_x67!($x01, $x23, 3),
}
};
}
macro_rules! shuffle_x23 {
($x01:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle_x45!($x01, 0),
0b01 => shuffle_x45!($x01, 1),
0b10 => shuffle_x45!($x01, 2),
_ => shuffle_x45!($x01, 3),
}
};
}
let x: i16x8 = match imm8 & 0b11 {
0b00 => shuffle_x23!(0),
0b01 => shuffle_x23!(1),
0b10 => shuffle_x23!(2),
_ => shuffle_x23!(3),
};
let x: i16x8 = simd_shuffle8(
a,
a,
[
imm8 as u32 & 0b11,
(imm8 as u32 >> 2) & 0b11,
(imm8 as u32 >> 4) & 0b11,
(imm8 as u32 >> 6) & 0b11,
4,
5,
6,
7,
],
);
transmute(x)
}
@ -3594,7 +3488,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_srai_epi16() {
let r = _mm_srai_epi16(_mm_set1_epi16(-1), 1);
let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1));
assert_eq_m128i(r, _mm_set1_epi16(-1));
}
@ -3608,7 +3502,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_srai_epi32() {
let r = _mm_srai_epi32(_mm_set1_epi32(-1), 1);
let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1));
assert_eq_m128i(r, _mm_set1_epi32(-1));
}
@ -4107,7 +4001,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_shuffle_epi32() {
let a = _mm_setr_epi32(5, 10, 15, 20);
let r = _mm_shuffle_epi32(a, 0b00_01_01_11);
let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
let e = _mm_setr_epi32(20, 10, 10, 5);
assert_eq_m128i(r, e);
}
@ -4115,7 +4009,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_shufflehi_epi16() {
let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
let r = _mm_shufflehi_epi16(a, 0b00_01_01_11);
let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
assert_eq_m128i(r, e);
}
@ -4123,7 +4017,7 @@ mod tests {
#[simd_test(enable = "sse2")]
unsafe fn test_mm_shufflelo_epi16() {
let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
let r = _mm_shufflelo_epi16(a, 0b00_01_01_11);
let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
assert_eq_m128i(r, e);
}