Convert `_mm256_shufflelo_epi16` to const generics
This commit is contained in:
parent
914893b147
commit
28a93bbbad
2 changed files with 29 additions and 52 deletions
|
|
@ -2531,57 +2531,34 @@ pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflelo_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let a = a.as_i16x16();
|
||||
macro_rules! shuffle_done {
|
||||
($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
|
||||
#[rustfmt::skip]
|
||||
simd_shuffle16(a, a, [
|
||||
0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7,
|
||||
8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15,
|
||||
])
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle_x67 {
|
||||
($x01:expr, $x23:expr, $x45:expr) => {
|
||||
match (imm8 >> 6) & 0b11 {
|
||||
0b00 => shuffle_done!($x01, $x23, $x45, 0),
|
||||
0b01 => shuffle_done!($x01, $x23, $x45, 1),
|
||||
0b10 => shuffle_done!($x01, $x23, $x45, 2),
|
||||
_ => shuffle_done!($x01, $x23, $x45, 3),
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle_x45 {
|
||||
($x01:expr, $x23:expr) => {
|
||||
match (imm8 >> 4) & 0b11 {
|
||||
0b00 => shuffle_x67!($x01, $x23, 0),
|
||||
0b01 => shuffle_x67!($x01, $x23, 1),
|
||||
0b10 => shuffle_x67!($x01, $x23, 2),
|
||||
_ => shuffle_x67!($x01, $x23, 3),
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle_x23 {
|
||||
($x01:expr) => {
|
||||
match (imm8 >> 2) & 0b11 {
|
||||
0b00 => shuffle_x45!($x01, 0),
|
||||
0b01 => shuffle_x45!($x01, 1),
|
||||
0b10 => shuffle_x45!($x01, 2),
|
||||
_ => shuffle_x45!($x01, 3),
|
||||
}
|
||||
};
|
||||
}
|
||||
let r: i16x16 = match imm8 & 0b11 {
|
||||
0b00 => shuffle_x23!(0),
|
||||
0b01 => shuffle_x23!(1),
|
||||
0b10 => shuffle_x23!(2),
|
||||
_ => shuffle_x23!(3),
|
||||
};
|
||||
let r: i16x16 = simd_shuffle16(
|
||||
a,
|
||||
a,
|
||||
[
|
||||
0 + (IMM8 as u32 & 0b11),
|
||||
0 + ((IMM8 as u32 >> 2) & 0b11),
|
||||
0 + ((IMM8 as u32 >> 4) & 0b11),
|
||||
0 + ((IMM8 as u32 >> 6) & 0b11),
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8 + (IMM8 as u32 & 0b11),
|
||||
8 + ((IMM8 as u32 >> 2) & 0b11),
|
||||
8 + ((IMM8 as u32 >> 4) & 0b11),
|
||||
8 + ((IMM8 as u32 >> 6) & 0b11),
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
],
|
||||
);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
|
|
@ -4884,7 +4861,7 @@ mod tests {
|
|||
44, 22, 22, 11, 0, 1, 2, 3,
|
||||
88, 66, 66, 55, 4, 5, 6, 7,
|
||||
);
|
||||
let r = _mm256_shufflelo_epi16(a, 0b00_01_01_11);
|
||||
let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7233,7 +7233,7 @@ pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
|
|||
a: __m256i,
|
||||
) -> __m256i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let shuffle = _mm256_shufflelo_epi16(a, IMM8);
|
||||
let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
|
||||
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
|
||||
}
|
||||
|
||||
|
|
@ -7246,7 +7246,7 @@ pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
|
|||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
|
||||
static_assert_imm8!(IMM8);
|
||||
let shuffle = _mm256_shufflelo_epi16(a, IMM8);
|
||||
let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
|
||||
let zero = _mm256_setzero_si256().as_i16x16();
|
||||
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue