Merge pull request #1042 from minybot/avx512

This commit is contained in:
minybot 2021-03-05 15:50:15 -05:00 committed by GitHub
parent 13635198cb
commit da344ab998
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 2634 additions and 3791 deletions

View file

@ -2642,74 +2642,25 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
#[rustc_args_required_const(1)]
#[cfg_attr(test, assert_instr(vpermilps, MASK = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
// simd_shuffleX requires that its selector parameter be made up of
// constant values, but we can't enforce that here. In spirit, we need
// to write a `match` on all possible values of a byte, and for each value,
// hard-code the correct `simd_shuffleX` call using only constants. We
// then hope for LLVM to do the rest.
//
// Of course, that's... awful. So we try to use macros to do it for us.
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x8();
macro_rules! shuffle_done {
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
simd_shuffle8(
a,
a,
[
$x01,
$x23,
$x45,
$x67,
4 + $x01,
4 + $x23,
4 + $x45,
4 + $x67,
],
)
};
}
macro_rules! shuffle_x67 {
($x01:expr, $x23:expr, $x45:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle_done!($x01, $x23, $x45, 0),
0b01 => shuffle_done!($x01, $x23, $x45, 1),
0b10 => shuffle_done!($x01, $x23, $x45, 2),
_ => shuffle_done!($x01, $x23, $x45, 3),
}
};
}
macro_rules! shuffle_x45 {
($x01:expr, $x23:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle_x67!($x01, $x23, 0),
0b01 => shuffle_x67!($x01, $x23, 1),
0b10 => shuffle_x67!($x01, $x23, 2),
_ => shuffle_x67!($x01, $x23, 3),
}
};
}
macro_rules! shuffle_x23 {
($x01:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle_x45!($x01, 0),
0b01 => shuffle_x45!($x01, 1),
0b10 => shuffle_x45!($x01, 2),
_ => shuffle_x45!($x01, 3),
}
};
}
let r: i32x8 = match imm8 & 0b11 {
0b00 => shuffle_x23!(0),
0b01 => shuffle_x23!(1),
0b10 => shuffle_x23!(2),
_ => shuffle_x23!(3),
};
pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
static_assert_imm8!(MASK);
let r: i32x8 = simd_shuffle8(
a.as_i32x8(),
a.as_i32x8(),
[
MASK as u32 & 0b11,
(MASK as u32 >> 2) & 0b11,
(MASK as u32 >> 4) & 0b11,
(MASK as u32 >> 6) & 0b11,
(MASK as u32 & 0b11) + 4,
((MASK as u32 >> 2) & 0b11) + 4,
((MASK as u32 >> 4) & 0b11) + 4,
((MASK as u32 >> 6) & 0b11) + 4,
],
);
transmute(r)
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -189,37 +189,6 @@ macro_rules! constify_imm8_gather {
};
}
// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11.
// This macro enforces that.
#[allow(unused)]
macro_rules! constify_imm4_round {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1111 {
4 => $expand!(4),
8 => $expand!(8),
9 => $expand!(9),
10 => $expand!(10),
11 => $expand!(11),
_ => panic!("Invalid round value"),
}
};
}
// For sae instructions, the only valid values for sae are 4 and 8.
// This macro enforces that.
#[allow(unused)]
macro_rules! constify_imm4_sae {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1111 {
4 => $expand!(4),
8 => $expand!(8),
_ => panic!("Invalid sae value"),
}
};
}
// Two mantissas parameters.
// This macro enforces that.
#[allow(unused)]

File diff suppressed because it is too large Load diff

View file

@ -1,32 +1,33 @@
//! Utility macros.
// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11.
// This macro enforces that.
#[allow(unused)]
macro_rules! constify_imm4_round {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1111 {
4 => $expand!(4),
8 => $expand!(8),
9 => $expand!(9),
10 => $expand!(10),
11 => $expand!(11),
_ => panic!("Invalid round value"),
}
// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is
// not a round number.
pub(crate) struct ValidateConstRound<const IMM: i32>;
impl<const IMM: i32> ValidateConstRound<IMM> {
pub(crate) const VALID: () = {
let _ = 1 / ((IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11) as usize);
};
}
// For sae instructions, the only valid values for sae are 4 and 8.
// This macro enforces that.
#[allow(unused)]
macro_rules! constify_imm4_sae {
($imm8:expr, $expand:ident) => {
#[allow(overflowing_literals)]
match ($imm8) & 0b1111 {
4 => $expand!(4),
8 => $expand!(8),
_ => panic!("Invalid sae value"),
}
macro_rules! static_assert_rounding {
($imm:ident) => {
let _ = $crate::core_arch::x86_64::macros::ValidateConstRound::<$imm>::VALID;
};
}
// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is
// not a sae number.
pub(crate) struct ValidateConstSae<const IMM: i32>;
impl<const IMM: i32> ValidateConstSae<IMM> {
pub(crate) const VALID: () = {
let _ = 1 / ((IMM == 4 || IMM == 8) as usize);
};
}
#[allow(unused)]
macro_rules! static_assert_sae {
($imm:ident) => {
let _ = $crate::core_arch::x86_64::macros::ValidateConstSae::<$imm>::VALID;
};
}