Move code out of constify macros (#973)
This commit is contained in:
parent
6fc888bd95
commit
111adc8ac0
4 changed files with 514 additions and 441 deletions
|
|
@ -1186,11 +1186,12 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let undefined = _mm256_undefined_ps();
|
||||
macro_rules! shuffle4 {
|
||||
($a:expr, $b:expr, $c:expr, $d:expr) => {
|
||||
simd_shuffle8(
|
||||
a,
|
||||
_mm256_undefined_ps(),
|
||||
undefined,
|
||||
[$a, $b, $c, $d, $a + 4, $b + 4, $c + 4, $d + 4],
|
||||
)
|
||||
};
|
||||
|
|
@ -1244,9 +1245,10 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let undefined = _mm_undefined_ps();
|
||||
macro_rules! shuffle4 {
|
||||
($a:expr, $b:expr, $c:expr, $d:expr) => {
|
||||
simd_shuffle4(a, _mm_undefined_ps(), [$a, $b, $c, $d])
|
||||
simd_shuffle4(a, undefined, [$a, $b, $c, $d])
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle3 {
|
||||
|
|
@ -1322,9 +1324,10 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let undefined = _mm256_undefined_pd();
|
||||
macro_rules! shuffle4 {
|
||||
($a:expr, $b:expr, $c:expr, $d:expr) => {
|
||||
simd_shuffle4(a, _mm256_undefined_pd(), [$a, $b, $c, $d])
|
||||
simd_shuffle4(a, undefined, [$a, $b, $c, $d])
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle3 {
|
||||
|
|
@ -1368,9 +1371,10 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d {
|
||||
let imm8 = (imm8 & 0xFF) as u8;
|
||||
let undefined = _mm_undefined_pd();
|
||||
macro_rules! shuffle2 {
|
||||
($a:expr, $b:expr) => {
|
||||
simd_shuffle2(a, _mm_undefined_pd(), [$a, $b])
|
||||
simd_shuffle2(a, undefined, [$a, $b])
|
||||
};
|
||||
}
|
||||
macro_rules! shuffle1 {
|
||||
|
|
|
|||
|
|
@ -2737,14 +2737,11 @@ pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i
|
|||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
|
||||
pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask32 {
|
||||
let a = a.as_u16x32();
|
||||
let b = b.as_u16x32();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpuw(
|
||||
a.as_u16x32(),
|
||||
b.as_u16x32(),
|
||||
$imm3,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
)
|
||||
vpcmpuw(a, b, $imm3, 0b11111111_11111111_11111111_11111111)
|
||||
};
|
||||
}
|
||||
let r = constify_imm3!(imm8, call);
|
||||
|
|
@ -2764,9 +2761,11 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask(
|
|||
b: __m512i,
|
||||
imm8: i32,
|
||||
) -> __mmask32 {
|
||||
let a = a.as_u16x32();
|
||||
let b = b.as_u16x32();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpuw(a.as_u16x32(), b.as_u16x32(), $imm3, k1)
|
||||
vpcmpuw(a, b, $imm3, k1)
|
||||
};
|
||||
}
|
||||
let r = constify_imm3!(imm8, call);
|
||||
|
|
@ -2781,11 +2780,13 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask(
|
|||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
|
||||
pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask64 {
|
||||
let a = a.as_u8x64();
|
||||
let b = b.as_u8x64();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpub(
|
||||
a.as_u8x64(),
|
||||
b.as_u8x64(),
|
||||
a,
|
||||
b,
|
||||
$imm3,
|
||||
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
|
||||
)
|
||||
|
|
@ -2808,9 +2809,11 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask(
|
|||
b: __m512i,
|
||||
imm8: i32,
|
||||
) -> __mmask64 {
|
||||
let a = a.as_u8x64();
|
||||
let b = b.as_u8x64();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpub(a.as_u8x64(), b.as_u8x64(), $imm3, k1)
|
||||
vpcmpub(a, b, $imm3, k1)
|
||||
};
|
||||
}
|
||||
let r = constify_imm3!(imm8, call);
|
||||
|
|
@ -2825,14 +2828,11 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask(
|
|||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
|
||||
pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask32 {
|
||||
let a = a.as_i16x32();
|
||||
let b = b.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpw(
|
||||
a.as_i16x32(),
|
||||
b.as_i16x32(),
|
||||
$imm3,
|
||||
0b11111111_11111111_11111111_11111111,
|
||||
)
|
||||
vpcmpw(a, b, $imm3, 0b11111111_11111111_11111111_11111111)
|
||||
};
|
||||
}
|
||||
let r = constify_imm3!(imm8, call);
|
||||
|
|
@ -2852,9 +2852,11 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask(
|
|||
b: __m512i,
|
||||
imm8: i32,
|
||||
) -> __mmask32 {
|
||||
let a = a.as_i16x32();
|
||||
let b = b.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpw(a.as_i16x32(), b.as_i16x32(), $imm3, k1)
|
||||
vpcmpw(a, b, $imm3, k1)
|
||||
};
|
||||
}
|
||||
let r = constify_imm3!(imm8, call);
|
||||
|
|
@ -2869,11 +2871,13 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask(
|
|||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
|
||||
pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask64 {
|
||||
let a = a.as_i8x64();
|
||||
let b = b.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpb(
|
||||
a.as_i8x64(),
|
||||
b.as_i8x64(),
|
||||
a,
|
||||
b,
|
||||
$imm3,
|
||||
0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
|
||||
)
|
||||
|
|
@ -2896,9 +2900,11 @@ pub unsafe fn _mm512_mask_cmp_epi8_mask(
|
|||
b: __m512i,
|
||||
imm8: i32,
|
||||
) -> __mmask64 {
|
||||
let a = a.as_i8x64();
|
||||
let b = b.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm3:expr) => {
|
||||
vpcmpb(a.as_i8x64(), b.as_i8x64(), $imm3, k1)
|
||||
vpcmpb(a, b, $imm3, k1)
|
||||
};
|
||||
}
|
||||
let r = constify_imm3!(imm8, call);
|
||||
|
|
@ -3729,9 +3735,10 @@ pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __
|
|||
#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm512_slli_epi16(a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpslliw(a.as_i16x32(), $imm8)
|
||||
vpslliw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -3746,9 +3753,10 @@ pub unsafe fn _mm512_slli_epi16(a: __m512i, imm8: u32) -> __m512i {
|
|||
#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))]
|
||||
#[rustc_args_required_const(3)]
|
||||
pub unsafe fn _mm512_mask_slli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpslliw(a.as_i16x32(), $imm8)
|
||||
vpslliw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -3763,9 +3771,10 @@ pub unsafe fn _mm512_mask_slli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm
|
|||
#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpslliw(a.as_i16x32(), $imm8)
|
||||
vpslliw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -3800,10 +3809,10 @@ pub unsafe fn _mm256_mask_slli_epi16(src: __m256i, k: __mmask16, a: __m256i, imm
|
|||
pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __m256i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm256_slli_epi16(a, $imm8).as_i16x16()
|
||||
_mm256_slli_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x16();
|
||||
let zero = _mm256_setzero_si256().as_i16x16();
|
||||
transmute(simd_select_bitmask(k, shf, zero))
|
||||
}
|
||||
|
|
@ -3818,10 +3827,10 @@ pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __
|
|||
pub unsafe fn _mm_mask_slli_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm_slli_epi16(a, $imm8).as_i16x8()
|
||||
_mm_slli_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x8();
|
||||
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
|
||||
}
|
||||
|
||||
|
|
@ -3835,10 +3844,10 @@ pub unsafe fn _mm_mask_slli_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u
|
|||
pub unsafe fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm_slli_epi16(a, $imm8).as_i16x8()
|
||||
_mm_slli_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x8();
|
||||
let zero = _mm_setzero_si128().as_i16x8();
|
||||
transmute(simd_select_bitmask(k, shf, zero))
|
||||
}
|
||||
|
|
@ -4054,9 +4063,10 @@ pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __
|
|||
#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm512_srli_epi16(a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpsrliw(a.as_i16x32(), $imm8)
|
||||
vpsrliw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -4071,9 +4081,10 @@ pub unsafe fn _mm512_srli_epi16(a: __m512i, imm8: u32) -> __m512i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))]
|
||||
#[rustc_args_required_const(3)]
|
||||
pub unsafe fn _mm512_mask_srli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpsrliw(a.as_i16x32(), $imm8)
|
||||
vpsrliw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -4089,9 +4100,10 @@ pub unsafe fn _mm512_mask_srli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm
|
|||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __m512i {
|
||||
//imm8 should be u32, it seems the document to verify is incorrect
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpsrliw(a.as_i16x32(), $imm8)
|
||||
vpsrliw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -4109,10 +4121,10 @@ pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __
|
|||
pub unsafe fn _mm256_mask_srli_epi16(src: __m256i, k: __mmask16, a: __m256i, imm8: i32) -> __m256i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm256_srli_epi16(a, $imm8).as_i16x16()
|
||||
_mm256_srli_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x16();
|
||||
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
|
||||
}
|
||||
|
||||
|
|
@ -4126,10 +4138,10 @@ pub unsafe fn _mm256_mask_srli_epi16(src: __m256i, k: __mmask16, a: __m256i, imm
|
|||
pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i, imm8: i32) -> __m256i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm256_srli_epi16(a, $imm8).as_i16x16()
|
||||
_mm256_srli_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x16();
|
||||
let zero = _mm256_setzero_si256().as_i16x16();
|
||||
transmute(simd_select_bitmask(k, shf, zero))
|
||||
}
|
||||
|
|
@ -4144,10 +4156,10 @@ pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i, imm8: i32) -> __
|
|||
pub unsafe fn _mm_mask_srli_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm_srli_epi16(a, $imm8).as_i16x8()
|
||||
_mm_srli_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x8();
|
||||
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
|
||||
}
|
||||
|
||||
|
|
@ -4161,10 +4173,10 @@ pub unsafe fn _mm_mask_srli_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: i
|
|||
pub unsafe fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm_srli_epi16(a, $imm8).as_i16x8()
|
||||
_mm_srli_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x8();
|
||||
let zero = _mm_setzero_si128().as_i16x8();
|
||||
transmute(simd_select_bitmask(k, shf, zero))
|
||||
}
|
||||
|
|
@ -4380,9 +4392,10 @@ pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __
|
|||
#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm512_srai_epi16(a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpsraiw(a.as_i16x32(), $imm8)
|
||||
vpsraiw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -4397,9 +4410,10 @@ pub unsafe fn _mm512_srai_epi16(a: __m512i, imm8: u32) -> __m512i {
|
|||
#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))]
|
||||
#[rustc_args_required_const(3)]
|
||||
pub unsafe fn _mm512_mask_srai_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpsraiw(a.as_i16x32(), $imm8)
|
||||
vpsraiw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -4414,9 +4428,10 @@ pub unsafe fn _mm512_mask_srai_epi16(src: __m512i, k: __mmask32, a: __m512i, imm
|
|||
#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
|
||||
let a = a.as_i16x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vpsraiw(a.as_i16x32(), $imm8)
|
||||
vpsraiw(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -4434,10 +4449,10 @@ pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i, imm8: u32) -> __
|
|||
pub unsafe fn _mm256_mask_srai_epi16(src: __m256i, k: __mmask16, a: __m256i, imm8: u32) -> __m256i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm256_srai_epi16(a, $imm8).as_i16x16()
|
||||
_mm256_srai_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x16();
|
||||
transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
|
||||
}
|
||||
|
||||
|
|
@ -4451,10 +4466,10 @@ pub unsafe fn _mm256_mask_srai_epi16(src: __m256i, k: __mmask16, a: __m256i, imm
|
|||
pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __m256i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm256_srai_epi16(a, $imm8).as_i16x16()
|
||||
_mm256_srai_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x16();
|
||||
let zero = _mm256_setzero_si256().as_i16x16();
|
||||
transmute(simd_select_bitmask(k, shf, zero))
|
||||
}
|
||||
|
|
@ -4469,10 +4484,10 @@ pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i, imm8: u32) -> __
|
|||
pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm_srai_epi16(a, $imm8).as_i16x8()
|
||||
_mm_srai_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x8();
|
||||
transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
|
||||
}
|
||||
|
||||
|
|
@ -4486,10 +4501,10 @@ pub unsafe fn _mm_mask_srai_epi16(src: __m128i, k: __mmask8, a: __m128i, imm8: u
|
|||
pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
_mm_srai_epi16(a, $imm8).as_i16x8()
|
||||
_mm_srai_epi16(a, $imm8)
|
||||
};
|
||||
}
|
||||
let shf = constify_imm8_sae!(imm8, call);
|
||||
let shf = constify_imm8_sae!(imm8, call).as_i16x8();
|
||||
let zero = _mm_setzero_si128().as_i16x8();
|
||||
transmute(simd_select_bitmask(k, shf, zero))
|
||||
}
|
||||
|
|
@ -6430,9 +6445,11 @@ pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
|
|||
#[rustc_args_required_const(2)]
|
||||
#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))]
|
||||
pub unsafe fn _mm512_dbsad_epu8(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
|
||||
let a = a.as_u8x64();
|
||||
let b = b.as_u8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vdbpsadbw(a.as_u8x64(), b.as_u8x64(), $imm8)
|
||||
vdbpsadbw(a, b, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -6453,9 +6470,11 @@ pub unsafe fn _mm512_mask_dbsad_epu8(
|
|||
b: __m512i,
|
||||
imm8: i32,
|
||||
) -> __m512i {
|
||||
let a = a.as_u8x64();
|
||||
let b = b.as_u8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vdbpsadbw(a.as_u8x64(), b.as_u8x64(), $imm8)
|
||||
vdbpsadbw(a, b, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(imm8, call);
|
||||
|
|
@ -6470,9 +6489,11 @@ pub unsafe fn _mm512_mask_dbsad_epu8(
|
|||
#[rustc_args_required_const(3)]
|
||||
#[cfg_attr(test, assert_instr(vdbpsadbw, imm8 = 0))]
|
||||
pub unsafe fn _mm512_maskz_dbsad_epu8(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
|
||||
let a = a.as_u8x64();
|
||||
let b = b.as_u8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vdbpsadbw(a.as_u8x64(), b.as_u8x64(), $imm8)
|
||||
vdbpsadbw(a, b, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(imm8, call);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -239,9 +239,11 @@ pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _
|
|||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm512_gf2p8affine_epi64_epi8(x: __m512i, a: __m512i, b: i32) -> __m512i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x64();
|
||||
let a = a.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_512(x.as_i8x64(), a.as_i8x64(), $imm8)
|
||||
vgf2p8affineqb_512(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -269,9 +271,11 @@ pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8(
|
|||
) -> __m512i {
|
||||
let zero = _mm512_setzero_si512().as_i8x64();
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x64();
|
||||
let a = a.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_512(x.as_i8x64(), a.as_i8x64(), $imm8)
|
||||
vgf2p8affineqb_512(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -299,9 +303,11 @@ pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8(
|
|||
b: i32,
|
||||
) -> __m512i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x64();
|
||||
let a = a.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_512(x.as_i8x64(), a.as_i8x64(), $imm8)
|
||||
vgf2p8affineqb_512(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -320,9 +326,11 @@ pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8(
|
|||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_gf2p8affine_epi64_epi8(x: __m256i, a: __m256i, b: i32) -> __m256i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x32();
|
||||
let a = a.as_i8x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_256(x.as_i8x32(), a.as_i8x32(), $imm8)
|
||||
vgf2p8affineqb_256(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -350,9 +358,11 @@ pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8(
|
|||
) -> __m256i {
|
||||
let zero = _mm256_setzero_si256().as_i8x32();
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x32();
|
||||
let a = a.as_i8x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_256(x.as_i8x32(), a.as_i8x32(), $imm8)
|
||||
vgf2p8affineqb_256(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -380,9 +390,11 @@ pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8(
|
|||
b: i32,
|
||||
) -> __m256i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x32();
|
||||
let a = a.as_i8x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_256(x.as_i8x32(), a.as_i8x32(), $imm8)
|
||||
vgf2p8affineqb_256(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -401,9 +413,11 @@ pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8(
|
|||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_gf2p8affine_epi64_epi8(x: __m128i, a: __m128i, b: i32) -> __m128i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x16();
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_128(x.as_i8x16(), a.as_i8x16(), $imm8)
|
||||
vgf2p8affineqb_128(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -431,9 +445,11 @@ pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8(
|
|||
) -> __m128i {
|
||||
let zero = _mm_setzero_si128().as_i8x16();
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x16();
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_128(x.as_i8x16(), a.as_i8x16(), $imm8)
|
||||
vgf2p8affineqb_128(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -461,9 +477,11 @@ pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8(
|
|||
b: i32,
|
||||
) -> __m128i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x16();
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineqb_128(x.as_i8x16(), a.as_i8x16(), $imm8)
|
||||
vgf2p8affineqb_128(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -484,9 +502,11 @@ pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8(
|
|||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8(x: __m512i, a: __m512i, b: i32) -> __m512i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x64();
|
||||
let a = a.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_512(x.as_i8x64(), a.as_i8x64(), $imm8)
|
||||
vgf2p8affineinvqb_512(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -516,9 +536,11 @@ pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8(
|
|||
) -> __m512i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let zero = _mm512_setzero_si512().as_i8x64();
|
||||
let x = x.as_i8x64();
|
||||
let a = a.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_512(x.as_i8x64(), a.as_i8x64(), $imm8)
|
||||
vgf2p8affineinvqb_512(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -548,9 +570,11 @@ pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8(
|
|||
b: i32,
|
||||
) -> __m512i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x64();
|
||||
let a = a.as_i8x64();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_512(x.as_i8x64(), a.as_i8x64(), $imm8)
|
||||
vgf2p8affineinvqb_512(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -571,9 +595,11 @@ pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8(
|
|||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8(x: __m256i, a: __m256i, b: i32) -> __m256i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x32();
|
||||
let a = a.as_i8x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_256(x.as_i8x32(), a.as_i8x32(), $imm8)
|
||||
vgf2p8affineinvqb_256(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -603,9 +629,11 @@ pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8(
|
|||
) -> __m256i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let zero = _mm256_setzero_si256().as_i8x32();
|
||||
let x = x.as_i8x32();
|
||||
let a = a.as_i8x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_256(x.as_i8x32(), a.as_i8x32(), $imm8)
|
||||
vgf2p8affineinvqb_256(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -635,9 +663,11 @@ pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8(
|
|||
b: i32,
|
||||
) -> __m256i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x32();
|
||||
let a = a.as_i8x32();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_256(x.as_i8x32(), a.as_i8x32(), $imm8)
|
||||
vgf2p8affineinvqb_256(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -658,9 +688,11 @@ pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8(
|
|||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_gf2p8affineinv_epi64_epi8(x: __m128i, a: __m128i, b: i32) -> __m128i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x16();
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_128(x.as_i8x16(), a.as_i8x16(), $imm8)
|
||||
vgf2p8affineinvqb_128(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -690,9 +722,11 @@ pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8(
|
|||
) -> __m128i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let zero = _mm_setzero_si128().as_i8x16();
|
||||
let x = x.as_i8x16();
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_128(x.as_i8x16(), a.as_i8x16(), $imm8)
|
||||
vgf2p8affineinvqb_128(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
@ -722,9 +756,11 @@ pub unsafe fn _mm_mask_gf2p8affineinv_epi64_epi8(
|
|||
b: i32,
|
||||
) -> __m128i {
|
||||
assert!(0 <= b && b < 256);
|
||||
let x = x.as_i8x16();
|
||||
let a = a.as_i8x16();
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
vgf2p8affineinvqb_128(x.as_i8x16(), a.as_i8x16(), $imm8)
|
||||
vgf2p8affineinvqb_128(x, a, $imm8)
|
||||
};
|
||||
}
|
||||
let r = constify_imm8_sae!(b, call);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue