From f0a9100c7f07dc96068505934d4d755f5b369301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Tue, 9 Mar 2021 23:21:47 +0100 Subject: [PATCH] Convert the last avx512f and avx512vpclmulqdq intrinsics (#1068) --- .../crates/core_arch/src/x86/avx512f.rs | 100 +++++++++--------- .../crates/core_arch/src/x86/avx512vaes.rs | 24 ++--- .../core_arch/src/x86/avx512vpclmulqdq.rs | 94 +++++++--------- .../crates/core_arch/src/x86_64/avx512f.rs | 2 +- 4 files changed, 104 insertions(+), 116 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index 16313c349a22..1b4ec89a1dea 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -21896,57 +21896,57 @@ pub unsafe fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m2 transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) } -/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the result in dst. +/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extracti64x4_epi64&expand=2473) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextractf64x4, imm8 = 1) //should be vextracti64x4 + assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4 )] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i, imm8: i32) -> __m256i { - assert!(imm8 >= 0 && imm8 <= 1); - match imm8 & 0x1 { +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i { + static_assert_imm1!(IMM1); + match IMM1 { 0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]), _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]), } } -/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extracti64x4_epi64&expand=2474) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextracti64x4, IMM8 = 1) + assert_instr(vextracti64x4, IMM1 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_extracti64x4_epi64<const IMM8: i32>( +pub unsafe fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>( src: __m256i, k: __mmask8, a: __m512i, ) -> __m256i { - static_assert_imm1!(IMM8); - let r = _mm512_extracti64x4_epi64(a, IMM8); + static_assert_imm1!(IMM1); + let r = _mm512_extracti64x4_epi64::<IMM1>(a); transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4())) } -/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extracti64x4_epi64&expand=2475) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextracti64x4, IMM8 = 1) + assert_instr(vextracti64x4, IMM1 = 1) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i { - static_assert_imm1!(IMM8); - let r = _mm512_extracti64x4_epi64(a, IMM8); +pub unsafe fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i { + static_assert_imm1!(IMM1); + let r = _mm512_extracti64x4_epi64::<IMM1>(a); let zero = _mm256_setzero_si256().as_i64x4(); transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) } @@ -22006,21 +22006,21 @@ pub unsafe fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m5 transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) } -/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the result in dst. +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extracti32x4_epi32&expand=2461) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextractf32x4, imm8 = 3) //should be vextracti32x4 + assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4 )] -#[rustc_args_required_const(1)] -pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 3); +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i { + static_assert_imm2!(IMM2); let a = a.as_i32x16(); let undefined = _mm512_undefined_epi32().as_i32x16(); - let extract: i32x4 = match imm8 & 0x3 { + let extract: i32x4 = match IMM2 { 0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]), 1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]), 2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]), @@ -22029,97 +22029,97 @@ pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i { transmute(extract) } -/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extracti32x4_epi32&expand=2462) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextracti32x4, IMM8 = 3) + assert_instr(vextracti32x4, IMM2 = 3) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm512_mask_extracti32x4_epi32<const IMM8: i32>( +pub unsafe fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>( src: __m128i, k: __mmask8, a: __m512i, ) -> __m128i { - static_assert_imm2!(IMM8); - let r = _mm512_extracti32x4_epi32(a, IMM8); + static_assert_imm2!(IMM2); + let r = _mm512_extracti32x4_epi32::<IMM2>(a); transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) } -/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extracti32x4_epi32&expand=2463) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextracti32x4, IMM8 = 3) + assert_instr(vextracti32x4, IMM2 = 3) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm512_maskz_extracti32x4_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i { - static_assert_imm2!(IMM8); - let r = _mm512_extracti32x4_epi32(a, IMM8); +pub unsafe fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i { + static_assert_imm2!(IMM2); + let r = _mm512_extracti32x4_epi32::<IMM2>(a); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) } -/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the result in dst. +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti32x4_epi32&expand=2458) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextract, imm8 = 1) //should be vextracti32x4 + assert_instr(vextract, IMM1 = 1) //should be vextracti32x4 )] -#[rustc_args_required_const(1)] -pub unsafe fn _mm256_extracti32x4_epi32(a: __m256i, imm8: i32) -> __m128i { - assert!(imm8 >= 0 && imm8 <= 1); +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i { + static_assert_imm1!(IMM1); let a = a.as_i32x8(); let undefined = _mm256_undefined_si256().as_i32x8(); - let extract: i32x4 = match imm8 & 0x1 { + let extract: i32x4 = match IMM1 { 0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]), _ => simd_shuffle4(a, undefined, [4, 5, 6, 7]), }; transmute(extract) } -/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_extracti32x4_epi32&expand=2459) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextracti32x4, IMM8 = 1) + assert_instr(vextracti32x4, IMM1 = 1) )] #[rustc_legacy_const_generics(3)] -pub unsafe fn _mm256_mask_extracti32x4_epi32<const IMM8: i32>( +pub unsafe fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>( src: __m128i, k: __mmask8, a: __m256i, ) -> __m128i { - static_assert_imm1!(IMM8); - let r = _mm256_extracti32x4_epi32(a, IMM8); + static_assert_imm1!(IMM1); + let r = _mm256_extracti32x4_epi32::<IMM1>(a); transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4())) } -/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_extracti32x4_epi32&expand=2460) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[cfg_attr( all(test, not(target_os = "windows")), - assert_instr(vextracti32x4, IMM8 = 1) + assert_instr(vextracti32x4, IMM1 = 1) )] #[rustc_legacy_const_generics(2)] -pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i { - static_assert_imm1!(IMM8); - let r = _mm256_extracti32x4_epi32(a, IMM8); +pub unsafe fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i { + static_assert_imm1!(IMM1); + let r = _mm256_extracti32x4_epi32::<IMM1>(a); let zero = _mm_setzero_si128().as_i32x4(); transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) } @@ -46698,7 +46698,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_extracti32x4_epi32() { let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let r = _mm512_extracti32x4_epi32(a, 0b1); + let r = _mm512_extracti32x4_epi32::<1>(a); let e = _mm_setr_epi32(5, 6, 7, 8); assert_eq_m128i(r, e); } @@ -46727,7 +46727,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_extracti32x4_epi32() { let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); - let r = _mm256_extracti32x4_epi32(a, 0b1); + let r = _mm256_extracti32x4_epi32::<1>(a); let e = _mm_set_epi32(1, 2, 3, 4); assert_eq_m128i(r, e); } diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs b/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs index 8479d64ee666..676de312b375 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs @@ -262,24 +262,24 @@ mod tests { 0x19BE9F660038BDB5, ); let mut a_decomp = [_mm_setzero_si128(); 4]; - a_decomp[0] = _mm512_extracti32x4_epi32(a, 0); - a_decomp[1] = _mm512_extracti32x4_epi32(a, 1); - a_decomp[2] = _mm512_extracti32x4_epi32(a, 2); - 
a_decomp[3] = _mm512_extracti32x4_epi32(a, 3); + a_decomp[0] = _mm512_extracti32x4_epi32::<0>(a); + a_decomp[1] = _mm512_extracti32x4_epi32::<1>(a); + a_decomp[2] = _mm512_extracti32x4_epi32::<2>(a); + a_decomp[3] = _mm512_extracti32x4_epi32::<3>(a); let mut k_decomp = [_mm_setzero_si128(); 4]; - k_decomp[0] = _mm512_extracti32x4_epi32(k, 0); - k_decomp[1] = _mm512_extracti32x4_epi32(k, 1); - k_decomp[2] = _mm512_extracti32x4_epi32(k, 2); - k_decomp[3] = _mm512_extracti32x4_epi32(k, 3); + k_decomp[0] = _mm512_extracti32x4_epi32::<0>(k); + k_decomp[1] = _mm512_extracti32x4_epi32::<1>(k); + k_decomp[2] = _mm512_extracti32x4_epi32::<2>(k); + k_decomp[3] = _mm512_extracti32x4_epi32::<3>(k); let r = vectorized(a, k); let mut e_decomp = [_mm_setzero_si128(); 4]; for i in 0..4 { e_decomp[i] = linear(a_decomp[i], k_decomp[i]); } - assert_eq_m128i(_mm512_extracti32x4_epi32(r, 0), e_decomp[0]); - assert_eq_m128i(_mm512_extracti32x4_epi32(r, 1), e_decomp[1]); - assert_eq_m128i(_mm512_extracti32x4_epi32(r, 2), e_decomp[2]); - assert_eq_m128i(_mm512_extracti32x4_epi32(r, 3), e_decomp[3]); + assert_eq_m128i(_mm512_extracti32x4_epi32::<0>(r), e_decomp[0]); + assert_eq_m128i(_mm512_extracti32x4_epi32::<1>(r), e_decomp[1]); + assert_eq_m128i(_mm512_extracti32x4_epi32::<2>(r), e_decomp[2]); + assert_eq_m128i(_mm512_extracti32x4_epi32::<3>(r), e_decomp[3]); } #[simd_test(enable = "avx512vaes,avx512f")] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs index cd6476482feb..3256c8dba063 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs @@ -34,15 +34,11 @@ extern "C" { #[inline] #[target_feature(enable = "avx512vpclmulqdq,avx512f")] // technically according to Intel's documentation we don't need avx512f here, however LLVM gets confused otherwise -#[cfg_attr(test, assert_instr(vpclmul, imm8 = 0))] 
-#[rustc_args_required_const(2)] -pub unsafe fn _mm512_clmulepi64_epi128(a: __m512i, b: __m512i, imm8: i32) -> __m512i { - macro_rules! call { - ($imm8:expr) => { - pclmulqdq_512(a, b, $imm8) - }; - } - constify_imm8!(imm8, call) +#[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm512_clmulepi64_epi128<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { + static_assert_imm8!(IMM8); + pclmulqdq_512(a, b, IMM8 as u8) } /// Performs a carry-less multiplication of two 64-bit polynomials over the @@ -55,15 +51,11 @@ pub unsafe fn _mm512_clmulepi64_epi128(a: __m512i, b: __m512i, imm8: i32) -> __m /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_clmulepi64_epi128) #[inline] #[target_feature(enable = "avx512vpclmulqdq,avx512vl")] -#[cfg_attr(test, assert_instr(vpclmul, imm8 = 0))] -#[rustc_args_required_const(2)] -pub unsafe fn _mm256_clmulepi64_epi128(a: __m256i, b: __m256i, imm8: i32) -> __m256i { - macro_rules! 
call { - ($imm8:expr) => { - pclmulqdq_256(a, b, $imm8) - }; - } - constify_imm8!(imm8, call) +#[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn _mm256_clmulepi64_epi128<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { + static_assert_imm8!(IMM8); + pclmulqdq_256(a, b, IMM8 as u8) } #[cfg(test)] @@ -93,37 +85,33 @@ mod tests { let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); let r11 = $broadcast(r11); - $assert($clmul(a, b, 0x00), r00); - $assert($clmul(a, b, 0x10), r01); - $assert($clmul(a, b, 0x01), r10); - $assert($clmul(a, b, 0x11), r11); + $assert($clmul::<0x00>(a, b), r00); + $assert($clmul::<0x10>(a, b), r01); + $assert($clmul::<0x01>(a, b), r10); + $assert($clmul::<0x11>(a, b), r11); let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); let a0 = $broadcast(a0); let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000); let r = $broadcast(r); - $assert($clmul(a0, a0, 0x00), r); + $assert($clmul::<0x00>(a0, a0), r); } } macro_rules! unroll { - ($target:ident[4] = $op:ident($source:ident,4);) => { - $target[3] = $op($source, 3); - $target[2] = $op($source, 2); - unroll! {$target[2] = $op($source,2);} + ($target:ident[4] = $op:ident::<4>($source:ident);) => { + $target[3] = $op::<3>($source); + $target[2] = $op::<2>($source); + unroll! {$target[2] = $op::<2>($source);} }; - ($target:ident[2] = $op:ident($source:ident,2);) => { - $target[1] = $op($source, 1); - $target[0] = $op($source, 0); + ($target:ident[2] = $op:ident::<2>($source:ident);) => { + $target[1] = $op::<1>($source); + $target[0] = $op::<0>($source); }; - (assert_eq_m128i($op:ident($vec_res:ident,4),$lin_res:ident[4]);) => { - assert_eq_m128i($op($vec_res, 3), $lin_res[3]); - assert_eq_m128i($op($vec_res, 2), $lin_res[2]); - unroll! 
{assert_eq_m128i($op($vec_res,2),$lin_res[2]);} - }; - (assert_eq_m128i($op:ident($vec_res:ident,2),$lin_res:ident[2]);) => { - assert_eq_m128i($op($vec_res, 1), $lin_res[1]); - assert_eq_m128i($op($vec_res, 0), $lin_res[0]); + (assert_eq_m128i($op:ident::<4>($vec_res:ident),$lin_res:ident[4]);) => { + assert_eq_m128i($op::<3>($vec_res), $lin_res[3]); + assert_eq_m128i($op::<2>($vec_res), $lin_res[2]); + unroll! {assert_eq_m128i($op::<2>($vec_res),$lin_res[2]);} }; (assert_eq_m128i($op:ident::<2>($vec_res:ident),$lin_res:ident[2]);) => { assert_eq_m128i($op::<1>($vec_res), $lin_res[1]); @@ -160,16 +148,16 @@ mod tests { ); let mut a_decomp = [_mm_setzero_si128(); 4]; - unroll! {a_decomp[4] = _mm512_extracti32x4_epi32(a,4);} + unroll! {a_decomp[4] = _mm512_extracti32x4_epi32::<4>(a);} let mut b_decomp = [_mm_setzero_si128(); 4]; - unroll! {b_decomp[4] = _mm512_extracti32x4_epi32(b,4);} + unroll! {b_decomp[4] = _mm512_extracti32x4_epi32::<4>(b);} let r = vectorized(a, b); let mut e_decomp = [_mm_setzero_si128(); 4]; for i in 0..4 { e_decomp[i] = linear(a_decomp[i], b_decomp[i]); } - unroll! {assert_eq_m128i(_mm512_extracti32x4_epi32(r,4),e_decomp[4]);} + unroll! {assert_eq_m128i(_mm512_extracti32x4_epi32::<4>(r),e_decomp[4]);} } // this function tests one of the possible 4 instances @@ -201,13 +189,13 @@ mod tests { ); let mut a_decomp = [_mm_setzero_si128(); 2]; - unroll! {a_decomp[2] = _mm512_extracti32x4_epi32(a,2);} + unroll! {a_decomp[2] = _mm512_extracti32x4_epi32::<2>(a);} let mut b_decomp = [_mm_setzero_si128(); 2]; - unroll! {b_decomp[2] = _mm512_extracti32x4_epi32(b,2);} + unroll! 
{b_decomp[2] = _mm512_extracti32x4_epi32::<2>(b);} let r = vectorized( - _mm512_extracti64x4_epi64(a, 0), - _mm512_extracti64x4_epi64(b, 0), + _mm512_extracti64x4_epi64::<0>(a), + _mm512_extracti64x4_epi64::<0>(b), ); let mut e_decomp = [_mm_setzero_si128(); 2]; for i in 0..2 { @@ -226,19 +214,19 @@ mod tests { verify_512_helper( |a, b| _mm_clmulepi64_si128::<0x00>(a, b), - |a, b| _mm512_clmulepi64_epi128(a, b, 0x00), + |a, b| _mm512_clmulepi64_epi128::<0x00>(a, b), ); verify_512_helper( |a, b| _mm_clmulepi64_si128::<0x01>(a, b), - |a, b| _mm512_clmulepi64_epi128(a, b, 0x01), + |a, b| _mm512_clmulepi64_epi128::<0x01>(a, b), ); verify_512_helper( |a, b| _mm_clmulepi64_si128::<0x10>(a, b), - |a, b| _mm512_clmulepi64_epi128(a, b, 0x10), + |a, b| _mm512_clmulepi64_epi128::<0x10>(a, b), ); verify_512_helper( |a, b| _mm_clmulepi64_si128::<0x11>(a, b), - |a, b| _mm512_clmulepi64_epi128(a, b, 0x11), + |a, b| _mm512_clmulepi64_epi128::<0x11>(a, b), ); } @@ -252,19 +240,19 @@ mod tests { verify_256_helper( |a, b| _mm_clmulepi64_si128::<0x00>(a, b), - |a, b| _mm256_clmulepi64_epi128(a, b, 0x00), + |a, b| _mm256_clmulepi64_epi128::<0x00>(a, b), ); verify_256_helper( |a, b| _mm_clmulepi64_si128::<0x01>(a, b), - |a, b| _mm256_clmulepi64_epi128(a, b, 0x01), + |a, b| _mm256_clmulepi64_epi128::<0x01>(a, b), ); verify_256_helper( |a, b| _mm_clmulepi64_si128::<0x10>(a, b), - |a, b| _mm256_clmulepi64_epi128(a, b, 0x10), + |a, b| _mm256_clmulepi64_epi128::<0x10>(a, b), ); verify_256_helper( |a, b| _mm_clmulepi64_si128::<0x11>(a, b), - |a, b| _mm256_clmulepi64_epi128(a, b, 0x11), + |a, b| _mm256_clmulepi64_epi128::<0x11>(a, b), ); } } diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs index 07070f64758c..471977821c19 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs @@ -11182,7 +11182,7 @@ mod tests { #[simd_test(enable = "avx512f")] 
unsafe fn test_mm512_extracti64x4_epi64() { let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); - let r = _mm512_extracti64x4_epi64(a, 0x1); + let r = _mm512_extracti64x4_epi64::<0x1>(a); let e = _mm256_setr_epi64x(5, 6, 7, 8); assert_eq_m256i(r, e); }