From 883cedc230f74b7b1a5d3ddfa5eb490b6d3a5b66 Mon Sep 17 00:00:00 2001 From: Tobias Decking Date: Mon, 24 Jun 2024 01:06:28 +0200 Subject: [PATCH] Refactor avx512f: integers --- .../stdarch/crates/core_arch/missing-x86.md | 6 - .../crates/core_arch/src/x86/avx512f.rs | 217 +++++++++++------- .../crates/core_arch/src/x86_64/avx512f.rs | 76 ++++++ 3 files changed, 215 insertions(+), 84 deletions(-) diff --git a/library/stdarch/crates/core_arch/missing-x86.md b/library/stdarch/crates/core_arch/missing-x86.md index 6daab7715d32..cdd1fe7eec6f 100644 --- a/library/stdarch/crates/core_arch/missing-x86.md +++ b/library/stdarch/crates/core_arch/missing-x86.md @@ -204,7 +204,6 @@ * [ ] [`_mm256_mmask_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64) * [ ] [`_mm256_mmask_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd) * [ ] [`_mm256_mmask_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps) - * [ ] [`_mm_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi64) * [ ] [`_mm_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32) * [ ] [`_mm_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64) * [ ] [`_mm_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd) @@ -213,7 +212,6 @@ * [ ] [`_mm_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64) * [ ] [`_mm_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd) * [ ] [`_mm_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps) - * [ ] 
[`_mm_mask_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi64) * [ ] [`_mm_mask_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32) * [ ] [`_mm_mask_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64) * [ ] [`_mm_mask_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd) @@ -222,10 +220,6 @@ * [ ] [`_mm_mask_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64) * [ ] [`_mm_mask_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd) * [ ] [`_mm_mask_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps) - * [ ] [`_mm_mask_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi64) - * [ ] [`_mm_maskz_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi64) - * [ ] [`_mm_maskz_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi64) - * [ ] [`_mm_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi64) * [ ] [`_mm_mmask_i32gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32) * [ ] [`_mm_mmask_i32gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64) * [ ] [`_mm_mmask_i32gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd) diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index 9e6ce92b1707..c2795c6577c0 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ 
b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -49,11 +49,9 @@ use stdarch_test::assert_instr; #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { let a = a.as_i32x16(); - // all-0 is a properly initialized i32x16 - let zero: i32x16 = mem::zeroed(); - let sub = simd_sub(zero, a); - let cmp: i32x16 = simd_gt(a, zero); - transmute(simd_select(cmp, a, sub)) + let zero = i32x16::splat(0); + let r = simd_select::<i32x16, _>(simd_lt(a, zero), simd_neg(a), a); + transmute(r) } /// Computes the absolute value of packed 32-bit integers in `a`, and store the @@ -144,11 +142,9 @@ pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i { let a = a.as_i64x8(); - // all-0 is a properly initialized i64x8 - let zero: i64x8 = mem::zeroed(); - let sub = simd_sub(zero, a); - let cmp: i64x8 = simd_gt(a, zero); - transmute(simd_select(cmp, a, sub)) + let zero = i64x8::splat(0); + let r = simd_select::<i64x8, _>(simd_lt(a, zero), simd_neg(a), a); + transmute(r) } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -185,11 +181,9 @@ pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i { let a = a.as_i64x4(); - // all-0 is a properly initialized i64x4 - let zero: i64x4 = mem::zeroed(); - let sub = simd_sub(zero, a); - let cmp: i64x4 = simd_gt(a, zero); - transmute(simd_select(cmp, a, sub)) + let zero = i64x4::splat(0); + let r = simd_select::<i64x4, _>(simd_lt(a, zero), simd_neg(a), a); + transmute(r) } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -206,7 +200,7 @@ pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __ /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45) +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64) #[inline] #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -217,6 +211,45 @@ pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i { transmute(simd_select_bitmask(k, abs, zero)) } +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub unsafe fn _mm_abs_epi64(a: __m128i) -> __m128i { + let a = a.as_i64x2(); + let zero = i64x2::splat(0); + let r = simd_select::<i64x2, _>(simd_lt(a, zero), simd_neg(a), a); + transmute(r) +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub unsafe fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + let abs = _mm_abs_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, abs, src.as_i64x2())) +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub unsafe fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i { + let abs = _mm_abs_epi64(a).as_i64x2(); + let zero = i64x2::splat(0); + transmute(simd_select_bitmask(k, abs, zero)) +} + /// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst. 
/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65) @@ -1265,7 +1298,9 @@ pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuldq))] pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmuldq(a.as_i32x16(), b.as_i32x16())) + let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8())); + let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8())); + transmute(simd_mul(a, b)) } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1477,7 +1512,10 @@ pub unsafe fn _mm512_mask_mullox_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmuludq))] pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmuludq(a.as_u32x16(), b.as_u32x16())) + let a = a.as_u64x8(); + let b = b.as_u64x8(); + let mask = u64x8::splat(u32::MAX.into()); + transmute(simd_mul(simd_and(a, mask), simd_and(b, mask))) } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1907,7 +1945,9 @@ pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsd))] pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16())) + let a = a.as_i32x16(); + let b = b.as_i32x16(); + transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b)) } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1993,7 +2033,9 @@ pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8())) + let a = a.as_i64x8(); + let b = b.as_i64x8(); + transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b)) } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2029,7 +2071,9 @@ pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(vpmaxsq256(a.as_i64x4(), b.as_i64x4())) + let a = a.as_i64x4(); + let b = b.as_i64x4(); + transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b)) } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2065,7 +2109,9 @@ pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxsq))] pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i { - transmute(vpmaxsq128(a.as_i64x2(), b.as_i64x2())) + let a = a.as_i64x2(); + let b = b.as_i64x2(); + transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b)) } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2277,7 +2323,9 @@ pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxud))] pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmaxud(a.as_u32x16(), b.as_u32x16())) + let a = a.as_u32x16(); + let b = b.as_u32x16(); + transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2363,7 +2411,9 @@ pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i { - transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8())) + let a = a.as_u64x8(); + let b = b.as_u64x8(); + transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2399,7 +2449,9 @@ pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i { - transmute(vpmaxuq256(a.as_u64x4(), b.as_u64x4())) + let a = a.as_u64x4(); + let b = b.as_u64x4(); + transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2435,7 +2487,9 @@ pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmaxuq))] pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i { - transmute(vpmaxuq128(a.as_u64x2(), b.as_u64x2())) + let a = a.as_u64x2(); + let b = b.as_u64x2(); + transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2471,7 +2525,9 @@ pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsd))] pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpminsd(a.as_i32x16(), b.as_i32x16())) + let a = a.as_i32x16(); + let b = b.as_i32x16(); + transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b)) } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2557,7 +2613,9 @@ pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(vpminsq(a.as_i64x8(), b.as_i64x8())) + let a = a.as_i64x8(); + let b = b.as_i64x8(); + transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b)) } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2593,7 +2651,9 @@ pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminsq))] pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i { - transmute(vpminsq256(a.as_i64x4(), b.as_i64x4())) + let a = a.as_i64x4(); + let b = b.as_i64x4(); + transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b)) } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2621,6 +2681,44 @@ pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m transmute(simd_select_bitmask(k, min, zero)) } +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. 
+/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub unsafe fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i { + let a = a.as_i64x2(); + let b = b.as_i64x2(); + transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b)) +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub unsafe fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + let min = _mm_min_epi64(a, b).as_i64x2(); + transmute(simd_select_bitmask(k, min, src.as_i64x2())) +} + +/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[unstable(feature = "stdarch_x86_avx512", issue = "111137")] +#[cfg_attr(test, assert_instr(vpminsq))] +pub unsafe fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { + let min = _mm_min_epi64(a, b).as_i64x2(); + let zero = _mm_setzero_si128().as_i64x2(); + transmute(simd_select_bitmask(k, min, zero)) +} + /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst. 
/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769) @@ -2807,7 +2905,9 @@ pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminud))] pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i { - transmute(vpminud(a.as_u32x16(), b.as_u32x16())) + let a = a.as_u32x16(); + let b = b.as_u32x16(); + transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2893,7 +2993,9 @@ pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i { - transmute(vpminuq(a.as_u64x8(), b.as_u64x8())) + let a = a.as_u64x8(); + let b = b.as_u64x8(); + transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2929,7 +3031,9 @@ pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i { - transmute(vpminuq256(a.as_u64x4(), b.as_u64x4())) + let a = a.as_u64x4(); + let b = b.as_u64x4(); + transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2965,7 +3069,9 @@ pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpminuq))] pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i { - transmute(vpminuq128(a.as_u64x2(), b.as_u64x2())) + let a = a.as_u64x2(); + let b = b.as_u64x2(); + transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -40298,51 +40404,6 @@ pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF; #[allow(improper_ctypes)] extern "C" { - #[link_name = "llvm.x86.avx512.pmul.dq.512"] - fn vpmuldq(a: i32x16, b: i32x16) -> i64x8; - #[link_name = "llvm.x86.avx512.pmulu.dq.512"] - fn vpmuludq(a: u32x16, b: u32x16) -> u64x8; - - #[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"] - fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16; - - #[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"] - fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.pmaxs.q.256"] - fn vpmaxsq256(a: i64x4, b: i64x4) -> i64x4; - #[link_name = "llvm.x86.avx512.mask.pmaxs.q.128"] - fn vpmaxsq128(a: i64x2, b: i64x2) -> i64x2; - - #[link_name = "llvm.x86.avx512.mask.pmins.d.512"] - fn vpminsd(a: i32x16, b: i32x16) -> i32x16; - - #[link_name = "llvm.x86.avx512.mask.pmins.q.512"] - fn vpminsq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.pmins.q.256"] - fn vpminsq256(a: i64x4, b: i64x4) -> i64x4; - #[link_name = "llvm.x86.avx512.mask.pmins.q.128"] - fn vpminsq128(a: i64x2, b: i64x2) -> i64x2; - - #[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"] - fn vpmaxud(a: u32x16, b: u32x16) -> u32x16; - - #[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"] - fn vpmaxuq(a: u64x8, b: u64x8) -> u64x8; - #[link_name = "llvm.x86.avx512.mask.pmaxu.q.256"] - fn vpmaxuq256(a: u64x4, b: u64x4) -> u64x4; - 
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.128"] - fn vpmaxuq128(a: u64x2, b: u64x2) -> u64x2; - - #[link_name = "llvm.x86.avx512.mask.pminu.d.512"] - fn vpminud(a: u32x16, b: u32x16) -> u32x16; - - #[link_name = "llvm.x86.avx512.mask.pminu.q.512"] - fn vpminuq(a: u64x8, b: u64x8) -> u64x8; - #[link_name = "llvm.x86.avx512.mask.pminu.q.256"] - fn vpminuq256(a: u64x4, b: u64x4) -> u64x4; - #[link_name = "llvm.x86.avx512.mask.pminu.q.128"] - fn vpminuq128(a: u64x2, b: u64x2) -> u64x2; - #[link_name = "llvm.x86.avx512.sqrt.ps.512"] fn vsqrtps(a: f32x16, rounding: i32) -> f32x16; #[link_name = "llvm.x86.avx512.sqrt.pd.512"] diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs index fec18e3ea3eb..a2b2496caf15 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs @@ -574,6 +574,46 @@ mod tests { assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_abs_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let r = _mm_abs_epi64(a); + let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1)); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(100, -100); + let r = _mm_abs_epi64(a); + let e = _mm_set_epi64x(100, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_abs_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let r = _mm_mask_abs_epi64(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_abs_epi64(a, 0b00000011, a); + let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1)); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(100, -100); + let r = _mm_mask_abs_epi64(a, 0b00000011, a); + let e = _mm_set_epi64x(100, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_abs_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let r = _mm_maskz_abs_epi64(0, a); + 
assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_abs_epi64(0b00000011, a); + let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1)); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(100, -100); + let r = _mm_maskz_abs_epi64(0b00000011, a); + let e = _mm_set_epi64x(100, 100); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_abs_pd() { let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); @@ -1673,6 +1713,42 @@ mod tests { assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_min_epi64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(3, 2); + let r = _mm_min_epi64(a, b); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(1, 0); + let r = _mm_min_epi64(a, b); + let e = _mm_set_epi64x(1, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epi64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(3, 2); + let r = _mm_mask_min_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epi64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(3, 2); + let r = _mm_maskz_min_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_min_pd() { let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);