Refactor avx512f: integers
This commit is contained in:
parent
0d9520dfd4
commit
883cedc230
3 changed files with 215 additions and 84 deletions
|
|
@ -204,7 +204,6 @@
|
|||
* [ ] [`_mm256_mmask_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
|
||||
* [ ] [`_mm256_mmask_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
|
||||
* [ ] [`_mm256_mmask_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
|
||||
* [ ] [`_mm_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi64)
|
||||
* [ ] [`_mm_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
|
||||
* [ ] [`_mm_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
|
||||
* [ ] [`_mm_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
|
||||
|
|
@ -213,7 +212,6 @@
|
|||
* [ ] [`_mm_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
|
||||
* [ ] [`_mm_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
|
||||
* [ ] [`_mm_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
|
||||
* [ ] [`_mm_mask_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi64)
|
||||
* [ ] [`_mm_mask_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
|
||||
* [ ] [`_mm_mask_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
|
||||
* [ ] [`_mm_mask_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
|
||||
|
|
@ -222,10 +220,6 @@
|
|||
* [ ] [`_mm_mask_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
|
||||
* [ ] [`_mm_mask_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
|
||||
* [ ] [`_mm_mask_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
|
||||
* [ ] [`_mm_mask_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi64)
|
||||
* [ ] [`_mm_maskz_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi64)
|
||||
* [ ] [`_mm_maskz_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi64)
|
||||
* [ ] [`_mm_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi64)
|
||||
* [ ] [`_mm_mmask_i32gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
|
||||
* [ ] [`_mm_mmask_i32gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
|
||||
* [ ] [`_mm_mmask_i32gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
|
||||
|
|
|
|||
|
|
@ -49,11 +49,9 @@ use stdarch_test::assert_instr;
|
|||
#[cfg_attr(test, assert_instr(vpabsd))]
|
||||
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
|
||||
let a = a.as_i32x16();
|
||||
// all-0 is a properly initialized i32x16
|
||||
let zero: i32x16 = mem::zeroed();
|
||||
let sub = simd_sub(zero, a);
|
||||
let cmp: i32x16 = simd_gt(a, zero);
|
||||
transmute(simd_select(cmp, a, sub))
|
||||
let zero = i32x16::splat(0);
|
||||
let r = simd_select::<i32x16, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Computes the absolute value of packed 32-bit integers in `a`, and store the
|
||||
|
|
@ -144,11 +142,9 @@ pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpabsq))]
|
||||
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
|
||||
let a = a.as_i64x8();
|
||||
// all-0 is a properly initialized i64x8
|
||||
let zero: i64x8 = mem::zeroed();
|
||||
let sub = simd_sub(zero, a);
|
||||
let cmp: i64x8 = simd_gt(a, zero);
|
||||
transmute(simd_select(cmp, a, sub))
|
||||
let zero = i64x8::splat(0);
|
||||
let r = simd_select::<i64x8, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -185,11 +181,9 @@ pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
|||
#[cfg_attr(test, assert_instr(vpabsq))]
|
||||
pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i {
|
||||
let a = a.as_i64x4();
|
||||
// all-0 is a properly initialized i64x4
|
||||
let zero: i64x4 = mem::zeroed();
|
||||
let sub = simd_sub(zero, a);
|
||||
let cmp: i64x4 = simd_gt(a, zero);
|
||||
transmute(simd_select(cmp, a, sub))
|
||||
let zero = i64x4::splat(0);
|
||||
let r = simd_select::<i64x4, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -206,7 +200,7 @@ pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __
|
|||
|
||||
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
|
|
@ -217,6 +211,45 @@ pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
|||
transmute(simd_select_bitmask(k, abs, zero))
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpabsq))]
|
||||
pub unsafe fn _mm_abs_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i64x2();
|
||||
let zero = i64x2::splat(0);
|
||||
let r = simd_select::<i64x2, _>(simd_lt(a, zero), simd_neg(a), a);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpabsq))]
|
||||
pub unsafe fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
let abs = _mm_abs_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpabsq))]
|
||||
pub unsafe fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
let abs = _mm_abs_epi64(a).as_i64x2();
|
||||
let zero = i64x2::splat(0);
|
||||
transmute(simd_select_bitmask(k, abs, zero))
|
||||
}
|
||||
|
||||
/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
|
||||
|
|
@ -1265,7 +1298,9 @@ pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmuldq))]
|
||||
pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
|
||||
let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
|
||||
let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
|
||||
transmute(simd_mul(a, b))
|
||||
}
|
||||
|
||||
/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -1477,7 +1512,10 @@ pub unsafe fn _mm512_mask_mullox_epi64(
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmuludq))]
|
||||
pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
|
||||
let a = a.as_u64x8();
|
||||
let b = b.as_u64x8();
|
||||
let mask = u64x8::splat(u32::MAX.into());
|
||||
transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
|
||||
}
|
||||
|
||||
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -1907,7 +1945,9 @@ pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxsd))]
|
||||
pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
|
||||
let a = a.as_i32x16();
|
||||
let b = b.as_i32x16();
|
||||
transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -1993,7 +2033,9 @@ pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxsq))]
|
||||
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
|
||||
let a = a.as_i64x8();
|
||||
let b = b.as_i64x8();
|
||||
transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2029,7 +2071,9 @@ pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxsq))]
|
||||
pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpmaxsq256(a.as_i64x4(), b.as_i64x4()))
|
||||
let a = a.as_i64x4();
|
||||
let b = b.as_i64x4();
|
||||
transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2065,7 +2109,9 @@ pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxsq))]
|
||||
pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpmaxsq128(a.as_i64x2(), b.as_i64x2()))
|
||||
let a = a.as_i64x2();
|
||||
let b = b.as_i64x2();
|
||||
transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2277,7 +2323,9 @@ pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxud))]
|
||||
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
|
||||
let a = a.as_u32x16();
|
||||
let b = b.as_u32x16();
|
||||
transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2363,7 +2411,9 @@ pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxuq))]
|
||||
pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
|
||||
let a = a.as_u64x8();
|
||||
let b = b.as_u64x8();
|
||||
transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2399,7 +2449,9 @@ pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxuq))]
|
||||
pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpmaxuq256(a.as_u64x4(), b.as_u64x4()))
|
||||
let a = a.as_u64x4();
|
||||
let b = b.as_u64x4();
|
||||
transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2435,7 +2487,9 @@ pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmaxuq))]
|
||||
pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpmaxuq128(a.as_u64x2(), b.as_u64x2()))
|
||||
let a = a.as_u64x2();
|
||||
let b = b.as_u64x2();
|
||||
transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2471,7 +2525,9 @@ pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminsd))]
|
||||
pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
|
||||
let a = a.as_i32x16();
|
||||
let b = b.as_i32x16();
|
||||
transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2557,7 +2613,9 @@ pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminsq))]
|
||||
pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
|
||||
let a = a.as_i64x8();
|
||||
let b = b.as_i64x8();
|
||||
transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2593,7 +2651,9 @@ pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminsq))]
|
||||
pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpminsq256(a.as_i64x4(), b.as_i64x4()))
|
||||
let a = a.as_i64x4();
|
||||
let b = b.as_i64x4();
|
||||
transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2621,6 +2681,44 @@ pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m
|
|||
transmute(simd_select_bitmask(k, min, zero))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminsq))]
|
||||
pub unsafe fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
let a = a.as_i64x2();
|
||||
let b = b.as_i64x2();
|
||||
transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminsq))]
|
||||
pub unsafe fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
let min = _mm_min_epi64(a, b).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, min, src.as_i64x2()))
|
||||
}
|
||||
|
||||
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminsq))]
|
||||
pub unsafe fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
let min = _mm_min_epi64(a, b).as_i64x2();
|
||||
let zero = _mm_setzero_si128().as_i64x2();
|
||||
transmute(simd_select_bitmask(k, min, zero))
|
||||
}
|
||||
|
||||
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
|
||||
|
|
@ -2807,7 +2905,9 @@ pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminud))]
|
||||
pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
|
||||
let a = a.as_u32x16();
|
||||
let b = b.as_u32x16();
|
||||
transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2893,7 +2993,9 @@ pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminuq))]
|
||||
pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
|
||||
let a = a.as_u64x8();
|
||||
let b = b.as_u64x8();
|
||||
transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2929,7 +3031,9 @@ pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminuq))]
|
||||
pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpminuq256(a.as_u64x4(), b.as_u64x4()))
|
||||
let a = a.as_u64x4();
|
||||
let b = b.as_u64x4();
|
||||
transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -2965,7 +3069,9 @@ pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpminuq))]
|
||||
pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpminuq128(a.as_u64x2(), b.as_u64x2()))
|
||||
let a = a.as_u64x2();
|
||||
let b = b.as_u64x2();
|
||||
transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -40298,51 +40404,6 @@ pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
|
|||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.avx512.pmul.dq.512"]
|
||||
fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.pmulu.dq.512"]
|
||||
fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
|
||||
fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
|
||||
fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxs.q.256"]
|
||||
fn vpmaxsq256(a: i64x4, b: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxs.q.128"]
|
||||
fn vpmaxsq128(a: i64x2, b: i64x2) -> i64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
|
||||
fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
|
||||
fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.pmins.q.256"]
|
||||
fn vpminsq256(a: i64x4, b: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.pmins.q.128"]
|
||||
fn vpminsq128(a: i64x2, b: i64x2) -> i64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
|
||||
fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
|
||||
fn vpmaxuq(a: u64x8, b: u64x8) -> u64x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.256"]
|
||||
fn vpmaxuq256(a: u64x4, b: u64x4) -> u64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.128"]
|
||||
fn vpmaxuq128(a: u64x2, b: u64x2) -> u64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
|
||||
fn vpminud(a: u32x16, b: u32x16) -> u32x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
|
||||
fn vpminuq(a: u64x8, b: u64x8) -> u64x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.pminu.q.256"]
|
||||
fn vpminuq256(a: u64x4, b: u64x4) -> u64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.pminu.q.128"]
|
||||
fn vpminuq128(a: u64x2, b: u64x2) -> u64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.sqrt.ps.512"]
|
||||
fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
|
||||
#[link_name = "llvm.x86.avx512.sqrt.pd.512"]
|
||||
|
|
|
|||
|
|
@ -574,6 +574,46 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_abs_epi64() {
|
||||
let a = _mm_set_epi64x(i64::MAX, i64::MIN);
|
||||
let r = _mm_abs_epi64(a);
|
||||
let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1));
|
||||
assert_eq_m128i(r, e);
|
||||
let a = _mm_set_epi64x(100, -100);
|
||||
let r = _mm_abs_epi64(a);
|
||||
let e = _mm_set_epi64x(100, 100);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_abs_epi64() {
|
||||
let a = _mm_set_epi64x(i64::MAX, i64::MIN);
|
||||
let r = _mm_mask_abs_epi64(a, 0, a);
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_mask_abs_epi64(a, 0b00000011, a);
|
||||
let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1));
|
||||
assert_eq_m128i(r, e);
|
||||
let a = _mm_set_epi64x(100, -100);
|
||||
let r = _mm_mask_abs_epi64(a, 0b00000011, a);
|
||||
let e = _mm_set_epi64x(100, 100);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_abs_epi64() {
|
||||
let a = _mm_set_epi64x(i64::MAX, i64::MIN);
|
||||
let r = _mm_maskz_abs_epi64(0, a);
|
||||
assert_eq_m128i(r, _mm_setzero_si128());
|
||||
let r = _mm_maskz_abs_epi64(0b00000011, a);
|
||||
let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1));
|
||||
assert_eq_m128i(r, e);
|
||||
let a = _mm_set_epi64x(100, -100);
|
||||
let r = _mm_maskz_abs_epi64(0b00000011, a);
|
||||
let e = _mm_set_epi64x(100, 100);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_abs_pd() {
|
||||
let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
|
||||
|
|
@ -1673,6 +1713,42 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_min_epi64() {
|
||||
let a = _mm_set_epi64x(0, 1);
|
||||
let b = _mm_set_epi64x(3, 2);
|
||||
let r = _mm_min_epi64(a, b);
|
||||
let e = _mm_set_epi64x(0, 1);
|
||||
assert_eq_m128i(r, e);
|
||||
let a = _mm_set_epi64x(2, 3);
|
||||
let b = _mm_set_epi64x(1, 0);
|
||||
let r = _mm_min_epi64(a, b);
|
||||
let e = _mm_set_epi64x(1, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_min_epi64() {
|
||||
let a = _mm_set_epi64x(0, 1);
|
||||
let b = _mm_set_epi64x(3, 2);
|
||||
let r = _mm_mask_min_epi64(a, 0, a, b);
|
||||
assert_eq_m128i(r, a);
|
||||
let r = _mm_mask_min_epi64(a, 0b00000011, a, b);
|
||||
let e = _mm_set_epi64x(0, 1);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_min_epi64() {
|
||||
let a = _mm_set_epi64x(0, 1);
|
||||
let b = _mm_set_epi64x(3, 2);
|
||||
let r = _mm_maskz_min_epi64(0, a, b);
|
||||
assert_eq_m128i(r, _mm_setzero_si128());
|
||||
let r = _mm_maskz_min_epi64(0b00000011, a, b);
|
||||
let e = _mm_set_epi64x(0, 1);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_min_pd() {
|
||||
let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue