diff --git a/library/stdarch/crates/core_arch/missing-x86.md b/library/stdarch/crates/core_arch/missing-x86.md
index 4c70c1b435ce..8d75a237a4a2 100644
--- a/library/stdarch/crates/core_arch/missing-x86.md
+++ b/library/stdarch/crates/core_arch/missing-x86.md
@@ -147,78 +147,6 @@
-["AVX512_BF16", "AVX512F"]
* [ ] [`_mm512_cvtpbh_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpbh_ps)
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
index cbda06b1ab5d..8a5a529b08e9 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
@@ -16534,27 +16534,6 @@ pub unsafe fn _mm512_mask_i32scatter_epi64(
vpscatterdq(slice, mask, offsets, src, SCALE);
}
-/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
-///
-/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
-#[inline]
-#[target_feature(enable = "avx512f,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
-    slice: *mut u8,
-    offsets: __m128i,
-    src: __m256i,
-) {
-    static_assert_imm8_scale!(SCALE);
-    let src = src.as_i64x4();
-    let neg_one = -1;
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i32x4();
-    vpscatterdq256(slice, neg_one, offsets, src, SCALE);
-}
-
/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
@@ -16684,6 +16663,1153 @@ pub unsafe fn _mm512_mask_i64scatter_epi32(
vpscatterqd(slice, mask, offsets, src, SCALE);
}
+/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
+/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
+    vindex: __m512i,
+    base_addr: *const u8,
+) -> __m512i {
+    // Only the low 8 of the 16 32-bit indices are used; delegate to the 256-bit-index gather.
+    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
+}
+
+/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
+/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
+/// (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
+    src: __m512i,
+    k: __mmask8,
+    vindex: __m512i,
+    base_addr: *const u8,
+) -> __m512i {
+    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
+}
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
+/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
+    vindex: __m512i,
+    base_addr: *const u8,
+) -> __m512d {
+    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
+}
+
+/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
+/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
+/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
+    src: __m512d,
+    k: __mmask8,
+    vindex: __m512i,
+    base_addr: *const u8,
+) -> __m512d {
+    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
+}
+
+/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in the lower half of vindex scaled by scale.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m512i,
+    a: __m512i,
+) {
+    _mm512_i32scatter_epi64::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
+}
+
+/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m512i,
+    a: __m512i,
+) {
+    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
+}
+
+/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m512i,
+    a: __m512d,
+) {
+    _mm512_i32scatter_pd::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
+}
+
+/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
+/// (elements whose corresponding mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m512i,
+    a: __m512d,
+) {
+    _mm512_mask_i32scatter_pd::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
+}
+
+/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m256i,
+    a: __m256i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    // Mask of all ones: store every element unconditionally.
+    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
+}
+
+/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m256i,
+    a: __m256i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
+}
+
+/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
+///
+/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
+    slice: *mut u8,
+    offsets: __m128i,
+    src: __m256i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    let src = src.as_i64x4();
+    let slice = slice as *mut i8;
+    let offsets = offsets.as_i32x4();
+    // 0xff mask: all four lanes stored unconditionally.
+    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
+}
+
+/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m128i,
+    a: __m256i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
+}
+
+/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m128i,
+    a: __m256d,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
+}
+
+/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m128i,
+    a: __m256d,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
+}
+
+/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m256i,
+    a: __m256,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
+}
+
+/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m256i,
+    a: __m256,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
+}
+
+/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m256i,
+    a: __m128i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
+}
+
+/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m256i,
+    a: __m128i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
+}
+
+/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m256i,
+    a: __m256i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
+}
+
+/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m256i,
+    a: __m256i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
+}
+
+/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m256i,
+    a: __m256d,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
+}
+
+/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m256i,
+    a: __m256d,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
+}
+
+/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m256i,
+    a: __m128,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
+}
+
+/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m256i,
+    a: __m128,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
+}
+
+/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
+    src: __m256i,
+    k: __mmask8,
+    vindex: __m256i,
+    base_addr: *const u8,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vpgatherdd_256(
+        src.as_i32x8(),
+        base_addr as _,
+        vindex.as_i32x8(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
+    src: __m256i,
+    k: __mmask8,
+    vindex: __m128i,
+    base_addr: *const u8,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vpgatherdq_256(
+        src.as_i64x4(),
+        base_addr as _,
+        vindex.as_i32x4(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
+    src: __m256d,
+    k: __mmask8,
+    vindex: __m128i,
+    base_addr: *const u8,
+) -> __m256d {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vgatherdpd_256(
+        src.as_f64x4(),
+        base_addr as _,
+        vindex.as_i32x4(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
+    src: __m256,
+    k: __mmask8,
+    vindex: __m256i,
+    base_addr: *const u8,
+) -> __m256 {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vgatherdps_256(
+        src.as_f32x8(),
+        base_addr as _,
+        vindex.as_i32x8(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
+    src: __m128i,
+    k: __mmask8,
+    vindex: __m256i,
+    base_addr: *const u8,
+) -> __m128i {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vpgatherqd_256(
+        src.as_i32x4(),
+        base_addr as _,
+        vindex.as_i64x4(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
+    src: __m256i,
+    k: __mmask8,
+    vindex: __m256i,
+    base_addr: *const u8,
+) -> __m256i {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vpgatherqq_256(
+        src.as_i64x4(),
+        base_addr as _,
+        vindex.as_i64x4(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
+    src: __m256d,
+    k: __mmask8,
+    vindex: __m256i,
+    base_addr: *const u8,
+) -> __m256d {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vgatherqpd_256(
+        src.as_f64x4(),
+        base_addr as _,
+        vindex.as_i64x4(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
+    src: __m128,
+    k: __mmask8,
+    vindex: __m256i,
+    base_addr: *const u8,
+) -> __m128 {
+    static_assert_imm8_scale!(SCALE);
+    transmute(vgatherqps_256(
+        src.as_f32x4(),
+        base_addr as _,
+        vindex.as_i64x4(),
+        k,
+        SCALE,
+    ))
+}
+
+/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m128i,
+    a: __m128i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
+}
+
+/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m128i,
+    a: __m128i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
+}
+
+/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
+    base_addr: *mut u8,
+    vindex: __m128i,
+    a: __m128i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
+}
+
+/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
+    base_addr: *mut u8,
+    k: __mmask8,
+    vindex: __m128i,
+    a: __m128i,
+) {
+    static_assert_imm8_scale!(SCALE);
+    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
+}
+
+/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
+    static_assert_imm8_scale!(SCALE);
+    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
+}
+
+/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
+ base_addr: *mut u8,
+ k: __mmask8,
+ vindex: __m128i,
+ a: __m128d,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
+}
+
+/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
+ static_assert_imm8_scale!(SCALE);
+ vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
+}
+
+/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
+ base_addr: *mut u8,
+ k: __mmask8,
+ vindex: __m128i,
+ a: __m128,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
+}
+
+/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
+ base_addr: *mut u8,
+ vindex: __m128i,
+ a: __m128i,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
+}
+
+/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
+ base_addr: *mut u8,
+ k: __mmask8,
+ vindex: __m128i,
+ a: __m128i,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
+}
+
+/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
+ base_addr: *mut u8,
+ vindex: __m128i,
+ a: __m128i,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
+}
+
+/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
+/// are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
+ base_addr: *mut u8,
+ k: __mmask8,
+ vindex: __m128i,
+ a: __m128i,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
+}
+
+/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
+ static_assert_imm8_scale!(SCALE);
+ vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
+}
+
+/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
+ base_addr: *mut u8,
+ k: __mmask8,
+ vindex: __m128i,
+ a: __m128d,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
+}
+
+/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
+ static_assert_imm8_scale!(SCALE);
+ vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
+}
+
+/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
+/// mask bit is not set are not written to memory).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
+ base_addr: *mut u8,
+ k: __mmask8,
+ vindex: __m128i,
+ a: __m128,
+) {
+ static_assert_imm8_scale!(SCALE);
+ vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
+}
+
+/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
+ src: __m128i,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128i {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vpgatherdd_128(
+ src.as_i32x4(),
+ base_addr as _,
+ vindex.as_i32x4(),
+ k,
+ SCALE,
+ ))
+}
+
+/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
+ src: __m128i,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128i {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vpgatherdq_128(
+ src.as_i64x2(),
+ base_addr as _,
+ vindex.as_i32x4(),
+ k,
+ SCALE,
+ ))
+}
+
+/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
+ src: __m128d,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128d {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vgatherdpd_128(
+ src.as_f64x2(),
+ base_addr as _,
+ vindex.as_i32x4(),
+ k,
+ SCALE,
+ ))
+}
+
+/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
+/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
+ src: __m128,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128 {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vgatherdps_128(
+ src.as_f32x4(),
+ base_addr as _,
+ vindex.as_i32x4(),
+ k,
+ SCALE,
+ ))
+}
+
+/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
+ src: __m128i,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128i {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vpgatherqd_128(
+ src.as_i32x4(),
+ base_addr as _,
+ vindex.as_i64x2(),
+ k,
+ SCALE,
+ ))
+}
+
+/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
+/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
+ src: __m128i,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128i {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vpgatherqq_128(
+ src.as_i64x2(),
+ base_addr as _,
+ vindex.as_i64x2(),
+ k,
+ SCALE,
+ ))
+}
+
+/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
+ src: __m128d,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128d {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vgatherqpd_128(
+ src.as_f64x2(),
+ base_addr as _,
+ vindex.as_i64x2(),
+ k,
+ SCALE,
+ ))
+}
+
+/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
+/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
+/// from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
+ src: __m128,
+ k: __mmask8,
+ vindex: __m128i,
+ base_addr: *const u8,
+) -> __m128 {
+ static_assert_imm8_scale!(SCALE);
+ transmute(vgatherqps_128(
+ src.as_f32x4(),
+ base_addr as _,
+ vindex.as_i64x2(),
+ k,
+ SCALE,
+ ))
+}
+
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
@@ -33844,6 +34970,94 @@ pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
dst
}
+/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
+/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
+/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
+/// exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
+#[inline]
+#[cfg_attr(test, assert_instr(vmovss))]
+#[target_feature(enable = "sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
+ let mut dst: __m128 = src;
+ asm!(
+ vpl!("vmovss {dst}{{{k}}}"),
+ p = in(reg) mem_addr,
+ k = in(kreg) k,
+ dst = inout(xmm_reg) dst,
+ options(pure, readonly, nostack, preserves_flags),
+ );
+ dst
+}
+
+/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
+/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
+/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
+/// exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
+#[inline]
+#[cfg_attr(test, assert_instr(vmovss))]
+#[target_feature(enable = "sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
+ let mut dst: __m128;
+ asm!(
+ vpl!("vmovss {dst}{{{k}}} {{z}}"),
+ p = in(reg) mem_addr,
+ k = in(kreg) k,
+ dst = out(xmm_reg) dst,
+ options(pure, readonly, nostack, preserves_flags),
+ );
+ dst
+}
+
+/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
+/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
+/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
+/// exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
+#[inline]
+#[cfg_attr(test, assert_instr(vmovsd))]
+#[target_feature(enable = "sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
+ let mut dst: __m128d = src;
+ asm!(
+ vpl!("vmovsd {dst}{{{k}}}"),
+ p = in(reg) mem_addr,
+ k = in(kreg) k,
+ dst = inout(xmm_reg) dst,
+ options(pure, readonly, nostack, preserves_flags),
+ );
+ dst
+}
+
+/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
+/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
+/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
+/// may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
+#[inline]
+#[cfg_attr(test, assert_instr(vmovsd))]
+#[target_feature(enable = "sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
+ let mut dst: __m128d;
+ asm!(
+ vpl!("vmovsd {dst}{{{k}}} {{z}}"),
+ p = in(reg) mem_addr,
+ k = in(kreg) k,
+ dst = out(xmm_reg) dst,
+ options(pure, readonly, nostack, preserves_flags),
+ );
+ dst
+}
+
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
@@ -34264,6 +35478,42 @@ pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d)
);
}
+/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
+/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
+#[inline]
+#[cfg_attr(test, assert_instr(vmovss))]
+#[target_feature(enable = "sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
+ asm!(
+ vps!("vmovss", "{{{k}}}, {a}"),
+ p = in(reg) mem_addr,
+ k = in(kreg) k,
+ a = in(xmm_reg) a,
+ options(nostack, preserves_flags),
+ );
+}
+
+/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
+/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
+#[inline]
+#[cfg_attr(test, assert_instr(vmovsd))]
+#[target_feature(enable = "sse,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
+pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
+ asm!(
+ vps!("vmovsd", "{{{k}}}, {a}"),
+ p = in(reg) mem_addr,
+ k = in(kreg) k,
+ a = in(xmm_reg) a,
+ options(nostack, preserves_flags),
+ );
+}
+
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
@@ -41162,8 +42412,6 @@ extern "C" {
fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dpq.512"]
fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
- #[link_name = "llvm.x86.avx512.scattersiv4.di"]
- fn vpscatterdq256(slice: *mut i8, mask: i8, offsets: i32x4, src: i64x4, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dpi.512"]
fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
@@ -41172,6 +42420,74 @@ extern "C" {
#[link_name = "llvm.x86.avx512.scatter.qpi.512"]
fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scattersiv4.si"]
+ fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scattersiv2.di"]
+ fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
+ #[link_name = "llvm.x86.avx512.scattersiv2.df"]
+ fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
+ #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
+ fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
+ fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
+ fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
+ fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
+ fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);
+
+ #[link_name = "llvm.x86.avx512.scattersiv8.si"]
+ fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scattersiv4.di"]
+ fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scattersiv4.df"]
+ fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
+ fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
+ fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
+ fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
+ fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
+ #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
+ fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);
+
+ #[link_name = "llvm.x86.avx512.gather3siv4.si"]
+ fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
+ #[link_name = "llvm.x86.avx512.gather3siv2.di"]
+ fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
+ #[link_name = "llvm.x86.avx512.gather3siv2.df"]
+ fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
+ #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
+ fn vgatherdps_128(src: f32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
+ #[link_name = "llvm.x86.avx512.gather3div4.si"]
+ fn vpgatherqd_128(src: i32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
+ #[link_name = "llvm.x86.avx512.gather3div2.di"]
+ fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
+ #[link_name = "llvm.x86.avx512.gather3div2.df"]
+ fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
+ #[link_name = "llvm.x86.avx512.gather3div4.sf"]
+ fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;
+
+ #[link_name = "llvm.x86.avx512.gather3siv8.si"]
+ fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
+ #[link_name = "llvm.x86.avx512.gather3siv4.di"]
+ fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
+ #[link_name = "llvm.x86.avx512.gather3siv4.df"]
+ fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
+ #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
+ fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
+ #[link_name = "llvm.x86.avx512.gather3div8.si"]
+ fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
+ #[link_name = "llvm.x86.avx512.gather3div4.di"]
+ fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
+ #[link_name = "llvm.x86.avx512.gather3div4.df"]
+ fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
+ #[link_name = "llvm.x86.avx512.gather3div8.sf"]
+ fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;
+
#[link_name = "llvm.x86.avx512.mask.cmp.ss"]
fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.sd"]
@@ -50253,6 +51569,60 @@ mod tests {
assert_eq_m128d(r, e);
}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_load_ss() {
+ #[repr(align(16))]
+ struct Align {
+ data: f32,
+ }
+ let src = _mm_set_ss(2.0);
+ let mem = Align { data: 1.0 };
+ let r = _mm_mask_load_ss(src, 0b1, &mem.data);
+ assert_eq_m128(r, _mm_set_ss(1.0));
+ let r = _mm_mask_load_ss(src, 0b0, &mem.data);
+ assert_eq_m128(r, _mm_set_ss(2.0));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_maskz_load_ss() {
+ #[repr(align(16))]
+ struct Align {
+ data: f32,
+ }
+ let mem = Align { data: 1.0 };
+ let r = _mm_maskz_load_ss(0b1, &mem.data);
+ assert_eq_m128(r, _mm_set_ss(1.0));
+ let r = _mm_maskz_load_ss(0b0, &mem.data);
+ assert_eq_m128(r, _mm_set_ss(0.0));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_load_sd() {
+ #[repr(align(16))]
+ struct Align {
+ data: f64,
+ }
+ let src = _mm_set_sd(2.0);
+ let mem = Align { data: 1.0 };
+ let r = _mm_mask_load_sd(src, 0b1, &mem.data);
+ assert_eq_m128d(r, _mm_set_sd(1.0));
+ let r = _mm_mask_load_sd(src, 0b0, &mem.data);
+ assert_eq_m128d(r, _mm_set_sd(2.0));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_maskz_load_sd() {
+ #[repr(align(16))]
+ struct Align {
+ data: f64,
+ }
+ let mem = Align { data: 1.0 };
+ let r = _mm_maskz_load_sd(0b1, &mem.data);
+ assert_eq_m128d(r, _mm_set_sd(1.0));
+ let r = _mm_maskz_load_sd(0b0, &mem.data);
+ assert_eq_m128d(r, _mm_set_sd(0.0));
+ }
+
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_storeu_pd() {
let mut r = [42_f64; 2];
@@ -50277,6 +51647,34 @@ mod tests {
assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_store_ss() {
+ #[repr(align(16))]
+ struct Align {
+ data: f32,
+ }
+ let a = _mm_set_ss(2.0);
+ let mut mem = Align { data: 1.0 };
+ _mm_mask_store_ss(&mut mem.data, 0b1, a);
+ assert_eq!(mem.data, 2.0);
+ _mm_mask_store_ss(&mut mem.data, 0b0, a);
+ assert_eq!(mem.data, 2.0);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_mask_store_sd() {
+ #[repr(align(16))]
+ struct Align {
+ data: f64,
+ }
+ let a = _mm_set_sd(2.0);
+ let mut mem = Align { data: 1.0 };
+ _mm_mask_store_sd(&mut mem.data, 0b1, a);
+ assert_eq!(mem.data, 2.0);
+ _mm_mask_store_sd(&mut mem.data, 0b0, a);
+ assert_eq!(mem.data, 2.0);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_pd() {
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
index a2b2496caf15..5ea6dcc02642 100644
--- a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
@@ -7649,20 +7649,6 @@ mod tests {
assert_eq!(&arr[..], &expected[..],);
}
- #[simd_test(enable = "avx512f,avx512vl")]
- unsafe fn test_mm256_i32scatter_epi64() {
- let mut arr = [0i64; 64];
- let index = _mm_setr_epi32(0, 16, 32, 48);
- let src = _mm256_setr_epi64x(1, 2, 3, 4);
- // A multiplier of 8 is word-addressing
- _mm256_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src);
- let mut expected = [0i64; 64];
- for i in 0..4 {
- expected[i * 16] = (i + 1) as i64;
- }
- assert_eq!(&arr[..], &expected[..],);
- }
-
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i64scatter_epi64() {
let mut arr = [0i64; 128];
@@ -7721,6 +7707,566 @@ mod tests {
assert_eq!(&arr[..], &expected[..],);
}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32logather_epi64() {
+ let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let r = _mm512_i32logather_epi64::<8>(vindex, base_addr.as_ptr().cast());
+ let expected = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
+ assert_eq_m512i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32logather_epi64() {
+ let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
+ let src = _mm512_setr_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let r =
+ _mm512_mask_i32logather_epi64::<8>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm512_setr_epi64(2, 10, 4, 12, 6, 14, 8, 16);
+ assert_eq_m512i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32logather_pd() {
+ let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let r = _mm512_i32logather_pd::<8>(vindex, base_addr.as_ptr().cast());
+ let expected = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
+ assert_eq_m512d(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32logather_pd() {
+ let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
+ let src = _mm512_setr_pd(9., 10., 11., 12., 13., 14., 15., 16.);
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let r = _mm512_mask_i32logather_pd::<8>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm512_setr_pd(2., 10., 4., 12., 6., 14., 8., 16.);
+ assert_eq_m512d(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32loscatter_epi64() {
+ let mut base_addr: [i64; 8] = [0; 8];
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
+ _mm512_i32loscatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2, 3, 4, 5, 6, 7, 8];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32loscatter_epi64() {
+ let mut base_addr: [i64; 8] = [0; 8];
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
+ _mm512_mask_i32loscatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+ let expected = [0, 2, 0, 4, 0, 6, 0, 8];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32loscatter_pd() {
+ let mut base_addr: [f64; 8] = [0.; 8];
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
+ _mm512_i32loscatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2., 3., 4., 5., 6., 7., 8.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32loscatter_pd() {
+ let mut base_addr: [f64; 8] = [0.; 8];
+ let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
+ _mm512_mask_i32loscatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+ let expected = [0., 2., 0., 4., 0., 6., 0., 8.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i32gather_epi32() {
+ let base_addr: [i32; 4] = [1, 2, 3, 4];
+ let src = _mm_setr_epi32(5, 6, 7, 8);
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let r = _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_epi32(2, 6, 4, 8);
+ assert_eq_m128i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i32gather_epi64() {
+ let base_addr: [i64; 2] = [1, 2];
+ let src = _mm_setr_epi64x(5, 6);
+ let vindex = _mm_setr_epi32(1, 0, -1, -1);
+ let r = _mm_mmask_i32gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_epi64x(2, 6);
+ assert_eq_m128i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i32gather_pd() {
+ let base_addr: [f64; 2] = [1., 2.];
+ let src = _mm_setr_pd(5., 6.);
+ let vindex = _mm_setr_epi32(1, 0, -1, -1);
+ let r = _mm_mmask_i32gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_pd(2., 6.);
+ assert_eq_m128d(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i32gather_ps() {
+ let base_addr: [f32; 4] = [1., 2., 3., 4.];
+ let src = _mm_setr_ps(5., 6., 7., 8.);
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let r = _mm_mmask_i32gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_ps(2., 6., 4., 8.);
+ assert_eq_m128(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i64gather_epi32() {
+ let base_addr: [i32; 2] = [1, 2];
+ let src = _mm_setr_epi32(5, 6, 7, 8);
+ let vindex = _mm_setr_epi64x(1, 0);
+ let r = _mm_mmask_i64gather_epi32::<4>(src, 0b01, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_epi32(2, 6, 0, 0);
+ assert_eq_m128i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i64gather_epi64() {
+ let base_addr: [i64; 2] = [1, 2];
+ let src = _mm_setr_epi64x(5, 6);
+ let vindex = _mm_setr_epi64x(1, 0);
+ let r = _mm_mmask_i64gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_epi64x(2, 6);
+ assert_eq_m128i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i64gather_pd() {
+ let base_addr: [f64; 2] = [1., 2.];
+ let src = _mm_setr_pd(5., 6.);
+ let vindex = _mm_setr_epi64x(1, 0);
+ let r = _mm_mmask_i64gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_pd(2., 6.);
+ assert_eq_m128d(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mmask_i64gather_ps() {
+ let base_addr: [f32; 2] = [1., 2.];
+ let src = _mm_setr_ps(5., 6., 7., 8.);
+ let vindex = _mm_setr_epi64x(1, 0);
+ let r = _mm_mmask_i64gather_ps::<4>(src, 0b01, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_ps(2., 6., 0., 0.);
+ assert_eq_m128(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i32gather_epi32() {
+ let base_addr: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
+ let src = _mm256_setr_epi32(9, 10, 11, 12, 13, 14, 15, 16);
+ let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
+ let r =
+ _mm256_mmask_i32gather_epi32::<4>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm256_setr_epi32(2, 10, 4, 12, 6, 14, 8, 16);
+ assert_eq_m256i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i32gather_epi64() {
+ let base_addr: [i64; 4] = [1, 2, 3, 4];
+ let src = _mm256_setr_epi64x(9, 10, 11, 12);
+ let vindex = _mm_setr_epi32(1, 2, 3, 4);
+ let r = _mm256_mmask_i32gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm256_setr_epi64x(2, 10, 4, 12);
+ assert_eq_m256i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i32gather_pd() {
+ let base_addr: [f64; 4] = [1., 2., 3., 4.];
+ let src = _mm256_setr_pd(9., 10., 11., 12.);
+ let vindex = _mm_setr_epi32(1, 2, 3, 4);
+ let r = _mm256_mmask_i32gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm256_setr_pd(2., 10., 4., 12.);
+ assert_eq_m256d(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i32gather_ps() {
+ let base_addr: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
+ let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
+ let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
+ let r = _mm256_mmask_i32gather_ps::<4>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm256_setr_ps(2., 10., 4., 12., 6., 14., 8., 16.);
+ assert_eq_m256(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i64gather_epi32() {
+ let base_addr: [i32; 4] = [1, 2, 3, 4];
+ let src = _mm_setr_epi32(9, 10, 11, 12);
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let r = _mm256_mmask_i64gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_epi32(2, 10, 4, 12);
+ assert_eq_m128i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i64gather_epi64() {
+ let base_addr: [i64; 4] = [1, 2, 3, 4];
+ let src = _mm256_setr_epi64x(9, 10, 11, 12);
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let r = _mm256_mmask_i64gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm256_setr_epi64x(2, 10, 4, 12);
+ assert_eq_m256i(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i64gather_pd() {
+ let base_addr: [f64; 4] = [1., 2., 3., 4.];
+ let src = _mm256_setr_pd(9., 10., 11., 12.);
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let r = _mm256_mmask_i64gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm256_setr_pd(2., 10., 4., 12.);
+ assert_eq_m256d(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mmask_i64gather_ps() {
+ let base_addr: [f32; 4] = [1., 2., 3., 4.];
+ let src = _mm_setr_ps(9., 10., 11., 12.);
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let r = _mm256_mmask_i64gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+ let expected = _mm_setr_ps(2., 10., 4., 12.);
+ assert_eq_m128(expected, r);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i32scatter_epi32() {
+ let mut base_addr: [i32; 4] = [0; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm_setr_epi32(2, 3, 4, 1);
+ _mm_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2, 3, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i32scatter_epi32() {
+ let mut base_addr: [i32; 4] = [0; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm_setr_epi32(2, 3, 4, 1);
+ _mm_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0, 2, 0, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i32scatter_epi64() {
+ let mut base_addr: [i64; 2] = [0; 2];
+ let vindex = _mm_setr_epi32(1, 0, -1, -1);
+ let src = _mm_setr_epi64x(2, 1);
+ _mm_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i32scatter_epi64() {
+ let mut base_addr: [i64; 2] = [0; 2];
+ let vindex = _mm_setr_epi32(1, 0, -1, -1);
+ let src = _mm_setr_epi64x(2, 1);
+ _mm_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+ let expected = [0, 2];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i32scatter_pd() {
+ let mut base_addr: [f64; 2] = [0.; 2];
+ let vindex = _mm_setr_epi32(1, 0, -1, -1);
+ let src = _mm_setr_pd(2., 1.);
+ _mm_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i32scatter_pd() {
+ let mut base_addr: [f64; 2] = [0.; 2];
+ let vindex = _mm_setr_epi32(1, 0, -1, -1);
+ let src = _mm_setr_pd(2., 1.);
+ _mm_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+ let expected = [0., 2.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i32scatter_ps() {
+ let mut base_addr: [f32; 4] = [0.; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm_setr_ps(2., 3., 4., 1.);
+ _mm_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2., 3., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i32scatter_ps() {
+ let mut base_addr: [f32; 4] = [0.; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm_setr_ps(2., 3., 4., 1.);
+ _mm_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0., 2., 0., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i64scatter_epi32() {
+ let mut base_addr: [i32; 2] = [0; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_epi32(2, 1, -1, -1);
+ _mm_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i64scatter_epi32() {
+ let mut base_addr: [i32; 2] = [0; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_epi32(2, 1, -1, -1);
+ _mm_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+ let expected = [0, 2];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i64scatter_epi64() {
+ let mut base_addr: [i64; 2] = [0; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_epi64x(2, 1);
+ _mm_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i64scatter_epi64() {
+ let mut base_addr: [i64; 2] = [0; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_epi64x(2, 1);
+ _mm_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+ let expected = [0, 2];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i64scatter_pd() {
+ let mut base_addr: [f64; 2] = [0.; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_pd(2., 1.);
+ _mm_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i64scatter_pd() {
+ let mut base_addr: [f64; 2] = [0.; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_pd(2., 1.);
+ _mm_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+ let expected = [0., 2.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_i64scatter_ps() {
+ let mut base_addr: [f32; 2] = [0.; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_ps(2., 1., -1., -1.);
+ _mm_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_i64scatter_ps() {
+ let mut base_addr: [f32; 2] = [0.; 2];
+ let vindex = _mm_setr_epi64x(1, 0);
+ let src = _mm_setr_ps(2., 1., -1., -1.);
+ _mm_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+ let expected = [0., 2.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i32scatter_epi32() {
+ let mut base_addr: [i32; 8] = [0; 8];
+ let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
+ let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
+ _mm256_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2, 3, 4, 5, 6, 7, 8];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i32scatter_epi32() {
+ let mut base_addr: [i32; 8] = [0; 8];
+ let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
+ let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
+ _mm256_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+ let expected = [0, 2, 0, 4, 0, 6, 0, 8];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i32scatter_epi64() {
+ let mut base_addr: [i64; 4] = [0; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm256_setr_epi64x(2, 3, 4, 1);
+ _mm256_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2, 3, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i32scatter_epi64() {
+ let mut base_addr: [i64; 4] = [0; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm256_setr_epi64x(2, 3, 4, 1);
+ _mm256_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0, 2, 0, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i32scatter_pd() {
+ let mut base_addr: [f64; 4] = [0.; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm256_setr_pd(2., 3., 4., 1.);
+ _mm256_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2., 3., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i32scatter_pd() {
+ let mut base_addr: [f64; 4] = [0.; 4];
+ let vindex = _mm_setr_epi32(1, 2, 3, 0);
+ let src = _mm256_setr_pd(2., 3., 4., 1.);
+ _mm256_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0., 2., 0., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i32scatter_ps() {
+ let mut base_addr: [f32; 8] = [0.; 8];
+ let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
+ let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
+ _mm256_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2., 3., 4., 5., 6., 7., 8.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i32scatter_ps() {
+ let mut base_addr: [f32; 8] = [0.; 8];
+ let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
+ let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
+ _mm256_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+ let expected = [0., 2., 0., 4., 0., 6., 0., 8.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i64scatter_epi32() {
+ let mut base_addr: [i32; 4] = [0; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm_setr_epi32(2, 3, 4, 1);
+ _mm256_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2, 3, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i64scatter_epi32() {
+ let mut base_addr: [i32; 4] = [0; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm_setr_epi32(2, 3, 4, 1);
+ _mm256_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0, 2, 0, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i64scatter_epi64() {
+ let mut base_addr: [i64; 4] = [0; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm256_setr_epi64x(2, 3, 4, 1);
+ _mm256_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1, 2, 3, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i64scatter_epi64() {
+ let mut base_addr: [i64; 4] = [0; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm256_setr_epi64x(2, 3, 4, 1);
+ _mm256_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0, 2, 0, 4];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i64scatter_pd() {
+ let mut base_addr: [f64; 4] = [0.; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm256_setr_pd(2., 3., 4., 1.);
+ _mm256_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2., 3., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i64scatter_pd() {
+ let mut base_addr: [f64; 4] = [0.; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm256_setr_pd(2., 3., 4., 1.);
+ _mm256_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0., 2., 0., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_i64scatter_ps() {
+ let mut base_addr: [f32; 4] = [0.; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm_setr_ps(2., 3., 4., 1.);
+ _mm256_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+ let expected = [1., 2., 3., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_i64scatter_ps() {
+ let mut base_addr: [f32; 4] = [0.; 4];
+ let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
+ let src = _mm_setr_ps(2., 3., 4., 1.);
+ _mm256_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+ let expected = [0., 2., 0., 4.];
+ assert_eq!(expected, base_addr);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rol_epi64() {
#[rustfmt::skip]