diff --git a/library/stdarch/crates/core_arch/avx512dq.md b/library/stdarch/crates/core_arch/avx512dq.md
index 82b66445a7c1..197dd8021fb8 100644
--- a/library/stdarch/crates/core_arch/avx512dq.md
+++ b/library/stdarch/crates/core_arch/avx512dq.md
@@ -1,5 +1,7 @@
["AVX512DQ"]
+[Intel's List](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#avx512techs=AVX512DQ)
+
- And:
* [x] _mm_mask_and_pd
* [x] _mm_maskz_and_pd
@@ -69,39 +71,39 @@
- Broadcast
- * [ ] _mm256_broadcast_f32x2
- * [ ] _mm256_mask_broadcast_f32x2
- * [ ] _mm256_maskz_broadcast_f32x2
- * [ ] _mm512_broadcast_f32x2
- * [ ] _mm512_mask_broadcast_f32x2
- * [ ] _mm512_maskz_broadcast_f32x2
- * [ ] _mm512_broadcast_f32x8
- * [ ] _mm512_mask_broadcast_f32x8
- * [ ] _mm512_maskz_broadcast_f32x8
- * [ ] _mm256_broadcast_f64x2
- * [ ] _mm256_mask_broadcast_f64x2
- * [ ] _mm256_maskz_broadcast_f64x2
- * [ ] _mm512_broadcast_f64x2
- * [ ] _mm512_mask_broadcast_f64x2
- * [ ] _mm512_maskz_broadcast_f64x2
- * [ ] _mm_broadcast_i32x2
- * [ ] _mm_mask_broadcast_i32x2
- * [ ] _mm_maskz_broadcast_i32x2
- * [ ] _mm256_broadcast_i32x2
- * [ ] _mm256_mask_broadcast_i32x2
- * [ ] _mm256_maskz_broadcast_i32x2
- * [ ] _mm512_broadcast_i32x2
- * [ ] _mm512_mask_broadcast_i32x2
- * [ ] _mm512_maskz_broadcast_i32x2
- * [ ] _mm512_broadcast_i32x8
- * [ ] _mm512_mask_broadcast_i32x8
- * [ ] _mm512_maskz_broadcast_i32x8
- * [ ] _mm256_broadcast_i64x2
- * [ ] _mm256_mask_broadcast_i64x2
- * [ ] _mm256_maskz_broadcast_i64x2
- * [ ] _mm512_broadcast_i64x2
- * [ ] _mm512_mask_broadcast_i64x2
- * [ ] _mm512_maskz_broadcast_i64x2
+ * [x] _mm256_broadcast_f32x2
+ * [x] _mm256_mask_broadcast_f32x2
+ * [x] _mm256_maskz_broadcast_f32x2
+ * [x] _mm512_broadcast_f32x2
+ * [x] _mm512_mask_broadcast_f32x2
+ * [x] _mm512_maskz_broadcast_f32x2
+ * [x] _mm512_broadcast_f32x8
+ * [x] _mm512_mask_broadcast_f32x8
+ * [x] _mm512_maskz_broadcast_f32x8
+ * [x] _mm256_broadcast_f64x2
+ * [x] _mm256_mask_broadcast_f64x2
+ * [x] _mm256_maskz_broadcast_f64x2
+ * [x] _mm512_broadcast_f64x2
+ * [x] _mm512_mask_broadcast_f64x2
+ * [x] _mm512_maskz_broadcast_f64x2
+ * [x] _mm_broadcast_i32x2
+ * [x] _mm_mask_broadcast_i32x2
+ * [x] _mm_maskz_broadcast_i32x2
+ * [x] _mm256_broadcast_i32x2
+ * [x] _mm256_mask_broadcast_i32x2
+ * [x] _mm256_maskz_broadcast_i32x2
+ * [x] _mm512_broadcast_i32x2
+ * [x] _mm512_mask_broadcast_i32x2
+ * [x] _mm512_maskz_broadcast_i32x2
+ * [x] _mm512_broadcast_i32x8
+ * [x] _mm512_mask_broadcast_i32x8
+ * [x] _mm512_maskz_broadcast_i32x8
+ * [x] _mm256_broadcast_i64x2
+ * [x] _mm256_mask_broadcast_i64x2
+ * [x] _mm256_maskz_broadcast_i64x2
+ * [x] _mm512_broadcast_i64x2
+ * [x] _mm512_mask_broadcast_i64x2
+ * [x] _mm512_maskz_broadcast_i64x2
- Convert:
@@ -252,45 +254,45 @@
- Element Extract:
- * [ ] _mm512_extractf32x8_ps
- * [ ] _mm512_mask_extractf32x8_ps
- * [ ] _mm512_maskz_extractf32x8_ps
- * [ ] _mm256_extractf64x2_pd
- * [ ] _mm256_mask_extractf64x2_pd
- * [ ] _mm256_maskz_extractf64x2_pd
- * [ ] _mm512_extractf64x2_pd
- * [ ] _mm512_mask_extractf64x2_pd
- * [ ] _mm512_maskz_extractf64x2_pd
- * [ ] _mm512_extracti32x8_epi32
- * [ ] _mm512_mask_extracti32x8_epi32
- * [ ] _mm512_maskz_extracti32x8_epi32
- * [ ] _mm256_extracti64x2_epi64
- * [ ] _mm256_mask_extracti64x2_epi64
- * [ ] _mm256_maskz_extracti64x2_epi64
- * [ ] _mm512_extracti64x2_epi64
- * [ ] _mm512_mask_extracti64x2_epi64
- * [ ] _mm512_maskz_extracti64x2_epi64
+ * [x] _mm512_extractf32x8_ps
+ * [x] _mm512_mask_extractf32x8_ps
+ * [x] _mm512_maskz_extractf32x8_ps
+ * [x] _mm256_extractf64x2_pd
+ * [x] _mm256_mask_extractf64x2_pd
+ * [x] _mm256_maskz_extractf64x2_pd
+ * [x] _mm512_extractf64x2_pd
+ * [x] _mm512_mask_extractf64x2_pd
+ * [x] _mm512_maskz_extractf64x2_pd
+ * [x] _mm512_extracti32x8_epi32
+ * [x] _mm512_mask_extracti32x8_epi32
+ * [x] _mm512_maskz_extracti32x8_epi32
+ * [x] _mm256_extracti64x2_epi64
+ * [x] _mm256_mask_extracti64x2_epi64
+ * [x] _mm256_maskz_extracti64x2_epi64
+ * [x] _mm512_extracti64x2_epi64
+ * [x] _mm512_mask_extracti64x2_epi64
+ * [x] _mm512_maskz_extracti64x2_epi64
- Element Insert:
- * [ ] _mm512_insertf32x8
- * [ ] _mm512_mask_insertf32x8
- * [ ] _mm512_maskz_insertf32x8
- * [ ] _mm256_insertf64x2
- * [ ] _mm256_mask_insertf64x2
- * [ ] _mm256_maskz_insertf64x2
- * [ ] _mm512_insertf64x2
- * [ ] _mm512_mask_insertf64x2
- * [ ] _mm512_maskz_insertf64x2
- * [ ] _mm512_inserti32x8
- * [ ] _mm512_mask_inserti32x8
- * [ ] _mm512_maskz_inserti32x8
- * [ ] _mm256_inserti64x2
- * [ ] _mm256_mask_inserti64x2
- * [ ] _mm256_maskz_inserti64x2
- * [ ] _mm512_inserti64x2
- * [ ] _mm512_mask_inserti64x2
- * [ ] _mm512_maskz_inserti64x2
+ * [x] _mm512_insertf32x8
+ * [x] _mm512_mask_insertf32x8
+ * [x] _mm512_maskz_insertf32x8
+ * [x] _mm256_insertf64x2
+ * [x] _mm256_mask_insertf64x2
+ * [x] _mm256_maskz_insertf64x2
+ * [x] _mm512_insertf64x2
+ * [x] _mm512_mask_insertf64x2
+ * [x] _mm512_maskz_insertf64x2
+ * [x] _mm512_inserti32x8
+ * [x] _mm512_mask_inserti32x8
+ * [x] _mm512_maskz_inserti32x8
+ * [x] _mm256_inserti64x2
+ * [x] _mm256_mask_inserti64x2
+ * [x] _mm256_maskz_inserti64x2
+ * [x] _mm512_inserti64x2
+ * [x] _mm512_mask_inserti64x2
+ * [x] _mm512_maskz_inserti64x2
- FP-Class
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512dq.rs b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs
index c4ff97035cfa..a5fb77791d88 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512dq.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs
@@ -32,7 +32,6 @@ pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(simd_select_bitmask(k, and, zero))
}
-
/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
@@ -350,7 +349,7 @@ pub unsafe fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
-///
+///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341)
#[inline]
#[target_feature(enable = "avx512dq")]
@@ -363,7 +362,7 @@ pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m
/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
-///
+///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342)
#[inline]
#[target_feature(enable = "avx512dq")]
@@ -740,6 +739,1071 @@ pub unsafe fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512
transmute(simd_select_bitmask(k, xor, zero))
}
+// Broadcast
+
+/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
+pub unsafe fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
+ let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
+pub unsafe fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
+ let b = _mm256_broadcast_f32x2(a).as_f32x8();
+ transmute(simd_select_bitmask(k, b, src.as_f32x8()))
+}
+
+/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
+pub unsafe fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
+ let b = _mm256_broadcast_f32x2(a).as_f32x8();
+ let zero = _mm256_setzero_ps().as_f32x8();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
+pub unsafe fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
+ let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
+pub unsafe fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
+ let b = _mm512_broadcast_f32x2(a).as_f32x16();
+ transmute(simd_select_bitmask(k, b, src.as_f32x16()))
+}
+
+/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
+pub unsafe fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
+ let b = _mm512_broadcast_f32x2(a).as_f32x16();
+ let zero = _mm512_setzero_ps().as_f32x16();
+ transmute(simd_select_bitmask(k, b, zero))
+}
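
A quick usage sketch of the f32x2 broadcast family (not part of the patch; the hypothetical `broadcast_f32x2_demo` helper assumes a nightly toolchain exposing these unstable intrinsics and AVX512DQ hardware):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq")]
unsafe fn broadcast_f32x2_demo() {
    use core::arch::x86_64::*;

    // Lanes of `a` are [1.0, 2.0, 3.0, 4.0]; only the low two are broadcast.
    let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
    let r = _mm512_broadcast_f32x2(a);

    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), r);
    // The pair [1.0, 2.0] repeats eight times across the 512-bit result.
    assert_eq!(&out[..4], &[1.0, 2.0, 1.0, 2.0]);

    // With a zeromask, only lanes whose mask bit is set survive.
    let z = _mm512_maskz_broadcast_f32x2(0b0000_0000_0000_0011, a);
    _mm512_storeu_ps(out.as_mut_ptr(), z);
    assert_eq!(&out[..4], &[1.0, 2.0, 0.0, 0.0]);
}
```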
+
+/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
+ let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
+ transmute(b)
+}
+
+/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
+ let b = _mm512_broadcast_f32x8(a).as_f32x16();
+ transmute(simd_select_bitmask(k, b, src.as_f32x16()))
+}
+
+/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
+/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
+ let b = _mm512_broadcast_f32x8(a).as_f32x16();
+ let zero = _mm512_setzero_ps().as_f32x16();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
+/// elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+pub unsafe fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
+ let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
+/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+pub unsafe fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
+ let b = _mm256_broadcast_f64x2(a).as_f64x4();
+ transmute(simd_select_bitmask(k, b, src.as_f64x4()))
+}
+
+/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
+/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+pub unsafe fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
+ let b = _mm256_broadcast_f64x2(a).as_f64x4();
+ let zero = _mm256_setzero_pd().as_f64x4();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
+/// elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
+ let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
+/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
+ let b = _mm512_broadcast_f64x2(a).as_f64x8();
+ transmute(simd_select_bitmask(k, b, src.as_f64x8()))
+}
+
+/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
+/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
+ let b = _mm512_broadcast_f64x2(a).as_f64x8();
+ let zero = _mm512_setzero_pd().as_f64x8();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
+ let a = a.as_i32x4();
+ let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let b = _mm_broadcast_i32x2(a).as_i32x4();
+ transmute(simd_select_bitmask(k, b, src.as_i32x4()))
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
+ let b = _mm_broadcast_i32x2(a).as_i32x4();
+ let zero = _mm_setzero_si128().as_i32x4();
+ transmute(simd_select_bitmask(k, b, zero))
+}
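
The integer form mirrors the float one; a minimal sketch under the same assumptions as above (hypothetical `broadcast_i32x2_demo` helper):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn broadcast_i32x2_demo() {
    use core::arch::x86_64::*;

    let a = _mm_setr_epi32(10, 20, 30, 40);
    // Only the low two 32-bit lanes repeat: [10, 20, 10, 20].
    let r = _mm_broadcast_i32x2(a);

    let mut out = [0i32; 4];
    _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
    assert_eq!(out, [10, 20, 10, 20]);
}
```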
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
+ let a = a.as_i32x4();
+ let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let b = _mm256_broadcast_i32x2(a).as_i32x8();
+ transmute(simd_select_bitmask(k, b, src.as_i32x8()))
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
+ let b = _mm256_broadcast_i32x2(a).as_i32x8();
+ let zero = _mm256_setzero_si256().as_i32x8();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
+ let a = a.as_i32x4();
+ let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
+ let b = _mm512_broadcast_i32x2(a).as_i32x16();
+ transmute(simd_select_bitmask(k, b, src.as_i32x16()))
+}
+
+/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
+pub unsafe fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
+ let b = _mm512_broadcast_i32x2(a).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
+ let a = a.as_i32x8();
+ let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
+ transmute(b)
+}
+
+/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
+ let b = _mm512_broadcast_i32x8(a).as_i32x16();
+ transmute(simd_select_bitmask(k, b, src.as_i32x16()))
+}
+
+/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
+ let b = _mm512_broadcast_i32x8(a).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+pub unsafe fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
+ let a = a.as_i64x2();
+ let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+pub unsafe fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let b = _mm256_broadcast_i64x2(a).as_i64x4();
+ transmute(simd_select_bitmask(k, b, src.as_i64x4()))
+}
+
+/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+pub unsafe fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
+ let b = _mm256_broadcast_i64x2(a).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
+ let a = a.as_i64x2();
+ let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
+ transmute(b)
+}
+
+/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
+ let b = _mm512_broadcast_i64x2(a).as_i64x8();
+ transmute(simd_select_bitmask(k, b, src.as_i64x8()))
+}
+
+/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+pub unsafe fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
+ let b = _mm512_broadcast_i64x2(a).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+// Extract
+
+/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
+ static_assert_uimm_bits!(IMM8, 1);
+ match IMM8 & 1 {
+ 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
+ _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
+ }
+}
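
Since IMM8 is a const generic, callers pass it with turbofish syntax. A sketch (hypothetical `extractf32x8_demo` helper, same nightly/unstable assumptions as above) selecting the upper 256-bit half:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq")]
unsafe fn extractf32x8_demo() {
    use core::arch::x86_64::*;

    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // IMM8 = 1 selects elements 8..=15.
    let hi = _mm512_extractf32x8_ps::<1>(a);

    let mut out = [0.0f32; 8];
    _mm256_storeu_ps(out.as_mut_ptr(), hi);
    assert_eq!(out, [8., 9., 10., 11., 12., 13., 14., 15.]);
}
```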
+
+/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
+/// if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(
+ src: __m256,
+ k: __mmask8,
+ a: __m512,
+) -> __m256 {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm512_extractf32x8_ps::<IMM8>(a);
+ transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
+}
+
+/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm512_extractf32x8_ps::<IMM8>(a);
+ let zero = _mm256_setzero_ps().as_f32x8();
+ transmute(simd_select_bitmask(k, b.as_f32x8(), zero))
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
+ static_assert_uimm_bits!(IMM8, 1);
+ match IMM8 & 1 {
+ 0 => simd_shuffle!(a, a, [0, 1]),
+ _ => simd_shuffle!(a, a, [2, 3]),
+ }
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
+/// if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
+ src: __m128d,
+ k: __mmask8,
+ a: __m256d,
+) -> __m128d {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm256_extractf64x2_pd::<IMM8>(a);
+ transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm256_extractf64x2_pd::<IMM8>(a);
+ let zero = _mm_setzero_pd().as_f64x2();
+ transmute(simd_select_bitmask(k, b.as_f64x2(), zero))
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
+ static_assert_uimm_bits!(IMM8, 2);
+ match IMM8 & 3 {
+ 0 => simd_shuffle!(a, a, [0, 1]),
+ 1 => simd_shuffle!(a, a, [2, 3]),
+ 2 => simd_shuffle!(a, a, [4, 5]),
+ _ => simd_shuffle!(a, a, [6, 7]),
+ }
+}
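
With a 512-bit source there are four 128-bit lanes, so IMM8 spans two bits. A sketch (hypothetical `extractf64x2_demo` helper, same assumptions as above) picking lane 2:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq")]
unsafe fn extractf64x2_demo() {
    use core::arch::x86_64::*;

    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    // IMM8 = 2 selects elements 4 and 5.
    let lane2 = _mm512_extractf64x2_pd::<2>(a);

    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), lane2);
    assert_eq!(out, [4., 5.]);
}
```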
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
+/// if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
+ src: __m128d,
+ k: __mmask8,
+ a: __m512d,
+) -> __m128d {
+ static_assert_uimm_bits!(IMM8, 2);
+ let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
+ transmute(simd_select_bitmask(k, b, src.as_f64x2()))
+}
+
+/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
+/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
+ static_assert_uimm_bits!(IMM8, 2);
+ let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
+ let zero = _mm_setzero_pd().as_f64x2();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
+/// the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let a = a.as_i32x16();
+ let b: i32x8 = match IMM8 & 1 {
+ 0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
+ _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
+ };
+ transmute(b)
+}
+
+/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
+ src: __m256i,
+ k: __mmask8,
+ a: __m512i,
+) -> __m256i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
+ transmute(simd_select_bitmask(k, b, src.as_i32x8()))
+}
+
+/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
+ let zero = _mm256_setzero_si256().as_i32x8();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
+ static_assert_uimm_bits!(IMM8, 1);
+ match IMM8 & 1 {
+ 0 => simd_shuffle!(a, a, [0, 1]),
+ _ => simd_shuffle!(a, a, [2, 3]),
+ }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
+ src: __m128i,
+ k: __mmask8,
+ a: __m256i,
+) -> __m128i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+ transmute(simd_select_bitmask(k, b, src.as_i64x2()))
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
+ static_assert_uimm_bits!(IMM8, 2);
+ match IMM8 & 3 {
+ 0 => simd_shuffle!(a, a, [0, 1]),
+ 1 => simd_shuffle!(a, a, [2, 3]),
+ 2 => simd_shuffle!(a, a, [4, 5]),
+ _ => simd_shuffle!(a, a, [6, 7]),
+ }
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
+ src: __m128i,
+ k: __mmask8,
+ a: __m512i,
+) -> __m128i {
+ static_assert_uimm_bits!(IMM8, 2);
+ let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+ transmute(simd_select_bitmask(k, b, src.as_i64x2()))
+}
+
+/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
+/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
+ static_assert_uimm_bits!(IMM8, 2);
+ let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, b, zero))
+}
+
+// Insert
+
+/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
+/// elements) from b into dst at the location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm512_castps256_ps512(b);
+ match IMM8 & 1 {
+ 0 => simd_shuffle!(
+ a,
+ b,
+ [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
+ ),
+ _ => simd_shuffle!(
+ a,
+ b,
+ [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
+ ),
+ }
+}
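
The shuffle indices read across the concatenation of `a` (positions 0..=15) and the zero-extended `b` (positions 16..=31, of which only 16..=23 carry `b`'s lanes). A behavioral sketch (hypothetical `insertf32x8_demo` helper, same assumptions as above):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq")]
unsafe fn insertf32x8_demo() {
    use core::arch::x86_64::*;

    let a = _mm512_set1_ps(0.0);
    let b = _mm256_set1_ps(9.0);
    // IMM8 = 1 replaces the upper 256 bits of `a` with `b`.
    let r = _mm512_insertf32x8::<1>(a, b);

    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), r);
    assert_eq!(&out[..8], &[0.0; 8]);
    assert_eq!(&out[8..], &[9.0; 8]);
}
```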
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_insertf32x8<const IMM8: i32>(
+ src: __m512,
+ k: __mmask16,
+ a: __m512,
+ b: __m256,
+) -> __m512 {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm512_insertf32x8::<IMM8>(a, b);
+ transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_maskz_insertf32x8<const IMM8: i32>(
+ k: __mmask16,
+ a: __m512,
+ b: __m256,
+) -> __m512 {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
+ let zero = _mm512_setzero_ps().as_f32x16();
+ transmute(simd_select_bitmask(k, c, zero))
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into dst at the location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm256_castpd128_pd256(b);
+ match IMM8 & 1 {
+ 0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
+ _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
+ }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm256_mask_insertf64x2<const IMM8: i32>(
+ src: __m256d,
+ k: __mmask8,
+ a: __m256d,
+ b: __m128d,
+) -> __m256d {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm256_insertf64x2::<IMM8>(a, b);
+ transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm256_maskz_insertf64x2<const IMM8: i32>(
+ k: __mmask8,
+ a: __m256d,
+ b: __m128d,
+) -> __m256d {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
+ let zero = _mm256_setzero_pd().as_f64x4();
+ transmute(simd_select_bitmask(k, c, zero))
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into dst at the location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
+ static_assert_uimm_bits!(IMM8, 2);
+ let b = _mm512_castpd128_pd512(b);
+ match IMM8 & 3 {
+ 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
+ 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
+ _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
+ }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
+/// (elements are copied from src if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_insertf64x2<const IMM8: i32>(
+ src: __m512d,
+ k: __mmask8,
+ a: __m512d,
+ b: __m128d,
+) -> __m512d {
+ static_assert_uimm_bits!(IMM8, 2);
+ let c = _mm512_insertf64x2::<IMM8>(a, b);
+ transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
+/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
+/// (elements are zeroed out if the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_maskz_insertf64x2<const IMM8: i32>(
+ k: __mmask8,
+ a: __m512d,
+ b: __m128d,
+) -> __m512d {
+ static_assert_uimm_bits!(IMM8, 2);
+ let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
+ let zero = _mm512_setzero_pd().as_f64x8();
+ transmute(simd_select_bitmask(k, c, zero))
+}
+
+/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
+/// location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let a = a.as_i32x16();
+ let b = _mm512_castsi256_si512(b).as_i32x16();
+ let r: i32x16 = match IMM8 & 1 {
+ 0 => simd_shuffle!(
+ a,
+ b,
+ [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
+ ),
+ _ => simd_shuffle!(
+ a,
+ b,
+ [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
+ ),
+ };
+ transmute(r)
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
+/// the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_inserti32x8<const IMM8: i32>(
+ src: __m512i,
+ k: __mmask16,
+ a: __m512i,
+ b: __m256i,
+) -> __m512i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm512_inserti32x8::<IMM8>(a, b);
+ transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
+}
+
+/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_maskz_inserti32x8<const IMM8: i32>(
+ k: __mmask16,
+ a: __m512i,
+ b: __m256i,
+) -> __m512i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, c, zero))
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
+/// location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let b = _mm256_castsi128_si256(b);
+ match IMM8 & 1 {
+ 0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
+ _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
+ }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
+/// the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm256_mask_inserti64x2<const IMM8: i32>(
+ src: __m256i,
+ k: __mmask8,
+ a: __m256i,
+ b: __m128i,
+) -> __m256i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm256_inserti64x2::<IMM8>(a, b);
+ transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm256_maskz_inserti64x2<const IMM8: i32>(
+ k: __mmask8,
+ a: __m256i,
+ b: __m128i,
+) -> __m256i {
+ static_assert_uimm_bits!(IMM8, 1);
+ let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, c, zero))
+}
+
+/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
+/// location specified by IMM8.
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
+ static_assert_uimm_bits!(IMM8, 2);
+ let b = _mm512_castsi128_si512(b);
+ match IMM8 & 3 {
+ 0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
+ 1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
+ 2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
+ _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
+ }
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
+/// the corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(4)]
+pub unsafe fn _mm512_mask_inserti64x2<const IMM8: i32>(
+ src: __m512i,
+ k: __mmask8,
+ a: __m512i,
+ b: __m128i,
+) -> __m512i {
+ static_assert_uimm_bits!(IMM8, 2);
+ let c = _mm512_inserti64x2::<IMM8>(a, b);
+ transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
+}
+
+/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
+/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
+/// corresponding bit is not set).
+///
+/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn _mm512_maskz_inserti64x2<const IMM8: i32>(
+ k: __mmask8,
+ a: __m512i,
+ b: __m128i,
+) -> __m512i {
+ static_assert_uimm_bits!(IMM8, 2);
+ let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, c, zero))
+}
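
For a given lane index, insert followed by extract is the identity on the inserted piece, which is a handy property for spot-checking both families. A sketch (hypothetical `insert_extract_roundtrip_demo` helper, same assumptions as above):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512dq")]
unsafe fn insert_extract_roundtrip_demo() {
    use core::arch::x86_64::*;

    let a = _mm512_set1_epi64(7);
    let b = _mm_set_epi64x(99, 42); // lanes [42, 99]
    // Place `b` in the topmost 128-bit lane, then read it back out.
    let c = _mm512_inserti64x2::<3>(a, b);
    let back = _mm512_extracti64x2_epi64::<3>(c);

    let mut out = [0i64; 2];
    _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, back);
    assert_eq!(out, [42, 99]);
}
```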
+
#[cfg(test)]
mod tests {
use super::*;
@@ -748,22 +1812,23 @@ mod tests {
use crate::core_arch::x86::*;
use crate::core_arch::x86_64::*;
+ use crate::mem::transmute;
- const OPRND1_64: f64 = f64::from_bits(0x3333333333333333);
- const OPRND2_64: f64 = f64::from_bits(0x5555555555555555);
+ const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) };
+ const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) };
- const AND_64: f64 = f64::from_bits(0x1111111111111111);
- const ANDN_64: f64 = f64::from_bits(0x4444444444444444);
- const OR_64: f64 = f64::from_bits(0x7777777777777777);
- const XOR_64: f64 = f64::from_bits(0x6666666666666666);
+ const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) };
+ const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) };
+ const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) };
+ const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) };
- const OPRND1_32: f32 = f32::from_bits(0x33333333);
- const OPRND2_32: f32 = f32::from_bits(0x55555555);
+ const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) };
+ const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) };
- const AND_32: f32 = f32::from_bits(0x11111111);
- const ANDN_32: f32 = f32::from_bits(0x44444444);
- const OR_32: f32 = f32::from_bits(0x77777777);
- const XOR_32: f32 = f32::from_bits(0x66666666);
+ const AND_32: f32 = unsafe { transmute(0x11111111_u32) };
+ const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) };
+ const OR_32: f32 = unsafe { transmute(0x77777777_u32) };
+ const XOR_32: f32 = unsafe { transmute(0x66666666_u32) };
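
These expected-result constants follow nibble-wise from the operands: each hex digit of the inputs is 0x3 (0b0011) and 0x5 (0b0101), so 0x3 & 0x5 = 0x1, !0x3 & 0x5 = 0x4 (within a nibble), 0x3 | 0x5 = 0x7, and 0x3 ^ 0x5 = 0x6. A plain-Rust check of that arithmetic (illustrative, no intrinsics needed):

```rust
assert_eq!(0x3333333333333333u64 & 0x5555555555555555, 0x1111111111111111);
assert_eq!(!0x3333333333333333u64 & 0x5555555555555555, 0x4444444444444444);
assert_eq!(0x3333333333333333u64 | 0x5555555555555555, 0x7777777777777777);
assert_eq!(0x3333333333333333u64 ^ 0x5555555555555555, 0x6666666666666666);
```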
#[simd_test(enable = "avx512dq,avx512vl")]
unsafe fn test_mm_mask_and_pd() {
@@ -882,9 +1947,14 @@ mod tests {
unsafe fn test_mm512_mask_and_ps() {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
- let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.);
+ let src = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b);
- let e = _mm512_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32, 15., AND_32);
+ let e = _mm512_set_ps(
+ 1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32,
+ 15., AND_32,
+ );
assert_eq_m512(r, e);
}
@@ -893,7 +1963,10 @@ mod tests {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
- let e = _mm512_set_ps(0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32);
+ let e = _mm512_set_ps(
+ 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0.,
+ AND_32,
+ );
assert_eq_m512(r, e);
}
@@ -1014,9 +2087,14 @@ mod tests {
unsafe fn test_mm512_mask_andnot_ps() {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
- let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.);
+ let src = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b);
- let e = _mm512_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13., ANDN_32, 15., ANDN_32);
+ let e = _mm512_set_ps(
+ 1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13.,
+ ANDN_32, 15., ANDN_32,
+ );
assert_eq_m512(r, e);
}
@@ -1025,7 +2103,10 @@ mod tests {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
- let e = _mm512_set_ps(0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32);
+ let e = _mm512_set_ps(
+ 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0.,
+ ANDN_32, 0., ANDN_32,
+ );
assert_eq_m512(r, e);
}
@@ -1146,9 +2227,14 @@ mod tests {
unsafe fn test_mm512_mask_or_ps() {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
- let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.);
+ let src = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b);
- let e = _mm512_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15., OR_32);
+ let e = _mm512_set_ps(
+ 1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15.,
+ OR_32,
+ );
assert_eq_m512(r, e);
}
@@ -1157,7 +2243,9 @@ mod tests {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
- let e = _mm512_set_ps(0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32);
+ let e = _mm512_set_ps(
+ 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32,
+ );
assert_eq_m512(r, e);
}
@@ -1278,9 +2366,14 @@ mod tests {
unsafe fn test_mm512_mask_xor_ps() {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
- let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.);
+ let src = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b);
- let e = _mm512_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32, 15., XOR_32);
+ let e = _mm512_set_ps(
+ 1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32,
+ 15., XOR_32,
+ );
assert_eq_m512(r, e);
}
@@ -1289,9 +2382,643 @@ mod tests {
let a = _mm512_set1_ps(OPRND1_32);
let b = _mm512_set1_ps(OPRND2_32);
let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
- let e = _mm512_set_ps(0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32);
+ let e = _mm512_set_ps(
+ 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0.,
+ XOR_32,
+ );
assert_eq_m512(r, e);
}

+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_broadcast_f32x2() {
+ let a = _mm_set_ps(1., 2., 3., 4.);
+ let r = _mm256_broadcast_f32x2(a);
+ let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_broadcast_f32x2() {
+ let a = _mm_set_ps(1., 2., 3., 4.);
+ let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
+ let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
+ let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_broadcast_f32x2() {
+ let a = _mm_set_ps(1., 2., 3., 4.);
+ let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
+ let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
+ assert_eq_m256(r, e);
+ }
+
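The expected vectors hinge on two ordering conventions: mask bit i always governs element i, with element 0 the lowest lane, while the `_mm*_set_*` constructors list arguments from the highest element down to element 0. With `a = _mm_set_ps(1., 2., 3., 4.)` the two low lanes hold 4. and 3., so the broadcast repeats (element 0, element 1) = (4., 3.) up the vector, which reads as `3., 4.` pairs in `set_ps` argument order.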
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_broadcast_f32x2() {
+ let a = _mm_set_ps(1., 2., 3., 4.);
+ let r = _mm512_broadcast_f32x2(a);
+ let e = _mm512_set_ps(
+ 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_broadcast_f32x2() {
+ let a = _mm_set_ps(1., 2., 3., 4.);
+ let b = _mm512_set_ps(
+ 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
+ );
+ let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a);
+ let e = _mm512_set_ps(
+ 5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_broadcast_f32x2() {
+ let a = _mm_set_ps(1., 2., 3., 4.);
+ let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
+ let e = _mm512_set_ps(
+ 0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_broadcast_f32x8() {
+ let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_broadcast_f32x8(a);
+ let e = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_broadcast_f32x8() {
+ let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_ps(
+ 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
+ );
+ let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a);
+ let e = _mm512_set_ps(
+ 9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_broadcast_f32x8() {
+ let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
+ let e = _mm512_set_ps(
+ 0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_broadcast_f64x2() {
+ let a = _mm_set_pd(1., 2.);
+ let r = _mm256_broadcast_f64x2(a);
+ let e = _mm256_set_pd(1., 2., 1., 2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_broadcast_f64x2() {
+ let a = _mm_set_pd(1., 2.);
+ let b = _mm256_set_pd(3., 4., 5., 6.);
+ let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
+ let e = _mm256_set_pd(3., 2., 1., 6.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_broadcast_f64x2() {
+ let a = _mm_set_pd(1., 2.);
+ let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
+ let e = _mm256_set_pd(0., 2., 1., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_broadcast_f64x2() {
+ let a = _mm_set_pd(1., 2.);
+ let r = _mm512_broadcast_f64x2(a);
+ let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_broadcast_f64x2() {
+ let a = _mm_set_pd(1., 2.);
+ let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
+ let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
+ let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_broadcast_f64x2() {
+ let a = _mm_set_pd(1., 2.);
+ let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
+ let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
+ assert_eq_m512d(r, e);
+ }
+
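Note the granularity difference: the f32x2/i32x2 forms broadcast only the two low 32-bit lanes of their source, while the f64x2/i64x2 forms broadcast the source's full 128 bits, which is why `_mm_set_pd(1., 2.)` reappears verbatim as `1., 2.` pairs in the expected vectors.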
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_broadcast_i32x2(a);
+ let e = _mm_set_epi32(3, 4, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm_mask_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let b = _mm_set_epi32(5, 6, 7, 8);
+ let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
+ let e = _mm_set_epi32(5, 4, 3, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm_maskz_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_maskz_broadcast_i32x2(0b0110, a);
+ let e = _mm_set_epi32(0, 4, 3, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm256_broadcast_i32x2(a);
+ let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
+ let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
+ let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
+ let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm512_broadcast_i32x2(a);
+ let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
+ let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
+ let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_broadcast_i32x2() {
+ let a = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
+ let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_broadcast_i32x8() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_broadcast_i32x8(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_broadcast_i32x8() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm512_set_epi32(
+ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ );
+ let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a);
+ let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_broadcast_i32x8() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
+ let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_broadcast_i64x2() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm256_broadcast_i64x2(a);
+ let e = _mm256_set_epi64x(1, 2, 1, 2);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_broadcast_i64x2() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm256_set_epi64x(3, 4, 5, 6);
+ let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
+ let e = _mm256_set_epi64x(3, 2, 1, 6);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_broadcast_i64x2() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
+ let e = _mm256_set_epi64x(0, 2, 1, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_broadcast_i64x2() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm512_broadcast_i64x2(a);
+ let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_broadcast_i64x2() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
+ let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
+ let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_broadcast_i64x2() {
+ let a = _mm_set_epi64x(1, 2);
+ let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
+ let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_extractf32x8_ps() {
+ let a = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_extractf32x8_ps::<1>(a);
+ let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m256(r, e);
+ }
+
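IMM8 counts 256-bit chunks from the low end of the vector: 0 selects bits 255:0, 1 selects bits 511:256. Because `_mm512_set_ps` lists the high elements first, extracting chunk 1 returns the first eight `set` arguments, as asserted above; the same numbering applies to the 128-bit `extractf64x2`/`extracti64x2` forms below.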
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_extractf32x8_ps() {
+ let a = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a);
+ let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_extractf32x8_ps() {
+ let a = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a);
+ let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_extractf64x2_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let r = _mm256_extractf64x2_pd::<1>(a);
+ let e = _mm_set_pd(1., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_extractf64x2_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm_set_pd(5., 6.);
+ let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
+ let e = _mm_set_pd(5., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_extractf64x2_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
+ let e = _mm_set_pd(0., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_extractf64x2_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_extractf64x2_pd::<2>(a);
+ let e = _mm_set_pd(3., 4.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_extractf64x2_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm_set_pd(9., 10.);
+ let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
+ let e = _mm_set_pd(9., 4.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_extractf64x2_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
+ let e = _mm_set_pd(0., 4.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_extracti32x8_epi32() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_extracti32x8_epi32::<1>(a);
+ let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_extracti32x8_epi32() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
+ let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_extracti32x8_epi32() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
+ let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_extracti64x2_epi64() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_extracti64x2_epi64::<1>(a);
+ let e = _mm_set_epi64x(1, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_extracti64x2_epi64() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm_set_epi64x(5, 6);
+ let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
+ let e = _mm_set_epi64x(5, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_extracti64x2_epi64() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
+ let e = _mm_set_epi64x(0, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_extracti64x2_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_extracti64x2_epi64::<2>(a);
+ let e = _mm_set_epi64x(3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_extracti64x2_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_set_epi64x(9, 10);
+ let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
+ let e = _mm_set_epi64x(9, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_extracti64x2_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
+ let e = _mm_set_epi64x(0, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_insertf32x8() {
+ let a = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_insertf32x8::<1>(a, b);
+ let e = _mm512_set_ps(
+ 17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ assert_eq_m512(r, e);
+ }
+
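Insertion mirrors the extract numbering: IMM8 = 1 overwrites bits 511:256 (elements 8 through 15) with `b`, so the replacement values 17. through 24. occupy the first eight argument positions of the expected `_mm512_set_ps` call while the low half of `a` passes through untouched.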
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_insertf32x8() {
+ let a = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
+ let src = _mm512_set_ps(
+ 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40.,
+ );
+ let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b);
+ let e = _mm512_set_ps(
+ 25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_insertf32x8() {
+ let a = _mm512_set_ps(
+ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
+ );
+ let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b);
+ let e = _mm512_set_ps(
+ 0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_insertf64x2() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm_set_pd(5., 6.);
+ let r = _mm256_insertf64x2::<1>(a, b);
+ let e = _mm256_set_pd(5., 6., 3., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_insertf64x2() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm_set_pd(5., 6.);
+ let src = _mm256_set_pd(7., 8., 9., 10.);
+ let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b);
+ let e = _mm256_set_pd(7., 6., 3., 10.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_insertf64x2() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm_set_pd(5., 6.);
+ let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
+ let e = _mm256_set_pd(0., 6., 3., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_insertf64x2() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm_set_pd(9., 10.);
+ let r = _mm512_insertf64x2::<2>(a, b);
+ let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_insertf64x2() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm_set_pd(9., 10.);
+ let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
+ let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b);
+ let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_insertf64x2() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm_set_pd(9., 10.);
+ let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
+ let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_inserti32x8() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_inserti32x8::<1>(a, b);
+ let e = _mm512_set_epi32(17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_inserti32x8() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
+ let src = _mm512_set_epi32(
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ );
+ let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b);
+ let e = _mm512_set_epi32(25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_inserti32x8() {
+ let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
+ let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_inserti64x2() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm_set_epi64x(5, 6);
+ let r = _mm256_inserti64x2::<1>(a, b);
+ let e = _mm256_set_epi64x(5, 6, 3, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_mask_inserti64x2() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm_set_epi64x(5, 6);
+ let src = _mm256_set_epi64x(7, 8, 9, 10);
+ let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b);
+ let e = _mm256_set_epi64x(7, 6, 3, 10);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq,avx512vl")]
+ unsafe fn test_mm256_maskz_inserti64x2() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm_set_epi64x(5, 6);
+ let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
+ let e = _mm256_set_epi64x(0, 6, 3, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_inserti64x2() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_set_epi64x(9, 10);
+ let r = _mm512_inserti64x2::<2>(a, b);
+ let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_mask_inserti64x2() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_set_epi64x(9, 10);
+ let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
+ let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b);
+ let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512dq")]
+ unsafe fn test_mm512_maskz_inserti64x2() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_set_epi64x(9, 10);
+ let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
+ let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8);
+ assert_eq_m512i(r, e);
+ }
}