From d31615425590f168ae6d821215b207d08a02cc36 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Sun, 9 Jun 2024 01:00:01 +0530
Subject: [PATCH] AVX512DQ Part 1: Logical Operations (and, andn, or, xor)

---
 library/stdarch/crates/core_arch/avx512dq.md |  73 ++-
 .../crates/core_arch/src/x86/avx512dq.rs     | 620 ++++++++++++++++++
 2 files changed, 656 insertions(+), 37 deletions(-)
 create mode 100644 library/stdarch/crates/core_arch/src/x86/avx512dq.rs

diff --git a/library/stdarch/crates/core_arch/avx512dq.md b/library/stdarch/crates/core_arch/avx512dq.md
index 00969026d039..a51a3d833301 100644
--- a/library/stdarch/crates/core_arch/avx512dq.md
+++ b/library/stdarch/crates/core_arch/avx512dq.md
@@ -402,41 +402,40 @@
 
 - Range
 
- * [ ] _mm512_reduce_round_pd
- * [ ] _mm512_mask_reduce_round_pd
- * [ ] _mm512_maskz_reduce_round_pd
- * [ ] _mm_reduce_pd
- * [ ] _mm_mask_reduce_pd
- * [ ] _mm_maskz_reduce_pd
- * [ ] _mm256_reduce_pd
- * [ ] _mm256_mask_reduce_pd
- * [ ] _mm256_maskz_reduce_pd
- * [ ] _mm512_reduce_pd
- * [ ] _mm512_mask_reduce_pd
- * [ ] _mm512_maskz_reduce_pd
- * [ ] _mm512_reduce_round_ps
- * [ ] _mm512_mask_reduce_round_ps
- * [ ] _mm512_maskz_reduce_round_ps
- * [ ] _mm_reduce_ps
- * [ ] _mm_mask_reduce_ps
- * [ ] _mm_maskz_reduce_ps
- * [ ] _mm256_reduce_ps
- * [ ] _mm256_mask_reduce_ps
- * [ ] _mm256_maskz_reduce_ps
- * [ ] _mm512_reduce_ps
- * [ ] _mm512_mask_reduce_ps
- * [ ] _mm512_maskz_reduce_ps
- * [ ] _mm_reduce_round_sd
- * [ ] _mm_mask_reduce_round_sd
- * [ ] _mm_maskz_reduce_round_sd
- * [ ] _mm_reduce_sd
- * [ ] _mm_mask_reduce_sd
- * [ ] _mm_maskz_reduce_sd
- * [ ] _mm_reduce_round_ss
- * [ ] _mm_mask_reduce_round_ss
- * [ ] _mm_maskz_reduce_round_ss
- * [ ] _mm_reduce_ss
- * [ ] _mm_mask_reduce_ss
- * [ ] _mm_maskz_reduce_ss
-
+ * [ ] _mm512_reduce_round_pd
+ * [ ] _mm512_mask_reduce_round_pd
+ * [ ] _mm512_maskz_reduce_round_pd
+ * [ ] _mm_reduce_pd
+ * [ ] _mm_mask_reduce_pd
+ * [ ] _mm_maskz_reduce_pd
+ * [ ] _mm256_reduce_pd
+ * [ ] _mm256_mask_reduce_pd
+ * [ ] _mm256_maskz_reduce_pd
+ * [ ] _mm512_reduce_pd
+ * [ ] _mm512_mask_reduce_pd
+ * [ ] _mm512_maskz_reduce_pd
+ * [ ] _mm512_reduce_round_ps
+ * [ ] _mm512_mask_reduce_round_ps
+ * [ ] _mm512_maskz_reduce_round_ps
+ * [ ] _mm_reduce_ps
+ * [ ] _mm_mask_reduce_ps
+ * [ ] _mm_maskz_reduce_ps
+ * [ ] _mm256_reduce_ps
+ * [ ] _mm256_mask_reduce_ps
+ * [ ] _mm256_maskz_reduce_ps
+ * [ ] _mm512_reduce_ps
+ * [ ] _mm512_mask_reduce_ps
+ * [ ] _mm512_maskz_reduce_ps
+ * [ ] _mm_reduce_round_sd
+ * [ ] _mm_mask_reduce_round_sd
+ * [ ] _mm_maskz_reduce_round_sd
+ * [ ] _mm_reduce_sd
+ * [ ] _mm_mask_reduce_sd
+ * [ ] _mm_maskz_reduce_sd
+ * [ ] _mm_reduce_round_ss
+ * [ ] _mm_mask_reduce_round_ss
+ * [ ] _mm_maskz_reduce_round_ss
+ * [ ] _mm_reduce_ss
+ * [ ] _mm_mask_reduce_ss
+ * [ ] _mm_maskz_reduce_ss
 

diff --git a/library/stdarch/crates/core_arch/src/x86/avx512dq.rs b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs
new file mode 100644
index 000000000000..ba481ad42b11
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs
@@ -0,0 +1,620 @@
+use crate::{
+    core_arch::{simd::*, x86::*},
+    intrinsics::simd::*,
+    mem::transmute,
+};
+
+// And
+
+/// Computes the bitwise AND of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+// FIXME: should be `vandpd` instruction.
+pub unsafe fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let and = _mm_and_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, and, src.as_f64x2()))
+}
+
+/// Computes the bitwise AND of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+// FIXME: should be `vandpd` instruction.
+pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let and = _mm_and_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+/// Computes the bitwise AND of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+// FIXME: should be `vandpd` instruction.
+pub unsafe fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let and = _mm256_and_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, and, src.as_f64x4()))
+}
+
+/// Computes the bitwise AND of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+// FIXME: should be `vandpd` instruction.
+pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let and = _mm256_and_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+/// Computes the bitwise AND of packed double-precision floating-point elements in `a` and `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandps))]
+// FIXME: should be `vandpd` instruction.
+pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
+}
+
+/// Computes the bitwise AND of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandps))]
+// FIXME: should be `vandpd` instruction.
+pub unsafe fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let and = _mm512_and_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, and, src.as_f64x8()))
+}
+
+/// Computes the bitwise AND of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandps))]
+// FIXME: should be `vandpd` instruction.
+pub unsafe fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let and = _mm512_and_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+/// Computes the bitwise AND of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let and = _mm_and_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, and, src.as_f32x4()))
+}
+
+/// Computes the bitwise AND of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let and = _mm_and_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+/// Computes the bitwise AND of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let and = _mm256_and_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, and, src.as_f32x8()))
+}
+
+/// Computes the bitwise AND of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let and = _mm256_and_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+/// Computes the bitwise AND of packed single-precision floating-point elements in `a` and `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_and(
+        transmute::<_, u32x16>(a),
+        transmute::<_, u32x16>(b),
+    ))
+}
+
+/// Computes the bitwise AND of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let and = _mm512_and_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, and, src.as_f32x16()))
+}
+
+/// Computes the bitwise AND of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm512_maskz_and_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let and = _mm512_and_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+// Andnot
+
+/// Computes the bitwise NOT of packed double-precision floating-point elements in `a` and then ANDs
+/// with `b`, using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+// FIXME: should be `vandnpd` instruction.
+pub unsafe fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let andnot = _mm_andnot_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
+}
+
+/// Computes the bitwise NOT of packed double-precision floating-point elements in `a` and then ANDs
+/// with `b`, using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+// FIXME: should be `vandnpd` instruction.
+pub unsafe fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let andnot = _mm_andnot_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, andnot, zero))
+}
+
+/// Computes the bitwise NOT of packed double-precision floating-point elements in `a` and then ANDs
+/// with `b`, using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+// FIXME: should be `vandnpd` instruction.
+pub unsafe fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let andnot = _mm256_andnot_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
+}
+
+/// Computes the bitwise NOT of packed double-precision floating-point elements in `a` and then ANDs
+/// with `b`, using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+// FIXME: should be `vandnpd` instruction.
+pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let andnot = _mm256_andnot_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, andnot, zero))
+}
+
+/// Computes the bitwise NOT of packed double-precision floating-point elements in `a` and then ANDs
+/// with `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandnps))]
+// FIXME: should be `vandnpd` instruction.
+pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
+    _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b)
+}
+
+/// Computes the bitwise NOT of packed double-precision floating-point elements in `a` and then ANDs
+/// with `b`, using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandnps))]
+// FIXME: should be `vandnpd` instruction.
+pub unsafe fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let andnot = _mm512_andnot_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
+}
+
+/// Computes the bitwise NOT of packed double-precision floating-point elements in `a` and then ANDs
+/// with `b`, using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandnps))]
+// FIXME: should be `vandnpd` instruction.
+pub unsafe fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let andnot = _mm512_andnot_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, andnot, zero))
+}
+
+/// Computes the bitwise NOT of packed single-precision floating-point elements in `a` and then ANDs
+/// with `b`, using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+pub unsafe fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let andnot = _mm_andnot_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
+}
+
+/// Computes the bitwise NOT of packed single-precision floating-point elements in `a` and then ANDs
+/// with `b`, using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+pub unsafe fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let andnot = _mm_andnot_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, andnot, zero))
+}
+
+/// Computes the bitwise NOT of packed single-precision floating-point elements in `a` and then ANDs
+/// with `b`, using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+pub unsafe fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let andnot = _mm256_andnot_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
+}
+
+/// Computes the bitwise NOT of packed single-precision floating-point elements in `a` and then ANDs
+/// with `b`, using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vandnps))]
+pub unsafe fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let andnot = _mm256_andnot_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, andnot, zero))
+}
+
+/// Computes the bitwise NOT of packed single-precision floating-point elements in `a` and then ANDs
+/// with `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandnps))]
+pub unsafe fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
+    _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b)
+}
+
+/// Computes the bitwise NOT of packed single-precision floating-point elements in `a` and then ANDs
+/// with `b`, using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandnps))]
+pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let andnot = _mm512_andnot_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
+}
+
+/// Computes the bitwise NOT of packed single-precision floating-point elements in `a` and then ANDs
+/// with `b`, using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vandnps))]
+pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let andnot = _mm512_andnot_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, andnot, zero))
+}
+
+// Or
+
+/// Computes the bitwise OR of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+// FIXME: should be `vorpd` instruction.
+pub unsafe fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let or = _mm_or_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, or, src.as_f64x2()))
+}
+
+/// Computes the bitwise OR of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+// FIXME: should be `vorpd` instruction.
+pub unsafe fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let or = _mm_or_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, or, zero))
+}
+
+/// Computes the bitwise OR of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+// FIXME: should be `vorpd` instruction.
+pub unsafe fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let or = _mm256_or_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, or, src.as_f64x4()))
+}
+
+/// Computes the bitwise OR of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+// FIXME: should be `vorpd` instruction.
+pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let or = _mm256_or_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, or, zero))
+}
+
+/// Computes the bitwise OR of packed double-precision floating-point elements in `a` and `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vorps))]
+// FIXME: should be `vorpd` instruction.
+pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
+}
+
+/// Computes the bitwise OR of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vorps))]
+// FIXME: should be `vorpd` instruction.
+pub unsafe fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let or = _mm512_or_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, or, src.as_f64x8()))
+}
+
+/// Computes the bitwise OR of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vorps))]
+// FIXME: should be `vorpd` instruction.
+pub unsafe fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let or = _mm512_or_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, or, zero))
+}
+
+/// Computes the bitwise OR of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let or = _mm_or_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, or, src.as_f32x4()))
+}
+
+/// Computes the bitwise OR of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let or = _mm_or_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, or, zero))
+}
+
+/// Computes the bitwise OR of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let or = _mm256_or_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, or, src.as_f32x8()))
+}
+
+/// Computes the bitwise OR of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let or = _mm256_or_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, or, zero))
+}
+
+/// Computes the bitwise OR of packed single-precision floating-point elements in `a` and `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_or(
+        transmute::<_, u32x16>(a),
+        transmute::<_, u32x16>(b),
+    ))
+}
+
+/// Computes the bitwise OR of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let or = _mm512_or_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, or, src.as_f32x16()))
+}
+
+/// Computes the bitwise OR of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm512_maskz_or_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let or = _mm512_or_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, or, zero))
+}
+
+// Xor
+
+/// Computes the bitwise XOR of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+// FIXME: should be `vxorpd` instruction.
+pub unsafe fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let xor = _mm_xor_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
+}
+
+/// Computes the bitwise XOR of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+// FIXME: should be `vxorpd` instruction.
+pub unsafe fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let xor = _mm_xor_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, xor, zero))
+}
+
+/// Computes the bitwise XOR of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+// FIXME: should be `vxorpd` instruction.
+pub unsafe fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let xor = _mm256_xor_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
+}
+
+/// Computes the bitwise XOR of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+// FIXME: should be `vxorpd` instruction.
+pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let xor = _mm256_xor_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, xor, zero))
+}
+
+/// Computes the bitwise XOR of packed double-precision floating-point elements in `a` and `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vxorps))]
+// FIXME: should be `vxorpd` instruction.
+pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
+}
+
+/// Computes the bitwise XOR of packed double-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vxorps))]
+// FIXME: should be `vxorpd` instruction.
+pub unsafe fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let xor = _mm512_xor_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
+}
+
+/// Computes the bitwise XOR of packed double-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vxorps))]
+// FIXME: should be `vxorpd` instruction.
+pub unsafe fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let xor = _mm512_xor_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, xor, zero))
+}
+
+/// Computes the bitwise XOR of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let xor = _mm_xor_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
+}
+
+/// Computes the bitwise XOR of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let xor = _mm_xor_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, xor, zero))
+}
+
+/// Computes the bitwise XOR of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let xor = _mm256_xor_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
+}
+
+/// Computes the bitwise XOR of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq,avx512vl")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let xor = _mm256_xor_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, xor, zero))
+}
+
+/// Computes the bitwise XOR of packed single-precision floating-point elements in `a` and `b`.
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_xor(
+        transmute::<_, u32x16>(a),
+        transmute::<_, u32x16>(b),
+    ))
+}
+
+/// Computes the bitwise XOR of packed single-precision floating-point elements in `a` and `b`,
+/// using writemask `k` (elements are copied from `src` if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let xor = _mm512_xor_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
+}
+
+/// Computes the bitwise XOR of packed single-precision floating-point elements in `a` and `b`,
+/// using zeromask `k` (elements are zeroed out if the corresponding bit is not set).
+#[inline]
+#[target_feature(enable = "avx512dq")]
+#[cfg_attr(test, assert_instr(vxorps))]
+pub unsafe fn _mm512_maskz_xor_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
+    let xor = _mm512_xor_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, xor, zero))
+}
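
Usage sketch (reviewer aside, not part of the patch): one minimal way to exercise the new intrinsics, assuming a nightly toolchain where these are gated behind the `stdarch_x86_avx512` feature (the gate name and the helper `abs_pd` are illustrative assumptions, not defined by this patch). ANDNOT against a `-0.0` mask clears each lane's sign bit, i.e. a lane-wise `abs`:

#![feature(stdarch_x86_avx512)]
use std::arch::x86_64::*;

// Hypothetical helper: lane-wise |x| for 8 f64 lanes, computed as
// (!(-0.0)) & x, which clears only the sign bit of each lane.
#[target_feature(enable = "avx512dq")]
unsafe fn abs_pd(x: __m512d) -> __m512d {
    _mm512_andnot_pd(_mm512_set1_pd(-0.0), x)
}

fn main() {
    // Dispatch at runtime so the binary stays safe on non-AVX512DQ CPUs.
    if is_x86_feature_detected!("avx512dq") {
        let v = unsafe { abs_pd(_mm512_set1_pd(-2.5)) };
        let mut out = [0.0f64; 8];
        unsafe { _mm512_storeu_pd(out.as_mut_ptr(), v) };
        assert_eq!(out, [2.5; 8]);
    }
}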