AVX512DQ Part 1: Logical Operations (and, andn, or, xor)

This commit is contained in:
sayantn 2024-06-09 01:00:01 +05:30 committed by Amanieu d'Antras
parent cc2c29113f
commit d316154255
2 changed files with 551 additions and 37 deletions

View file

@ -402,41 +402,40 @@
- Range
* [ ] _mm512_reduce_round_pd
* [ ] _mm512_mask_reduce_round_pd
* [ ] _mm512_maskz_reduce_round_pd
* [ ] _mm_reduce_pd
* [ ] _mm_mask_reduce_pd
* [ ] _mm_maskz_reduce_pd
* [ ] _mm256_reduce_pd
* [ ] _mm256_mask_reduce_pd
* [ ] _mm256_maskz_reduce_pd
* [ ] _mm512_reduce_pd
* [ ] _mm512_mask_reduce_pd
* [ ] _mm512_maskz_reduce_pd
* [ ] _mm512_reduce_round_ps
* [ ] _mm512_mask_reduce_round_ps
* [ ] _mm512_maskz_reduce_round_ps
* [ ] _mm_reduce_ps
* [ ] _mm_mask_reduce_ps
* [ ] _mm_maskz_reduce_ps
* [ ] _mm256_reduce_ps
* [ ] _mm256_mask_reduce_ps
* [ ] _mm256_maskz_reduce_ps
* [ ] _mm512_reduce_ps
* [ ] _mm512_mask_reduce_ps
* [ ] _mm512_maskz_reduce_ps
* [ ] _mm_reduce_round_sd
* [ ] _mm_mask_reduce_round_sd
* [ ] _mm_maskz_reduce_round_sd
* [ ] _mm_reduce_sd
* [ ] _mm_mask_reduce_sd
* [ ] _mm_maskz_reduce_sd
* [ ] _mm_reduce_round_ss
* [ ] _mm_mask_reduce_round_ss
* [ ] _mm_maskz_reduce_round_ss
* [ ] _mm_reduce_ss
* [ ] _mm_mask_reduce_ss
* [ ] _mm_maskz_reduce_ss
* [ ] _mm512_reduce_round_pd
* [ ] _mm512_mask_reduce_round_pd
* [ ] _mm512_maskz_reduce_round_pd
* [ ] _mm_reduce_pd
* [ ] _mm_mask_reduce_pd
* [ ] _mm_maskz_reduce_pd
* [ ] _mm256_reduce_pd
* [ ] _mm256_mask_reduce_pd
* [ ] _mm256_maskz_reduce_pd
* [ ] _mm512_reduce_pd
* [ ] _mm512_mask_reduce_pd
* [ ] _mm512_maskz_reduce_pd
* [ ] _mm512_reduce_round_ps
* [ ] _mm512_mask_reduce_round_ps
* [ ] _mm512_maskz_reduce_round_ps
* [ ] _mm_reduce_ps
* [ ] _mm_mask_reduce_ps
* [ ] _mm_maskz_reduce_ps
* [ ] _mm256_reduce_ps
* [ ] _mm256_mask_reduce_ps
* [ ] _mm256_maskz_reduce_ps
* [ ] _mm512_reduce_ps
* [ ] _mm512_mask_reduce_ps
* [ ] _mm512_maskz_reduce_ps
* [ ] _mm_reduce_round_sd
* [ ] _mm_mask_reduce_round_sd
* [ ] _mm_maskz_reduce_round_sd
* [ ] _mm_reduce_sd
* [ ] _mm_mask_reduce_sd
* [ ] _mm_maskz_reduce_sd
* [ ] _mm_reduce_round_ss
* [ ] _mm_mask_reduce_round_ss
* [ ] _mm_maskz_reduce_round_ss
* [ ] _mm_reduce_ss
* [ ] _mm_mask_reduce_ss
* [ ] _mm_maskz_reduce_ss
</p>

View file

@ -0,0 +1,515 @@
use crate::{
core_arch::{simd::*, x86::*},
intrinsics::simd::*,
mem::{self, transmute},
ptr,
};
// And //
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
// FIXME: should be `vandpd` instruction.
pub unsafe fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let and = _mm_and_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, and, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
// FIXME: should be `vandpd` instruction.
pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let and = _mm_and_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
// FIXME: should be `vandpd` instruction.
pub unsafe fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let and = _mm256_and_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, and, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
// FIXME: should be `vandpd` instruction.
pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let and = _mm256_and_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
// FIXME: should be `vandpd` instruction.
pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
// FIXME: should be `vandpd` instruction.
pub unsafe fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let and = _mm512_and_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, and, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
// FIXME: should be `vandpd` instruction.
pub unsafe fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let and = _mm512_and_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let and = _mm_and_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, and, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let and = _mm_and_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let and = _mm256_and_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, and, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let and = _mm256_and_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_and(
transmute::<_, u32x16>(a),
transmute::<_, u32x16>(b),
))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
let and = _mm512_and_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, and, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
pub unsafe fn _mm512_maskz_and_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
let and = _mm512_and_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, and, zero))
}
// Andnot
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
// FIXME: should be `vandnpd` instruction.
pub unsafe fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let andnot = _mm_andnot_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
// FIXME: should be `vandnpd` instruction.
pub unsafe fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let andnot = _mm_andnot_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
// FIXME: should be `vandnpd` instruction.
pub unsafe fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let andnot = _mm256_andnot_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
// FIXME: should be `vandnpd` instruction.
pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let andnot = _mm256_andnot_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
// FIXME: should be `vandnpd` instruction.
pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
_mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b)
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
// FIXME: should be `vandnpd` instruction.
pub unsafe fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let andnot = _mm512_andnot_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
// FIXME: should be `vandnpd` instruction.
pub unsafe fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let andnot = _mm512_andnot_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let andnot = _mm_andnot_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let andnot = _mm_andnot_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let andnot = _mm256_andnot_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let andnot = _mm256_andnot_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
_mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b)
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
let andnot = _mm512_andnot_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
let andnot = _mm512_andnot_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, andnot, zero))
}
// Or
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
// FIXME: should be `vorpd` instruction.
pub unsafe fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let or = _mm_or_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, or, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
// FIXME: should be `vorpd` instruction.
pub unsafe fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let or = _mm_or_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
// FIXME: should be `vorpd` instruction.
pub unsafe fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let or = _mm256_or_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, or, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
// FIXME: should be `vorpd` instruction.
pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let or = _mm256_or_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
// FIXME: should be `vorpd` instruction.
pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
// FIXME: should be `vorpd` instruction.
pub unsafe fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let or = _mm512_or_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, or, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
// FIXME: should be `vorpd` instruction.
pub unsafe fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let or = _mm512_or_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let or = _mm_or_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, or, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let or = _mm_or_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let or = _mm256_or_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, or, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let or = _mm256_or_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_or(
transmute::<_, u32x16>(a),
transmute::<_, u32x16>(b),
))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
let or = _mm512_or_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, or, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
pub unsafe fn _mm512_maskz_or_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
let or = _mm512_or_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, or, zero))
}
// Xor
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
// FIXME: should be `vxorpd` instruction.
pub unsafe fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let xor = _mm_xor_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
// FIXME: should be `vxorpd` instruction.
pub unsafe fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let xor = _mm_xor_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
// FIXME: should be `vxorpd` instruction.
pub unsafe fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let xor = _mm256_xor_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
// FIXME: should be `vxorpd` instruction.
pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let xor = _mm256_xor_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
// FIXME: should be `vxorpd` instruction.
pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
// FIXME: should be `vxorpd` instruction.
pub unsafe fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let xor = _mm512_xor_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
// FIXME: should be `vxorpd` instruction.
pub unsafe fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let xor = _mm512_xor_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let xor = _mm_xor_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let xor = _mm_xor_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let xor = _mm256_xor_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let xor = _mm256_xor_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_xor(
transmute::<_, u32x16>(a),
transmute::<_, u32x16>(b),
))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 {
let xor = _mm512_xor_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_maskz_xor_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 {
let xor = _mm512_xor_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, xor, zero))
}