From fd6d97c21ee5142565136a93548be199b8dc1969 Mon Sep 17 00:00:00 2001 From: sayantn Date: Sun, 9 Jun 2024 22:06:45 +0530 Subject: [PATCH] AVX512DQ Part 1: Logical Operations (and, andn, or, xor) - tests and doc --- library/stdarch/crates/core_arch/avx512dq.md | 112 +-- .../crates/core_arch/src/x86/avx512dq.rs | 914 ++++++++++++++++-- 2 files changed, 904 insertions(+), 122 deletions(-) diff --git a/library/stdarch/crates/core_arch/avx512dq.md b/library/stdarch/crates/core_arch/avx512dq.md index a51a3d833301..82b66445a7c1 100644 --- a/library/stdarch/crates/core_arch/avx512dq.md +++ b/library/stdarch/crates/core_arch/avx512dq.md @@ -1,71 +1,71 @@ ["AVX512DQ"]

- And: - * [ ] _mm_mask_and_pd - * [ ] _mm_maskz_and_pd - * [ ] _mm_mask_and_ps - * [ ] _mm_maskz_and_ps - * [ ] _mm256_mask_and_pd - * [ ] _mm256_maskz_and_pd - * [ ] _mm256_mask_and_ps - * [ ] _mm256_maskz_and_ps - * [ ] _mm512_and_pd - * [ ] _mm512_mask_and_pd - * [ ] _mm512_maskz_and_pd - * [ ] _mm512_and_ps - * [ ] _mm512_mask_and_ps - * [ ] _mm512_maskz_and_ps + * [x] _mm_mask_and_pd + * [x] _mm_maskz_and_pd + * [x] _mm_mask_and_ps + * [x] _mm_maskz_and_ps + * [x] _mm256_mask_and_pd + * [x] _mm256_maskz_and_pd + * [x] _mm256_mask_and_ps + * [x] _mm256_maskz_and_ps + * [x] _mm512_and_pd + * [x] _mm512_mask_and_pd + * [x] _mm512_maskz_and_pd + * [x] _mm512_and_ps + * [x] _mm512_mask_and_ps + * [x] _mm512_maskz_and_ps - AndNot: - * [ ] _mm_mask_andnot_pd - * [ ] _mm_maskz_andnot_pd - * [ ] _mm_mask_andnot_ps - * [ ] _mm_maskz_andnot_ps - * [ ] _mm256_mask_andnot_pd - * [ ] _mm256_maskz_andnot_pd - * [ ] _mm256_mask_andnot_ps - * [ ] _mm256_maskz_andnot_ps - * [ ] _mm512_andnot_pd - * [ ] _mm512_mask_andnot_pd - * [ ] _mm512_maskz_andnot_pd - * [ ] _mm512_andnot_ps - * [ ] _mm512_mask_andnot_ps - * [ ] _mm512_maskz_andnot_ps + * [x] _mm_mask_andnot_pd + * [x] _mm_maskz_andnot_pd + * [x] _mm_mask_andnot_ps + * [x] _mm_maskz_andnot_ps + * [x] _mm256_mask_andnot_pd + * [x] _mm256_maskz_andnot_pd + * [x] _mm256_mask_andnot_ps + * [x] _mm256_maskz_andnot_ps + * [x] _mm512_andnot_pd + * [x] _mm512_mask_andnot_pd + * [x] _mm512_maskz_andnot_pd + * [x] _mm512_andnot_ps + * [x] _mm512_mask_andnot_ps + * [x] _mm512_maskz_andnot_ps - Or: - * [ ] _mm_mask_or_pd - * [ ] _mm_maskz_or_pd - * [ ] _mm_mask_or_ps - * [ ] _mm_maskz_or_ps - * [ ] _mm256_mask_or_pd - * [ ] _mm256_maskz_or_pd - * [ ] _mm256_mask_or_ps - * [ ] _mm256_maskz_or_ps - * [ ] _mm512_or_pd - * [ ] _mm512_mask_or_pd - * [ ] _mm512_maskz_or_pd - * [ ] _mm512_or_ps - * [ ] _mm512_mask_or_ps - * [ ] _mm512_maskz_or_ps + * [x] _mm_mask_or_pd + * [x] _mm_maskz_or_pd + * [x] _mm_mask_or_ps + * [x] _mm_maskz_or_ps + * [x] _mm256_mask_or_pd + * [x] _mm256_maskz_or_pd + * [x] _mm256_mask_or_ps + * [x] _mm256_maskz_or_ps + * [x] _mm512_or_pd + * [x] _mm512_mask_or_pd + * [x] _mm512_maskz_or_pd + * [x] _mm512_or_ps + * [x] _mm512_mask_or_ps + * [x] _mm512_maskz_or_ps - Xor: - * [ ] _mm_mask_xor_pd - * [ ] _mm_maskz_xor_pd - * [ ] _mm_mask_xor_ps - * [ ] _mm_maskz_xor_ps - * [ ] _mm256_mask_xor_pd - * [ ] _mm256_maskz_xor_pd - * [ ] _mm256_mask_xor_ps - * [ ] _mm256_maskz_xor_ps - * [ ] _mm512_xor_pd - * [ ] _mm512_mask_xor_pd - * [ ] _mm512_maskz_xor_pd - * [ ] _mm512_xor_ps - * [ ] _mm512_mask_xor_ps - * [ ] _mm512_maskz_xor_ps + * [x] _mm_mask_xor_pd + * [x] _mm_maskz_xor_pd + * [x] _mm_mask_xor_ps + * [x] _mm_maskz_xor_ps + * [x] _mm256_mask_xor_pd + * [x] _mm256_maskz_xor_pd + * [x] _mm256_mask_xor_ps + * [x] _mm256_maskz_xor_ps + * [x] _mm512_xor_pd + * [x] _mm512_mask_xor_pd + * [x] _mm512_maskz_xor_pd + * [x] _mm512_xor_ps + * [x] _mm512_mask_xor_ps + * [x] _mm512_maskz_xor_ps - Broadcast diff --git a/library/stdarch/crates/core_arch/src/x86/avx512dq.rs b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs index ba481ad42b11..c4ff97035cfa 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512dq.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512dq.rs @@ -1,77 +1,106 @@ use crate::{ core_arch::{simd::*, x86::*}, intrinsics::simd::*, - mem::{self, transmute}, - ptr, + mem::transmute, }; // And // +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst 
using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandps))] -// FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. pub unsafe fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let and = _mm_and_pd(a, b).as_f64x2(); transmute(simd_select_bitmask(k, and, src.as_f64x2())) } +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandps))] -// FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let and = _mm_and_pd(a, b).as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); transmute(simd_select_bitmask(k, and, zero)) } + +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandps))] -// FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. pub unsafe fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let and = _mm256_and_pd(a, b).as_f64x4(); transmute(simd_select_bitmask(k, and, src.as_f64x4())) } +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandps))] -// FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let and = _mm256_and_pd(a, b).as_f64x4(); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, and, zero)) } +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandps))] -// FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. 
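+// There is no float-specific AND: the f64 lanes are reinterpreted as u64, combined
+// with `simd_and`, and transmuted back. Per lane this matches the scalar sketch
+// `f64::from_bits(a.to_bits() & b.to_bits())` (sketch only, not part of the API).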
pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandps))] -// FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. pub unsafe fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let and = _mm512_and_pd(a, b).as_f64x8(); transmute(simd_select_bitmask(k, and, src.as_f64x8())) } +/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandps))] -// FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. pub unsafe fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let and = _mm512_and_pd(a, b).as_f64x8(); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, and, zero)) } +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] @@ -80,6 +109,10 @@ pub unsafe fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> transmute(simd_select_bitmask(k, and, src.as_f32x4())) } +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] @@ -89,6 +122,11 @@ pub unsafe fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(simd_select_bitmask(k, and, zero)) } +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] @@ -97,6 +135,10 @@ pub unsafe fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) transmute(simd_select_bitmask(k, and, src.as_f32x8())) } +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandps))] @@ -106,6 +148,10 @@ pub unsafe fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { transmute(simd_select_bitmask(k, and, zero)) } +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandps))] @@ -116,18 +162,27 @@ pub unsafe fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 { )) } +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandps))] -pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { let and = _mm512_and_ps(a, b).as_f32x16(); transmute(simd_select_bitmask(k, and, src.as_f32x16())) } +/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandps))] -pub unsafe fn _mm512_maskz_and_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let and = _mm512_and_ps(a, b).as_f32x16(); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, and, zero)) @@ -135,71 +190,103 @@ pub unsafe fn _mm512_maskz_and_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 { // Andnot +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandnps))] -// FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. pub unsafe fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let andnot = _mm_andnot_pd(a, b).as_f64x2(); transmute(simd_select_bitmask(k, andnot, src.as_f64x2())) } +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandnps))] -// FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. pub unsafe fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let andnot = _mm_andnot_pd(a, b).as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); transmute(simd_select_bitmask(k, andnot, zero)) } +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandnps))] -// FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. pub unsafe fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let andnot = _mm256_andnot_pd(a, b).as_f64x4(); transmute(simd_select_bitmask(k, andnot, src.as_f64x4())) } +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vandnps))] -// FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let andnot = _mm256_andnot_pd(a, b).as_f64x4(); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, andnot, zero)) } +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandnps))] -// FIXME: should be `vandnpd` instruction. 
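+// There is no vector NOT instruction, so ANDNOT is composed from the ops above:
+// XOR-ing `a` with an all-ones vector inverts every bit, and the inverted value is
+// then ANDed with `b`; per lane, the scalar sketch is
+// `f64::from_bits(!a.to_bits() & b.to_bits())` (illustrative only).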
+#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) } +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandnps))] -// FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. pub unsafe fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let andnot = _mm512_andnot_pd(a, b).as_f64x8(); transmute(simd_select_bitmask(k, andnot, src.as_f64x8())) } +/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandnps))] -// FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. pub unsafe fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let andnot = _mm512_andnot_pd(a, b).as_f64x8(); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, andnot, zero)) } +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] @@ -208,6 +295,11 @@ pub unsafe fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) transmute(simd_select_bitmask(k, andnot, src.as_f32x4())) } +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] @@ -217,6 +309,11 @@ pub unsafe fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(simd_select_bitmask(k, andnot, zero)) } +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] @@ -225,6 +322,11 @@ pub unsafe fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m2 transmute(simd_select_bitmask(k, andnot, src.as_f32x8())) } +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vandnps))] @@ -234,6 +336,10 @@ pub unsafe fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m25 transmute(simd_select_bitmask(k, andnot, zero)) } +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnps))] @@ -241,18 +347,28 @@ pub unsafe fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) } +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnps))] -pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { let andnot = _mm512_andnot_ps(a, b).as_f32x16(); transmute(simd_select_bitmask(k, andnot, src.as_f32x16())) } +/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then +/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the +/// corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vandnps))] -pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let andnot = _mm512_andnot_ps(a, b).as_f32x16(); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, andnot, zero)) @@ -260,71 +376,100 @@ pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask8, a: __m512, b: __m512) -> __m51 // Or +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vorps))] -// FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. pub unsafe fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let or = _mm_or_pd(a, b).as_f64x2(); transmute(simd_select_bitmask(k, or, src.as_f64x2())) } +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vorps))] -// FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. pub unsafe fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let or = _mm_or_pd(a, b).as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); transmute(simd_select_bitmask(k, or, zero)) } +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vorps))] -// FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. pub unsafe fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let or = _mm256_or_pd(a, b).as_f64x4(); transmute(simd_select_bitmask(k, or, src.as_f64x4())) } +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vorps))] -// FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let or = _mm256_or_pd(a, b).as_f64x4(); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, or, zero)) } +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vorps))] -// FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. 
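+// Same scheme as AND: reinterpret the f64 lanes as u64, OR them with `simd_or`, and
+// transmute back; per lane, `f64::from_bits(a.to_bits() | b.to_bits())` (sketch only).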
pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vorps))] -// FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. pub unsafe fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let or = _mm512_or_pd(a, b).as_f64x8(); transmute(simd_select_bitmask(k, or, src.as_f64x8())) } +/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vorps))] -// FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. pub unsafe fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let or = _mm512_or_pd(a, b).as_f64x8(); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, or, zero)) } +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] @@ -333,6 +478,10 @@ pub unsafe fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> transmute(simd_select_bitmask(k, or, src.as_f32x4())) } +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] @@ -342,6 +491,11 @@ pub unsafe fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(simd_select_bitmask(k, or, zero)) } +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] @@ -350,6 +504,10 @@ pub unsafe fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) transmute(simd_select_bitmask(k, or, src.as_f32x8())) } +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vorps))] @@ -359,6 +517,10 @@ pub unsafe fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { transmute(simd_select_bitmask(k, or, zero)) } +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorps))] @@ -369,18 +531,27 @@ pub unsafe fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 { )) } +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorps))] -pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { let or = _mm512_or_ps(a, b).as_f32x16(); transmute(simd_select_bitmask(k, or, src.as_f32x16())) } +/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vorps))] -pub unsafe fn _mm512_maskz_or_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let or = _mm512_or_ps(a, b).as_f32x16(); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, or, zero)) @@ -388,71 +559,100 @@ pub unsafe fn _mm512_maskz_or_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 { // Xor +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vxorps))] -// FIXME: should be `vxorpd` instruction. 
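+// Every mask/maskz intrinsic in this file follows one pattern: compute the full-width
+// result, then `simd_select_bitmask(k, result, fallback)` chooses per lane, with
+// `fallback` being `src` for write-masking and an all-zeros vector for zero-masking.
+// Sketch for the function below: `dst[i] = if bit i of k is set { xor[i] } else { src[i] }`.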
+#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. pub unsafe fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let xor = _mm_xor_pd(a, b).as_f64x2(); transmute(simd_select_bitmask(k, xor, src.as_f64x2())) } +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vxorps))] -// FIXME: should be `vxorpd` instruction. +#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. pub unsafe fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let xor = _mm_xor_pd(a, b).as_f64x2(); let zero = _mm_setzero_pd().as_f64x2(); transmute(simd_select_bitmask(k, xor, zero)) } +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vxorps))] -// FIXME: should be `vxorpd` instruction. +#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. pub unsafe fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let xor = _mm256_xor_pd(a, b).as_f64x4(); transmute(simd_select_bitmask(k, xor, src.as_f64x4())) } +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] -#[cfg_attr(test, assert_instr(vxorps))] -// FIXME: should be `vxorpd` instruction. +#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let xor = _mm256_xor_pd(a, b).as_f64x4(); let zero = _mm256_setzero_pd().as_f64x4(); transmute(simd_select_bitmask(k, xor, zero)) } +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vxorps))] -// FIXME: should be `vxorpd` instruction. +#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) } +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vxorps))] -// FIXME: should be `vxorpd` instruction. +#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. pub unsafe fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let xor = _mm512_xor_pd(a, b).as_f64x8(); transmute(simd_select_bitmask(k, xor, src.as_f64x8())) } +/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vxorps))] -// FIXME: should be `vxorpd` instruction. +#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. pub unsafe fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let xor = _mm512_xor_pd(a, b).as_f64x8(); let zero = _mm512_setzero_pd().as_f64x8(); transmute(simd_select_bitmask(k, xor, zero)) } +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] @@ -461,6 +661,10 @@ pub unsafe fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> transmute(simd_select_bitmask(k, xor, src.as_f32x4())) } +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] @@ -470,6 +674,11 @@ pub unsafe fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(simd_select_bitmask(k, xor, zero)) } +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] @@ -478,6 +687,10 @@ pub unsafe fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) transmute(simd_select_bitmask(k, xor, src.as_f32x8())) } +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107) #[inline] #[target_feature(enable = "avx512dq,avx512vl")] #[cfg_attr(test, assert_instr(vxorps))] @@ -487,6 +700,10 @@ pub unsafe fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { transmute(simd_select_bitmask(k, xor, zero)) } +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b +/// and store the results in dst. +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorps))] @@ -497,19 +714,584 @@ pub unsafe fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 { )) } +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using writemask k (elements are copied from src if the corresponding +/// bit is not set). +/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { let xor = _mm512_xor_ps(a, b).as_f32x16(); transmute(simd_select_bitmask(k, xor, src.as_f32x16())) } +/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and +/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set). 
+/// +/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110) #[inline] #[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vxorps))] -pub unsafe fn _mm512_maskz_xor_ps(k: __mmask8, a: __m512, b: __m512) -> __m512 { +pub unsafe fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let xor = _mm512_xor_ps(a, b).as_f32x16(); let zero = _mm512_setzero_ps().as_f32x16(); transmute(simd_select_bitmask(k, xor, zero)) } + +#[cfg(test)] +mod tests { + use super::*; + + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::core_arch::x86_64::*; + + const OPRND1_64: f64 = f64::from_bits(0x3333333333333333); + const OPRND2_64: f64 = f64::from_bits(0x5555555555555555); + + const AND_64: f64 = f64::from_bits(0x1111111111111111); + const ANDN_64: f64 = f64::from_bits(0x4444444444444444); + const OR_64: f64 = f64::from_bits(0x7777777777777777); + const XOR_64: f64 = f64::from_bits(0x6666666666666666); + + const OPRND1_32: f32 = f32::from_bits(0x33333333); + const OPRND2_32: f32 = f32::from_bits(0x55555555); + + const AND_32: f32 = f32::from_bits(0x11111111); + const ANDN_32: f32 = f32::from_bits(0x44444444); + const OR_32: f32 = f32::from_bits(0x77777777); + const XOR_32: f32 = f32::from_bits(0x66666666); + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_and_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_and_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., AND_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_and_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_and_pd(0b01, a, b); + let e = _mm_set_pd(0.0, AND_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_and_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_and_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., AND_64, 3., AND_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_and_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_and_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_and_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_and_pd(a, b); + let e = _mm512_set1_pd(AND_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_and_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_and_pd(src, 0b01010101, a, b); + let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_and_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_and_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_and_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = 
_mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_and_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., AND_32, 3., AND_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_and_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_and_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_and_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_and_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_and_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_and_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_and_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_and_ps(a, b); + let e = _mm512_set1_ps(AND_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_and_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32, 15., AND_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_and_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_maskz_and_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps(0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_andnot_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_andnot_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., ANDN_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_andnot_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_andnot_pd(0b01, a, b); + let e = _mm_set_pd(0.0, ANDN_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_andnot_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_andnot_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_andnot_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_andnot_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_andnot_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = 
_mm512_andnot_pd(a, b); + let e = _mm512_set1_pd(ANDN_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_andnot_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b); + let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_andnot_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_andnot_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_andnot_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_andnot_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_andnot_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_andnot_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_andnot_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_andnot_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_andnot_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_andnot_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_andnot_ps(a, b); + let e = _mm512_set1_ps(ANDN_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_andnot_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13., ANDN_32, 15., ANDN_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_andnot_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps(0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_or_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_or_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., OR_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_or_pd() { + let 
a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_or_pd(0b01, a, b); + let e = _mm_set_pd(0.0, OR_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_or_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_or_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., OR_64, 3., OR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_or_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_or_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_or_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_or_pd(a, b); + let e = _mm512_set1_pd(OR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_or_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_or_pd(src, 0b01010101, a, b); + let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_or_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_or_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_or_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_or_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., OR_32, 3., OR_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_or_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_or_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_or_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_or_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_or_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_or_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_or_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_or_ps(a, b); + let e = _mm512_set1_ps(OR_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_or_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 
13., OR_32, 15., OR_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_or_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_maskz_or_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps(0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_xor_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let src = _mm_set_pd(1., 2.); + let r = _mm_mask_xor_pd(src, 0b01, a, b); + let e = _mm_set_pd(1., XOR_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_xor_pd() { + let a = _mm_set1_pd(OPRND1_64); + let b = _mm_set1_pd(OPRND2_64); + let r = _mm_maskz_xor_pd(0b01, a, b); + let e = _mm_set_pd(0.0, XOR_64); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_xor_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let src = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_xor_pd(src, 0b0101, a, b); + let e = _mm256_set_pd(1., XOR_64, 3., XOR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_xor_pd() { + let a = _mm256_set1_pd(OPRND1_64); + let b = _mm256_set1_pd(OPRND2_64); + let r = _mm256_maskz_xor_pd(0b0101, a, b); + let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_xor_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_xor_pd(a, b); + let e = _mm512_set1_pd(XOR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_xor_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_xor_pd(src, 0b01010101, a, b); + let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_xor_pd() { + let a = _mm512_set1_pd(OPRND1_64); + let b = _mm512_set1_pd(OPRND2_64); + let r = _mm512_maskz_xor_pd(0b01010101, a, b); + let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_mask_xor_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let src = _mm_set_ps(1., 2., 3., 4.); + let r = _mm_mask_xor_ps(src, 0b0101, a, b); + let e = _mm_set_ps(1., XOR_32, 3., XOR_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm_maskz_xor_ps() { + let a = _mm_set1_ps(OPRND1_32); + let b = _mm_set1_ps(OPRND2_32); + let r = _mm_maskz_xor_ps(0b0101, a, b); + let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_mask_xor_ps() { + let a = _mm256_set1_ps(OPRND1_32); + let b = _mm256_set1_ps(OPRND2_32); + let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm256_mask_xor_ps(src, 0b01010101, a, b); + let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq,avx512vl")] + unsafe fn test_mm256_maskz_xor_ps() { + let a = _mm256_set1_ps(OPRND1_32); + 
let b = _mm256_set1_ps(OPRND2_32); + let r = _mm256_maskz_xor_ps(0b01010101, a, b); + let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_xor_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_xor_ps(a, b); + let e = _mm512_set1_ps(XOR_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_mask_xor_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let src = _mm512_set_ps(1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.); + let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b); + let e = _mm512_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32, 15., XOR_32); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512dq")] + unsafe fn test_mm512_maskz_xor_ps() { + let a = _mm512_set1_ps(OPRND1_32); + let b = _mm512_set1_ps(OPRND2_32); + let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b); + let e = _mm512_set_ps(0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32); + assert_eq_m512(r, e); + } + + +}
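
A minimal usage sketch of the write-mask vs. zero-mask behaviour added by this patch, reusing the bit patterns from the tests above. The standalone `demo` function is illustrative only: it assumes an avx512dq+avx512vl target and that these intrinsics are exported (nightly `stdarch_x86_avx512` feature); it is not part of the patch.

#[cfg(all(target_arch = "x86_64", target_feature = "avx512dq", target_feature = "avx512vl"))]
unsafe fn demo() {
    use core::arch::x86_64::*;
    let a = _mm_set1_pd(f64::from_bits(0x3333333333333333));
    let b = _mm_set1_pd(f64::from_bits(0x5555555555555555));
    let src = _mm_set_pd(1.0, 2.0);
    // Mask 0b01: lane 0 takes the AND result (bit pattern 0x1111...), lane 1 keeps src.
    let write_masked = _mm_mask_and_pd(src, 0b01, a, b);
    // Same mask with the maskz variant: lane 1 is zeroed instead of copied from src.
    let zero_masked = _mm_maskz_and_pd(0b01, a, b);
    let _ = (write_masked, zero_masked);
}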