diff --git a/library/stdarch/crates/core_arch/avx512bw.md b/library/stdarch/crates/core_arch/avx512bw.md
new file mode 100644
index 000000000000..5a5c7b7ec7cc
--- /dev/null
+++ b/library/stdarch/crates/core_arch/avx512bw.md
@@ -0,0 +1,322 @@
+["AVX512BW"]
+
+ * [x] [`_mm512_abs_epi16`]
+ * [x] [`_mm512_mask_abs_epi16`]
+ * [x] [`_mm512_maskz_abs_epi16`]
+ * [x] [`_mm512_abs_epi8`]
+ * [x] [`_mm512_mask_abs_epi8`]
+ * [x] [`_mm512_maskz_abs_epi8`]
+ * [x] [`_mm512_add_epi16`]
+ * [x] [`_mm512_mask_add_epi16`]
+ * [x] [`_mm512_maskz_add_epi16`]
+ * [x] [`_mm512_add_epi8`]
+ * [x] [`_mm512_mask_add_epi8`]
+ * [x] [`_mm512_maskz_add_epi8`]
+ * [x] [`_mm512_adds_epi16`]
+ * [x] [`_mm512_mask_adds_epi16`]
+ * [x] [`_mm512_maskz_adds_epi16`]
+ * [x] [`_mm512_adds_epi8`]
+ * [x] [`_mm512_mask_adds_epi8`]
+ * [x] [`_mm512_maskz_adds_epi8`]
+ * [x] [`_mm512_adds_epu16`]
+ * [x] [`_mm512_mask_adds_epu16`]
+ * [x] [`_mm512_maskz_adds_epu16`]
+ * [x] [`_mm512_adds_epu8`]
+ * [x] [`_mm512_mask_adds_epu8`]
+ * [x] [`_mm512_maskz_adds_epu8`]
+ * [x] [`_mm512_alignr_epi8`]
+ * [_] [`_mm512_mask_alignr_epi8`]
+ * [_] [`_mm512_maskz_alignr_epi8`]
+ * [x] [`_mm512_avg_epu16`]
+ * [x] [`_mm512_mask_avg_epu16`]
+ * [x] [`_mm512_maskz_avg_epu16`]
+ * [x] [`_mm512_avg_epu8`]
+ * [x] [`_mm512_mask_avg_epu8`]
+ * [x] [`_mm512_maskz_avg_epu8`]
+ * [x] [`_mm512_mask_blend_epi16`]
+ * [x] [`_mm512_mask_blend_epi8`]
+ * [x] [`_mm512_broadcastb_epi8`]
+ * [x] [`_mm512_mask_broadcastb_epi8`]
+ * [x] [`_mm512_maskz_broadcastb_epi8`]
+ * [x] [`_mm512_broadcastw_epi16`]
+ * [x] [`_mm512_mask_broadcastw_epi16`]
+ * [x] [`_mm512_maskz_broadcastw_epi16`]
+ * [_] [`_mm512_bslli_epi128`]
+ * [_] [`_mm512_bsrli_epi128`]
+ * [x] [`_mm512_cmp_epi16_mask`]
+ * [x] [`_mm512_mask_cmp_epi16_mask`]
+ * [x] [`_mm512_cmp_epi8_mask`]
+ * [x] [`_mm512_mask_cmp_epi8_mask`]
+ * [x] [`_mm512_cmp_epu16_mask`]
+ * [x] [`_mm512_mask_cmp_epu16_mask`]
+ * [x] [`_mm512_cmp_epu8_mask`]
+ * [x] [`_mm512_mask_cmp_epu8_mask`]
+ * [x] [`_mm512_cmpeq_epi16_mask`]
+ * [x] [`_mm512_mask_cmpeq_epi16_mask`]
+ * [x] [`_mm512_cmpeq_epi8_mask`]
+ * [x] [`_mm512_mask_cmpeq_epi8_mask`]
+ * [x] [`_mm512_cmpeq_epu16_mask`]
+ * [x] [`_mm512_mask_cmpeq_epu16_mask`]
+ * [x] [`_mm512_cmpeq_epu8_mask`]
+ * [x] [`_mm512_mask_cmpeq_epu8_mask`]
+ * [x] [`_mm512_cmpge_epi16_mask`]
+ * [x] [`_mm512_mask_cmpge_epi16_mask`]
+ * [x] [`_mm512_cmpge_epi8_mask`]
+ * [x] [`_mm512_mask_cmpge_epi8_mask`]
+ * [x] [`_mm512_cmpge_epu16_mask`]
+ * [x] [`_mm512_mask_cmpge_epu16_mask`]
+ * [x] [`_mm512_cmpge_epu8_mask`]
+ * [x] [`_mm512_mask_cmpge_epu8_mask`]
+ * [x] [`_mm512_cmpgt_epi16_mask`]
+ * [x] [`_mm512_mask_cmpgt_epi16_mask`]
+ * [x] [`_mm512_cmpgt_epi8_mask`]
+ * [x] [`_mm512_mask_cmpgt_epi8_mask`]
+ * [x] [`_mm512_cmpgt_epu16_mask`]
+ * [x] [`_mm512_mask_cmpgt_epu16_mask`]
+ * [x] [`_mm512_cmpgt_epu8_mask`]
+ * [x] [`_mm512_mask_cmpgt_epu8_mask`]
+ * [x] [`_mm512_cmple_epi16_mask`]
+ * [x] [`_mm512_mask_cmple_epi16_mask`]
+ * [x] [`_mm512_cmple_epi8_mask`]
+ * [x] [`_mm512_mask_cmple_epi8_mask`]
+ * [x] [`_mm512_cmple_epu16_mask`]
+ * [x] [`_mm512_mask_cmple_epu16_mask`]
+ * [x] [`_mm512_cmple_epu8_mask`]
+ * [x] [`_mm512_mask_cmple_epu8_mask`]
+ * [x] [`_mm512_cmplt_epi16_mask`]
+ * [x] [`_mm512_mask_cmplt_epi16_mask`]
+ * [x] [`_mm512_cmplt_epi8_mask`]
+ * [x] [`_mm512_mask_cmplt_epi8_mask`]
+ * [x] [`_mm512_cmplt_epu16_mask`]
+ * [x] [`_mm512_mask_cmplt_epu16_mask`]
+ * [x] [`_mm512_cmplt_epu8_mask`]
+ * [x] [`_mm512_mask_cmplt_epu8_mask`]
+ * [x] [`_mm512_cmpneq_epi16_mask`]
+ * [x] [`_mm512_mask_cmpneq_epi16_mask`]
+ * [x] [`_mm512_cmpneq_epi8_mask`]
+ * [x] [`_mm512_mask_cmpneq_epi8_mask`]
+ * [x] [`_mm512_cmpneq_epu16_mask`]
+ * [x] [`_mm512_mask_cmpneq_epu16_mask`]
+ * [x] [`_mm512_cmpneq_epu8_mask`]
+ * [x] [`_mm512_mask_cmpneq_epu8_mask`]
+ * [_] [`_mm512_cvtepi16_epi8`]
+ * [_] [`_mm512_mask_cvtepi16_epi8`]
+ * [_] [`_mm512_maskz_cvtepi16_epi8`]
+ * [_] [`_mm512_mask_cvtepi16_storeu_epi8`]
+ * [_] [`_mm512_cvtepi8_epi16`]
+ * [_] [`_mm512_mask_cvtepi8_epi16`]
+ * [_] [`_mm512_maskz_cvtepi8_epi16`]
+ * [_] [`_mm512_cvtepu8_epi16`]
+ * [_] [`_mm512_mask_cvtepu8_epi16`]
+ * [_] [`_mm512_maskz_cvtepu8_epi16`]
+ * [_] [`_cvtmask32_u32`]
+ * [_] [`_cvtmask64_u64`]
+ * [_] [`_mm512_cvtsepi16_epi8`]
+ * [_] [`_mm512_mask_cvtsepi16_epi8`]
+ * [_] [`_mm512_maskz_cvtsepi16_epi8`]
+ * [_] [`_mm512_mask_cvtsepi16_storeu_epi8`]
+ * [_] [`_cvtu32_mask32`]
+ * [_] [`_cvtu64_mask64`]
+ * [_] [`_mm512_cvtusepi16_epi8`]
+ * [_] [`_mm512_mask_cvtusepi16_epi8`]
+ * [_] [`_mm512_maskz_cvtusepi16_epi8`]
+ * [_] [`_mm512_mask_cvtusepi16_storeu_epi8`]
+ * [_] [`_mm512_dbsad_epu8`]
+ * [_] [`_mm512_mask_dbsad_epu8`]
+ * [_] [`_mm512_maskz_dbsad_epu8`]
+ * [_] [`_kadd_mask32`]
+ * [_] [`_kadd_mask64`]
+ * [_] [`_kand_mask32`]
+ * [_] [`_kand_mask64`]
+ * [_] [`_kandn_mask32`]
+ * [_] [`_kandn_mask64`]
+ * [_] [`_knot_mask32`]
+ * [_] [`_knot_mask64`]
+ * [_] [`_kor_mask32`]
+ * [_] [`_kor_mask64`]
+ * [_] [`_kortest_mask32_u8`]
+ * [_] [`_kortest_mask64_u8`]
+ * [_] [`_kortestc_mask32_u8`]
+ * [_] [`_kortestc_mask64_u8`]
+ * [_] [`_kortestz_mask32_u8`]
+ * [_] [`_kortestz_mask64_u8`]
+ * [_] [`_kshiftli_mask32`]
+ * [_] [`_kshiftli_mask64`]
+ * [_] [`_kshiftri_mask32`]
+ * [_] [`_kshiftri_mask64`]
+ * [_] [`_ktest_mask32_u8`]
+ * [_] [`_ktest_mask64_u8`]
+ * [_] [`_ktestc_mask32_u8`]
+ * [_] [`_ktestc_mask64_u8`]
+ * [_] [`_ktestz_mask32_u8`]
+ * [_] [`_ktestz_mask64_u8`]
+ * [_] [`_mm512_kunpackd`]
+ * [_] [`_mm512_kunpackw`]
+ * [_] [`_kxnor_mask32`]
+ * [_] [`_kxnor_mask64`]
+ * [_] [`_kxor_mask32`]
+ * [_] [`_kxor_mask64`]
+ * [_] [`_load_mask32`]
+ * [_] [`_load_mask64`]
+ * [x] [`_mm512_loadu_epi16`]
+ * [_] [`_mm512_mask_loadu_epi16`]
+ * [_] [`_mm512_maskz_loadu_epi16`]
+ * [x] [`_mm512_loadu_epi8`]
+ * [_] [`_mm512_mask_loadu_epi8`]
+ * [_] [`_mm512_maskz_loadu_epi8`]
+ * [x] [`_mm512_madd_epi16`]
+ * [x] [`_mm512_mask_madd_epi16`]
+ * [x] [`_mm512_maskz_madd_epi16`]
+ * [x] [`_mm512_maddubs_epi16`]
+ * [x] [`_mm512_mask_maddubs_epi16`]
+ * [x] [`_mm512_maskz_maddubs_epi16`]
+ * [x] [`_mm512_mask_max_epi16`]
+ * [x] [`_mm512_maskz_max_epi16`]
+ * [x] [`_mm512_max_epi16`]
+ * [x] [`_mm512_mask_max_epi8`]
+ * [x] [`_mm512_maskz_max_epi8`]
+ * [x] [`_mm512_max_epi8`]
+ * [x] [`_mm512_mask_max_epu16`]
+ * [x] [`_mm512_maskz_max_epu16`]
+ * [x] [`_mm512_max_epu16`]
+ * [x] [`_mm512_mask_max_epu8`]
+ * [x] [`_mm512_maskz_max_epu8`]
+ * [x] [`_mm512_max_epu8`]
+ * [x] [`_mm512_mask_min_epi16`]
+ * [x] [`_mm512_maskz_min_epi16`]
+ * [x] [`_mm512_min_epi16`]
+ * [x] [`_mm512_mask_min_epi8`]
+ * [x] [`_mm512_maskz_min_epi8`]
+ * [x] [`_mm512_min_epi8`]
+ * [x] [`_mm512_mask_min_epu16`]
+ * [x] [`_mm512_maskz_min_epu16`]
+ * [x] [`_mm512_min_epu16`]
+ * [x] [`_mm512_mask_min_epu8`]
+ * [x] [`_mm512_maskz_min_epu8`]
+ * [x] [`_mm512_min_epu8`]
+ * [x] [`_mm512_mask_mov_epi16`]
+ * [x] [`_mm512_maskz_mov_epi16`]
+ * [x] [`_mm512_mask_mov_epi8`]
+ * [x] [`_mm512_maskz_mov_epi8`]
+ * [_] [`_mm512_movepi16_mask`]
+ * [_] [`_mm512_movepi8_mask`]
+ * [_] [`_mm512_movm_epi16`]
+ * [_] [`_mm512_movm_epi8`]
+ * [x] [`_mm512_mask_mulhi_epi16`]
+ * [x] [`_mm512_maskz_mulhi_epi16`]
+ * [x] [`_mm512_mulhi_epi16`]
+ * [x] [`_mm512_mask_mulhi_epu16`]
+ * [x] [`_mm512_maskz_mulhi_epu16`]
+ * [x] [`_mm512_mulhi_epu16`]
+ * [x] [`_mm512_mask_mulhrs_epi16`]
+ * [x] [`_mm512_maskz_mulhrs_epi16`]
+ * [x] [`_mm512_mulhrs_epi16`]
+ * [x] [`_mm512_mask_mullo_epi16`]
+ * [x] [`_mm512_maskz_mullo_epi16`]
+ * [x] [`_mm512_mullo_epi16`]
+ * [x] [`_mm512_mask_packs_epi16`]
+ * [x] [`_mm512_maskz_packs_epi16`]
+ * [x] [`_mm512_packs_epi16`]
+ * [x] [`_mm512_mask_packs_epi32`]
+ * [x] [`_mm512_maskz_packs_epi32`]
+ * [x] [`_mm512_packs_epi32`]
+ * [x] [`_mm512_mask_packus_epi16`]
+ * [x] [`_mm512_maskz_packus_epi16`]
+ * [x] [`_mm512_packus_epi16`]
+ * [x] [`_mm512_mask_packus_epi32`]
+ * [x] [`_mm512_maskz_packus_epi32`]
+ * [x] [`_mm512_packus_epi32`]
+ * [x] [`_mm512_mask_permutex2var_epi16`]
+ * [x] [`_mm512_mask2_permutex2var_epi16`]
+ * [x] [`_mm512_maskz_permutex2var_epi16`]
+ * [x] [`_mm512_permutex2var_epi16`]
+ * [x] [`_mm512_mask_permutexvar_epi16`]
+ * [x] [`_mm512_maskz_permutexvar_epi16`]
+ * [x] [`_mm512_permutexvar_epi16`]
+ * [_] [`_mm512_sad_epu8`]
+ * [x] [`_mm512_mask_set1_epi16`]
+ * [x] [`_mm512_maskz_set1_epi16`]
+ * [x] [`_mm512_mask_set1_epi8`]
+ * [x] [`_mm512_maskz_set1_epi8`]
+ * [_] [`_mm512_mask_shuffle_epi8`]
+ * [_] [`_mm512_maskz_shuffle_epi8`]
+ * [_] [`_mm512_shuffle_epi8`]
+ * [x] [`_mm512_mask_shufflehi_epi16`]
+ * [x] [`_mm512_maskz_shufflehi_epi16`]
+ * [x] [`_mm512_shufflehi_epi16`]
+ * [x] [`_mm512_mask_shufflelo_epi16`]
+ * [x] [`_mm512_maskz_shufflelo_epi16`]
+ * [x] [`_mm512_shufflelo_epi16`]
+ * [x] [`_mm512_mask_sll_epi16`]
+ * [x] [`_mm512_maskz_sll_epi16`]
+ * [x] [`_mm512_sll_epi16`]
+ * [x] [`_mm512_mask_slli_epi16`]
+ * [x] [`_mm512_maskz_slli_epi16`]
+ * [x] [`_mm512_slli_epi16`]
+ * [x] [`_mm512_mask_sllv_epi16`]
+ * [x] [`_mm512_maskz_sllv_epi16`]
+ * [x] [`_mm512_sllv_epi16`]
+ * [x] [`_mm512_mask_sra_epi16`]
+ * [x] [`_mm512_maskz_sra_epi16`]
+ * [x] [`_mm512_sra_epi16`]
+ * [x] [`_mm512_mask_srai_epi16`]
+ * [x] [`_mm512_maskz_srai_epi16`]
+ * [x] [`_mm512_srai_epi16`]
+ * [x] [`_mm512_mask_srav_epi16`]
+ * [x] [`_mm512_maskz_srav_epi16`]
+ * [x] [`_mm512_srav_epi16`]
+ * [x] [`_mm512_mask_srl_epi16`]
+ * [x] [`_mm512_maskz_srl_epi16`]
+ * [x] [`_mm512_srl_epi16`]
+ * [x] [`_mm512_mask_srli_epi16`]
+ * [x] [`_mm512_maskz_srli_epi16`]
+ * [x] [`_mm512_srli_epi16`]
+ * [x] [`_mm512_mask_srlv_epi16`]
+ * [x] [`_mm512_maskz_srlv_epi16`]
+ * [x] [`_mm512_srlv_epi16`]
+ * [_] [`_store_mask32`]
+ * [_] [`_store_mask64`]
+ * [_] [`_mm512_mask_storeu_epi16`]
+ * [x] [`_mm512_storeu_epi16`]
+ * [_] [`_mm512_mask_storeu_epi8`]
+ * [x] [`_mm512_storeu_epi8`]
+ * [x] [`_mm512_mask_sub_epi16`]
+ * [x] [`_mm512_maskz_sub_epi16`]
+ * [x] [`_mm512_sub_epi16`]
+ * [x] [`_mm512_mask_sub_epi8`]
+ * [x] [`_mm512_maskz_sub_epi8`]
+ * [x] [`_mm512_sub_epi8`]
+ * [x] [`_mm512_mask_subs_epi16`]
+ * [x] [`_mm512_maskz_subs_epi16`]
+ * [x] [`_mm512_subs_epi16`]
+ * [x] [`_mm512_mask_subs_epi8`]
+ * [x] [`_mm512_maskz_subs_epi8`]
+ * [x] [`_mm512_subs_epi8`]
+ * [x] [`_mm512_mask_subs_epu16`]
+ * [x] [`_mm512_maskz_subs_epu16`]
+ * [x] [`_mm512_subs_epu16`]
+ * [x] [`_mm512_mask_subs_epu8`]
+ * [x] [`_mm512_maskz_subs_epu8`]
+ * [x] [`_mm512_subs_epu8`]
+ * [_] [`_mm512_mask_test_epi16_mask`]
+ * [_] [`_mm512_test_epi16_mask`]
+ * [_] [`_mm512_mask_test_epi8_mask`]
+ * [_] [`_mm512_test_epi8_mask`]
+ * [_] [`_mm512_mask_testn_epi16_mask`]
+ * [_] [`_mm512_testn_epi16_mask`]
+ * [_] [`_mm512_mask_testn_epi8_mask`]
+ * [_] [`_mm512_testn_epi8_mask`]
+ * [x] [`_mm512_mask_unpackhi_epi16`]
+ * [x] [`_mm512_maskz_unpackhi_epi16`]
+ * [x] [`_mm512_unpackhi_epi16`]
+ * [x] [`_mm512_mask_unpackhi_epi8`]
+ * [x] [`_mm512_maskz_unpackhi_epi8`]
+ * [x] [`_mm512_unpackhi_epi8`]
+ * [x] [`_mm512_mask_unpacklo_epi16`]
+ * [x] [`_mm512_maskz_unpacklo_epi16`]
+ * [x] [`_mm512_unpacklo_epi16`]
+ * [x] [`_mm512_mask_unpacklo_epi8`]
+ * [x] [`_mm512_maskz_unpacklo_epi8`]
+ * [x] [`_mm512_unpacklo_epi8`]
+
+
diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs
index de1482f3227d..6108bc40dee8 100644
--- a/library/stdarch/crates/core_arch/src/simd.rs
+++ b/library/stdarch/crates/core_arch/src/simd.rs
@@ -685,6 +685,136 @@ simd_ty!(
x63
);
+simd_ty!(
+ u8x64[u8]: u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8,
+ u8 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31,
+ x32,
+ x33,
+ x34,
+ x35,
+ x36,
+ x37,
+ x38,
+ x39,
+ x40,
+ x41,
+ x42,
+ x43,
+ x44,
+ x45,
+ x46,
+ x47,
+ x48,
+ x49,
+ x50,
+ x51,
+ x52,
+ x53,
+ x54,
+ x55,
+ x56,
+ x57,
+ x58,
+ x59,
+ x60,
+ x61,
+ x62,
+ x63
+);
+
simd_ty!(
i16x32[i16]: i16,
i16,
@@ -751,6 +881,72 @@ simd_ty!(
x31
);
+simd_ty!( // declares u16x32: 32 `u16` element types paired with the names x0 through x31
+ u16x32[u16]: u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16,
+ u16 | x0,
+ x1,
+ x2,
+ x3,
+ x4,
+ x5,
+ x6,
+ x7,
+ x8,
+ x9,
+ x10,
+ x11,
+ x12,
+ x13,
+ x14,
+ x15,
+ x16,
+ x17,
+ x18,
+ x19,
+ x20,
+ x21,
+ x22,
+ x23,
+ x24,
+ x25,
+ x26,
+ x27,
+ x28,
+ x29,
+ x30,
+ x31
+);
+
simd_ty!(
i32x16[i32]: i32,
i32,
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
new file mode 100644
index 000000000000..3d4a5b6ba040
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -0,0 +1,6447 @@
+use crate::{
+ core_arch::{simd::*, simd_llvm::*, x86::*},
+ mem::{self, transmute},
+ ptr,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Returns the absolute value of each packed signed 16-bit integer in `a`; the unsigned results form `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpabsw))]
+pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i {
+    let lanes = a.as_i16x32();
+    // the all-zero bit pattern is a valid i16x32
+    let zeros: i16x32 = mem::zeroed();
+    let positive: i16x32 = simd_gt(lanes, zeros);
+    let negated = simd_sub(zeros, lanes);
+    // lanes that compare greater than zero are kept, every other lane becomes `0 - lane`
+    transmute(simd_select(positive, lanes, negated))
+}
+
+/// Takes the absolute value of each packed signed 16-bit integer in `a` and writes the unsigned results to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi16&expand=31)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpabsw))]
+pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, _mm512_abs_epi16(a).as_i16x32(), fallback))
+}
+
+/// Takes the absolute value of each packed signed 16-bit integer in `a` and writes the unsigned results to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi16&expand=32)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpabsw))]
+pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let magnitudes = _mm512_abs_epi16(a).as_i16x32();
+    transmute(simd_select_bitmask(k, magnitudes, zeroed))
+}
+
+/// Returns the absolute value of each packed signed 8-bit integer in `a`; the unsigned results form `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi8&expand=57)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpabsb))]
+pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i {
+    let lanes = a.as_i8x64();
+    // the all-zero bit pattern is a valid i8x64
+    let zeros: i8x64 = mem::zeroed();
+    let positive: i8x64 = simd_gt(lanes, zeros);
+    let negated = simd_sub(zeros, lanes);
+    // lanes that compare greater than zero are kept, every other lane becomes `0 - lane`
+    transmute(simd_select(positive, lanes, negated))
+}
+
+/// Takes the absolute value of each packed signed 8-bit integer in `a` and writes the unsigned results to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi8&expand=58)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpabsb))]
+pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, _mm512_abs_epi8(a).as_i8x64(), fallback))
+}
+
+/// Takes the absolute value of each packed signed 8-bit integer in `a` and writes the unsigned results to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi8&expand=59)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpabsb))]
+pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let magnitudes = _mm512_abs_epi8(a).as_i8x64();
+    transmute(simd_select_bitmask(k, magnitudes, zeroed))
+}
+
+/// Adds the packed 16-bit integers in `a` and `b` lane by lane and returns the sums as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi16&expand=91)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddw))]
+pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i16x32(), b.as_i16x32());
+    transmute(simd_add(lhs, rhs))
+}
+
+/// Adds the packed 16-bit integers in `a` and `b` and writes the sums to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi16&expand=92)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddw))]
+pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, _mm512_add_epi16(a, b).as_i16x32(), fallback))
+}
+
+/// Adds the packed 16-bit integers in `a` and `b` and writes the sums to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi16&expand=93)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddw))]
+pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let sums = _mm512_add_epi16(a, b).as_i16x32();
+    transmute(simd_select_bitmask(k, sums, zeroed))
+}
+
+/// Adds the packed 8-bit integers in `a` and `b` lane by lane and returns the sums as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi8&expand=118)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddb))]
+pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i8x64(), b.as_i8x64());
+    transmute(simd_add(lhs, rhs))
+}
+
+/// Adds the packed 8-bit integers in `a` and `b` and writes the sums to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi8&expand=119)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddb))]
+pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, _mm512_add_epi8(a, b).as_i8x64(), fallback))
+}
+
+/// Adds the packed 8-bit integers in `a` and `b` and writes the sums to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi8&expand=120)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddb))]
+pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let sums = _mm512_add_epi8(a, b).as_i8x64();
+    transmute(simd_select_bitmask(k, sums, zeroed))
+}
+
+/// Adds the packed unsigned 16-bit integers in `a` and `b` with saturation and returns the sums as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu16&expand=197)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddusw))]
+pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
+    // all 32 mask bits are set
+    let every_lane = 0b11111111_11111111_11111111_11111111;
+    let (lhs, rhs) = (a.as_u16x32(), b.as_u16x32());
+    let zeroed = _mm512_setzero_si512().as_u16x32();
+    let sums = vpaddusw(lhs, rhs, zeroed, every_lane);
+    transmute(sums)
+}
+
+/// Adds the packed unsigned 16-bit integers in `a` and `b` with saturation and writes the sums to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu16&expand=198)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddusw))]
+pub unsafe fn _mm512_mask_adds_epu16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let (lhs, rhs, fallback) = (a.as_u16x32(), b.as_u16x32(), src.as_u16x32());
+    transmute(vpaddusw(lhs, rhs, fallback, k))
+}
+
+/// Adds the packed unsigned 16-bit integers in `a` and `b` with saturation and writes the sums to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu16&expand=199)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddusw))]
+pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_u16x32();
+    let sums = vpaddusw(a.as_u16x32(), b.as_u16x32(), zeroed, k);
+    transmute(sums)
+}
+
+/// Adds the packed unsigned 8-bit integers in `a` and `b` with saturation and returns the sums as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu8&expand=206)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddusb))]
+pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
+    // all 64 mask bits are set
+    let every_lane = 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
+    let (lhs, rhs) = (a.as_u8x64(), b.as_u8x64());
+    let zeroed = _mm512_setzero_si512().as_u8x64();
+    let sums = vpaddusb(lhs, rhs, zeroed, every_lane);
+    transmute(sums)
+}
+
+/// Adds the packed unsigned 8-bit integers in `a` and `b` with saturation and writes the sums to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu8&expand=207)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddusb))]
+pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs, fallback) = (a.as_u8x64(), b.as_u8x64(), src.as_u8x64());
+    transmute(vpaddusb(lhs, rhs, fallback, k))
+}
+
+/// Adds the packed unsigned 8-bit integers in `a` and `b` with saturation and writes the sums to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu8&expand=208)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddusb))]
+pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_u8x64();
+    let sums = vpaddusb(a.as_u8x64(), b.as_u8x64(), zeroed, k);
+    transmute(sums)
+}
+
+/// Adds the packed signed 16-bit integers in `a` and `b` with saturation and returns the sums as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi16&expand=179)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddsw))]
+pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
+    // all 32 mask bits are set
+    let every_lane = 0b11111111_11111111_11111111_11111111;
+    let (lhs, rhs) = (a.as_i16x32(), b.as_i16x32());
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let sums = vpaddsw(lhs, rhs, zeroed, every_lane);
+    transmute(sums)
+}
+
+/// Adds the packed signed 16-bit integers in `a` and `b` with saturation and writes the sums to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi16&expand=180)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddsw))]
+pub unsafe fn _mm512_mask_adds_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let (lhs, rhs, fallback) = (a.as_i16x32(), b.as_i16x32(), src.as_i16x32());
+    transmute(vpaddsw(lhs, rhs, fallback, k))
+}
+
+/// Adds the packed signed 16-bit integers in `a` and `b` with saturation and writes the sums to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi16&expand=181)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddsw))]
+pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let sums = vpaddsw(a.as_i16x32(), b.as_i16x32(), zeroed, k);
+    transmute(sums)
+}
+
+/// Adds the packed signed 8-bit integers in `a` and `b` with saturation and returns the sums as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi8&expand=188)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddsb))]
+pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
+    // all 64 mask bits are set
+    let every_lane = 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
+    let (lhs, rhs) = (a.as_i8x64(), b.as_i8x64());
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let sums = vpaddsb(lhs, rhs, zeroed, every_lane);
+    transmute(sums)
+}
+
+/// Adds the packed signed 8-bit integers in `a` and `b` with saturation and writes the sums to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi8&expand=189)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddsb))]
+pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs, fallback) = (a.as_i8x64(), b.as_i8x64(), src.as_i8x64());
+    transmute(vpaddsb(lhs, rhs, fallback, k))
+}
+
+/// Adds the packed signed 8-bit integers in `a` and `b` with saturation and writes the sums to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi8&expand=190)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpaddsb))]
+pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let sums = vpaddsb(a.as_i8x64(), b.as_i8x64(), zeroed, k);
+    transmute(sums)
+}
+
+/// Subtracts each packed 16-bit integer in `b` from the matching packed 16-bit integer in `a` and returns the differences as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi16&expand=5685)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubw))]
+pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let (minuend, subtrahend) = (a.as_i16x32(), b.as_i16x32());
+    transmute(simd_sub(minuend, subtrahend))
+}
+
+/// Subtracts each packed 16-bit integer in `b` from the matching packed 16-bit integer in `a` and writes the differences to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi16&expand=5683)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubw))]
+pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, _mm512_sub_epi16(a, b).as_i16x32(), fallback))
+}
+
+/// Subtracts each packed 16-bit integer in `b` from the matching packed 16-bit integer in `a` and writes the differences to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi16&expand=5684)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubw))]
+pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let differences = _mm512_sub_epi16(a, b).as_i16x32();
+    transmute(simd_select_bitmask(k, differences, zeroed))
+}
+
+/// Subtracts each packed 8-bit integer in `b` from the matching packed 8-bit integer in `a` and returns the differences as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi8&expand=5712)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubb))]
+pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
+    let (minuend, subtrahend) = (a.as_i8x64(), b.as_i8x64());
+    transmute(simd_sub(minuend, subtrahend))
+}
+
+/// Subtracts each packed 8-bit integer in `b` from the matching packed 8-bit integer in `a` and writes the differences to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi8&expand=5710)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubb))]
+pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, _mm512_sub_epi8(a, b).as_i8x64(), fallback))
+}
+
+/// Subtracts each packed 8-bit integer in `b` from the matching packed 8-bit integer in `a` and writes the differences to `dst` under zeromask `k`; a lane whose mask bit is clear is zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi8&expand=5711)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubb))]
+pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let differences = _mm512_sub_epi8(a, b).as_i8x64();
+    transmute(simd_select_bitmask(k, differences, zeroed))
+}
+
+/// Subtracts each packed unsigned 16-bit integer in `b` from the matching packed unsigned 16-bit integer in `a` with saturation and returns the differences as `dst`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu16&expand=5793)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubusw))]
+pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
+    // all 32 mask bits are set
+    let every_lane = 0b11111111_11111111_11111111_11111111;
+    let (minuend, subtrahend) = (a.as_u16x32(), b.as_u16x32());
+    let zeroed = _mm512_setzero_si512().as_u16x32();
+    let differences = vpsubusw(minuend, subtrahend, zeroed, every_lane);
+    transmute(differences)
+}
+
+/// Subtracts each packed unsigned 16-bit integer in `b` from the matching packed unsigned 16-bit integer in `a` with saturation and writes the differences to `dst` under writemask `k`; a lane whose mask bit is clear is copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu16&expand=5791)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubusw))]
+pub unsafe fn _mm512_mask_subs_epu16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let (minuend, subtrahend, fallback) = (a.as_u16x32(), b.as_u16x32(), src.as_u16x32());
+    transmute(vpsubusw(minuend, subtrahend, fallback, k))
+}
+
+/// Performs a saturating subtraction of the packed unsigned 16-bit integers in `b` from those in `a` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu16&expand=5792)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubusw))]
+pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    // An all-zero vector is passed as the third operand alongside the caller's mask.
+    let zero = _mm512_setzero_si512().as_u16x32();
+    let diff = vpsubusw(a.as_u16x32(), b.as_u16x32(), zero, k);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed unsigned 8-bit integers in `b` from those in `a` and returns the differences.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu8&expand=5802)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubusb))]
+pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
+    let zero = _mm512_setzero_si512().as_u8x64();
+    // The mask literal has all 64 bits set, one per 8-bit lane.
+    let diff = vpsubusb(a.as_u8x64(), b.as_u8x64(), zero, 0xffff_ffff_ffff_ffff);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed unsigned 8-bit integers in `b` from those in `a` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu8&expand=5800)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubusb))]
+pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let fallback = src.as_u8x64();
+    let diff = vpsubusb(a.as_u8x64(), b.as_u8x64(), fallback, k);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed unsigned 8-bit integers in `b` from those in `a` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu8&expand=5801)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubusb))]
+pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    // An all-zero vector is passed as the third operand alongside the caller's mask.
+    let zero = _mm512_setzero_si512().as_u8x64();
+    let diff = vpsubusb(a.as_u8x64(), b.as_u8x64(), zero, k);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed signed 16-bit integers in `b` from those in `a` and returns the differences.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi16&expand=5775)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubsw))]
+pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let zero = _mm512_setzero_si512().as_i16x32();
+    // The mask literal has all 32 bits set, one per 16-bit lane.
+    let diff = vpsubsw(a.as_i16x32(), b.as_i16x32(), zero, 0xffff_ffff);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed signed 16-bit integers in `b` from those in `a` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi16&expand=5773)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubsw))]
+pub unsafe fn _mm512_mask_subs_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let fallback = src.as_i16x32();
+    let diff = vpsubsw(a.as_i16x32(), b.as_i16x32(), fallback, k);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed signed 16-bit integers in `b` from those in `a` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi16&expand=5774)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubsw))]
+pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    // An all-zero vector is passed as the third operand alongside the caller's mask.
+    let zero = _mm512_setzero_si512().as_i16x32();
+    let diff = vpsubsw(a.as_i16x32(), b.as_i16x32(), zero, k);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed signed 8-bit integers in `b` from those in `a` and returns the differences.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi8&expand=5784)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubsb))]
+pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
+    let zero = _mm512_setzero_si512().as_i8x64();
+    // The mask literal has all 64 bits set, one per 8-bit lane.
+    let diff = vpsubsb(a.as_i8x64(), b.as_i8x64(), zero, 0xffff_ffff_ffff_ffff);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed signed 8-bit integers in `b` from those in `a` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi8&expand=5782)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubsb))]
+pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let fallback = src.as_i8x64();
+    let diff = vpsubsb(a.as_i8x64(), b.as_i8x64(), fallback, k);
+    transmute(diff)
+}
+
+/// Performs a saturating subtraction of the packed signed 8-bit integers in `b` from those in `a` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi8&expand=5783)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsubsb))]
+pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    // An all-zero vector is passed as the third operand alongside the caller's mask.
+    let zero = _mm512_setzero_si512().as_i8x64();
+    let diff = vpsubsb(a.as_i8x64(), b.as_i8x64(), zero, k);
+    transmute(diff)
+}
+
+/// Multiplies the packed unsigned 16-bit integers in `a` and `b` into 32-bit intermediate products and returns the high 16 bits of each product.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epu16&expand=3973)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhuw))]
+pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_u16x32(), b.as_u16x32());
+    transmute(vpmulhuw(lhs, rhs))
+}
+
+/// Multiplies the packed unsigned 16-bit integers in `a` and `b` into 32-bit intermediate products and keeps the high 16 bits of each, under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epu16&expand=3971)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhuw))]
+pub unsafe fn _mm512_mask_mulhi_epu16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let high_halves = _mm512_mulhi_epu16(a, b).as_u16x32();
+    let fallback = src.as_u16x32();
+    transmute(simd_select_bitmask(k, high_halves, fallback))
+}
+
+/// Multiplies the packed unsigned 16-bit integers in `a` and `b` into 32-bit intermediate products and keeps the high 16 bits of each, under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epu16&expand=3972)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhuw))]
+pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let high_halves = _mm512_mulhi_epu16(a, b).as_u16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, high_halves, _mm512_setzero_si512().as_u16x32()))
+}
+
+/// Multiplies the packed signed 16-bit integers in `a` and `b` into 32-bit intermediate products and returns the high 16 bits of each product.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epi16&expand=3962)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhw))]
+pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i16x32(), b.as_i16x32());
+    transmute(vpmulhw(lhs, rhs))
+}
+
+/// Multiplies the packed signed 16-bit integers in `a` and `b` into 32-bit intermediate products and keeps the high 16 bits of each, under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epi16&expand=3960)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhw))]
+pub unsafe fn _mm512_mask_mulhi_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let high_halves = _mm512_mulhi_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, high_halves, fallback))
+}
+
+/// Multiplies the packed signed 16-bit integers in `a` and `b` into 32-bit intermediate products and keeps the high 16 bits of each, under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epi16&expand=3961)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhw))]
+pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let high_halves = _mm512_mulhi_epi16(a, b).as_i16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, high_halves, _mm512_setzero_si512().as_i16x32()))
+}
+
+/// Multiplies the packed signed 16-bit integers in `a` and `b` into signed 32-bit intermediates, truncates each intermediate to its 18 most significant bits, rounds by adding 1, and returns bits \[16:1\] of each.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhrs_epi16&expand=3986)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhrsw))]
+pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i16x32(), b.as_i16x32());
+    transmute(vpmulhrsw(lhs, rhs))
+}
+
+/// Multiplies the packed signed 16-bit integers in `a` and `b` into signed 32-bit intermediates, truncates each intermediate to its 18 most significant bits, rounds by adding 1, and keeps bits \[16:1\] of each, under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhrs_epi16&expand=3984)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhrsw))]
+pub unsafe fn _mm512_mask_mulhrs_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let rounded = _mm512_mulhrs_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, rounded, fallback))
+}
+
+/// Multiplies the packed signed 16-bit integers in `a` and `b` into signed 32-bit intermediates, truncates each intermediate to its 18 most significant bits, rounds by adding 1, and keeps bits \[16:1\] of each, under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhrs_epi16&expand=3985)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmulhrsw))]
+pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let rounded = _mm512_mulhrs_epi16(a, b).as_i16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, rounded, _mm512_setzero_si512().as_i16x32()))
+}
+
+/// Multiplies the packed 16-bit integers in `a` and `b` into 32-bit intermediate products and returns the low 16 bits of each product.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullo_epi16&expand=3996)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmullw))]
+pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i16x32(), b.as_i16x32());
+    transmute(simd_mul(lhs, rhs))
+}
+
+/// Multiplies the packed 16-bit integers in `a` and `b` into 32-bit intermediate products and keeps the low 16 bits of each, under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullo_epi16&expand=3994)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmullw))]
+pub unsafe fn _mm512_mask_mullo_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let low_halves = _mm512_mullo_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, low_halves, fallback))
+}
+
+/// Multiplies the packed 16-bit integers in `a` and `b` into 32-bit intermediate products and keeps the low 16 bits of each, under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mullo_epi16&expand=3995)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmullw))]
+pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let low_halves = _mm512_mullo_epi16(a, b).as_i16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, low_halves, _mm512_setzero_si512().as_i16x32()))
+}
+
+/// Compares the packed unsigned 16-bit integers in `a` and `b` lane by lane and returns the packed maximum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu16&expand=3609)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxuw))]
+pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_u16x32(), b.as_u16x32());
+    transmute(vpmaxuw(lhs, rhs))
+}
+
+/// Computes the packed maximum of the unsigned 16-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu16&expand=3607)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxuw))]
+pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epu16(a, b).as_u16x32();
+    let fallback = src.as_u16x32();
+    transmute(simd_select_bitmask(k, maxima, fallback))
+}
+
+/// Computes the packed maximum of the unsigned 16-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu16&expand=3608)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxuw))]
+pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epu16(a, b).as_u16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, maxima, _mm512_setzero_si512().as_u16x32()))
+}
+
+/// Compares the packed unsigned 8-bit integers in `a` and `b` lane by lane and returns the packed maximum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu8&expand=3636)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxub))]
+pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_u8x64(), b.as_u8x64());
+    transmute(vpmaxub(lhs, rhs))
+}
+
+/// Computes the packed maximum of the unsigned 8-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu8&expand=3634)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxub))]
+pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epu8(a, b).as_u8x64();
+    let fallback = src.as_u8x64();
+    transmute(simd_select_bitmask(k, maxima, fallback))
+}
+
+/// Computes the packed maximum of the unsigned 8-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu8&expand=3635)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxub))]
+pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epu8(a, b).as_u8x64();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, maxima, _mm512_setzero_si512().as_u8x64()))
+}
+
+/// Compares the packed signed 16-bit integers in `a` and `b` lane by lane and returns the packed maximum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi16&expand=3573)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxsw))]
+pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i16x32(), b.as_i16x32());
+    transmute(vpmaxsw(lhs, rhs))
+}
+
+/// Computes the packed maximum of the signed 16-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi16&expand=3571)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxsw))]
+pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, maxima, fallback))
+}
+
+/// Computes the packed maximum of the signed 16-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi16&expand=3572)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxsw))]
+pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epi16(a, b).as_i16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, maxima, _mm512_setzero_si512().as_i16x32()))
+}
+
+/// Compares the packed signed 8-bit integers in `a` and `b` lane by lane and returns the packed maximum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi8&expand=3600)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxsb))]
+pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i8x64(), b.as_i8x64());
+    transmute(vpmaxsb(lhs, rhs))
+}
+
+/// Computes the packed maximum of the signed 8-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi8&expand=3598)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxsb))]
+pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epi8(a, b).as_i8x64();
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, maxima, fallback))
+}
+
+/// Computes the packed maximum of the signed 8-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi8&expand=3599)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaxsb))]
+pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let maxima = _mm512_max_epi8(a, b).as_i8x64();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, maxima, _mm512_setzero_si512().as_i8x64()))
+}
+
+/// Compares the packed unsigned 16-bit integers in `a` and `b` lane by lane and returns the packed minimum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu16&expand=3723)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminuw))]
+pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_u16x32(), b.as_u16x32());
+    transmute(vpminuw(lhs, rhs))
+}
+
+/// Computes the packed minimum of the unsigned 16-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu16&expand=3721)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminuw))]
+pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epu16(a, b).as_u16x32();
+    let fallback = src.as_u16x32();
+    transmute(simd_select_bitmask(k, minima, fallback))
+}
+
+/// Computes the packed minimum of the unsigned 16-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu16&expand=3722)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminuw))]
+pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epu16(a, b).as_u16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, minima, _mm512_setzero_si512().as_u16x32()))
+}
+
+/// Compares the packed unsigned 8-bit integers in `a` and `b` lane by lane and returns the packed minimum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu8&expand=3750)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminub))]
+pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_u8x64(), b.as_u8x64());
+    transmute(vpminub(lhs, rhs))
+}
+
+/// Computes the packed minimum of the unsigned 8-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu8&expand=3748)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminub))]
+pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epu8(a, b).as_u8x64();
+    let fallback = src.as_u8x64();
+    transmute(simd_select_bitmask(k, minima, fallback))
+}
+
+/// Computes the packed minimum of the unsigned 8-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu8&expand=3749)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminub))]
+pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epu8(a, b).as_u8x64();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, minima, _mm512_setzero_si512().as_u8x64()))
+}
+
+/// Compares the packed signed 16-bit integers in `a` and `b` lane by lane and returns the packed minimum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi16&expand=3687)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminsw))]
+pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i16x32(), b.as_i16x32());
+    transmute(vpminsw(lhs, rhs))
+}
+
+/// Computes the packed minimum of the signed 16-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi16&expand=3685)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminsw))]
+pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, minima, fallback))
+}
+
+/// Computes the packed minimum of the signed 16-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi16&expand=3686)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminsw))]
+pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epi16(a, b).as_i16x32();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, minima, _mm512_setzero_si512().as_i16x32()))
+}
+
+/// Compares the packed signed 8-bit integers in `a` and `b` lane by lane and returns the packed minimum values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi8&expand=3714)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminsb))]
+pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
+    let (lhs, rhs) = (a.as_i8x64(), b.as_i8x64());
+    transmute(vpminsb(lhs, rhs))
+}
+
+/// Computes the packed minimum of the signed 8-bit integers in `a` and `b` under writemask `k`. Lanes whose mask bit is clear are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi8&expand=3712)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminsb))]
+pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epi8(a, b).as_i8x64();
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, minima, fallback))
+}
+
+/// Computes the packed minimum of the signed 8-bit integers in `a` and `b` under zeromask `k`. Lanes whose mask bit is clear are zeroed in the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi8&expand=3713)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpminsb))]
+pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let minima = _mm512_min_epi8(a, b).as_i8x64();
+    // Unselected lanes take their value from an all-zero vector.
+    transmute(simd_select_bitmask(k, minima, _mm512_setzero_si512().as_i8x64()))
+}
+
+/// Compare packed unsigned 16-bit integers in a and b for less-than (`a < b`), and return the per-lane results as a `__mmask32`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu16_mask&expand=1050)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // The first type argument fixes the vector type produced by `simd_lt`;
+    // the bitmask integer type is inferred from the return type.
+    simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32()))
+}
+
+/// Compares the packed unsigned 16-bit integers in `a` and `b` for less-than and returns the result mask ANDed with zeromask `k1`, so result bits whose `k1` bit is clear are zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    let less_than = _mm512_cmplt_epu16_mask(a, b);
+    less_than & k1
+}
+
+/// Compare packed unsigned 8-bit integers in a and b for less-than (`a < b`), and return the per-lane results as a `__mmask64`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu8_mask&expand=1068)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // The first type argument fixes the vector type produced by `simd_lt`;
+    // the bitmask integer type is inferred from the return type.
+    simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64()))
+}
+
+/// Compares the packed unsigned 8-bit integers in `a` and `b` for less-than and returns the result mask ANDed with zeromask `k1`, so result bits whose `k1` bit is clear are zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    let less_than = _mm512_cmplt_epu8_mask(a, b);
+    less_than & k1
+}
+
+/// Compare packed signed 16-bit integers in a and b for less-than (`a < b`), and return the per-lane results as a `__mmask32`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi16_mask&expand=1022)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // The first type argument fixes the vector type produced by `simd_lt`;
+    // the bitmask integer type is inferred from the return type.
+    simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32()))
+}
+
+/// Compares the packed signed 16-bit integers in `a` and `b` for less-than and returns the result mask ANDed with zeromask `k1`, so result bits whose `k1` bit is clear are zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    let less_than = _mm512_cmplt_epi16_mask(a, b);
+    less_than & k1
+}
+
+/// Compare packed signed 8-bit integers in a and b for less-than (`a < b`), and return the per-lane results as a `__mmask64`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi8_mask&expand=1044)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // The first type argument fixes the vector type produced by `simd_lt`;
+    // the bitmask integer type is inferred from the return type.
+    simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64()))
+}
+
+/// Compares the packed signed 8-bit integers in `a` and `b` for less-than and returns the result mask ANDed with zeromask `k1`, so result bits whose `k1` bit is clear are zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    let less_than = _mm512_cmplt_epi8_mask(a, b);
+    less_than & k1
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for greater-than, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu16_mask&expand=927)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a > b` on the unsigned 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32()))
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpgt_epu16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for greater-than, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu8_mask&expand=945)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a > b` on the unsigned 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64()))
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpgt_epu8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for greater-than, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi16_mask&expand=897)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a > b` on the signed 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32()))
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpgt_epi16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for greater-than, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi8_mask&expand=921)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a > b` on the signed 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64()))
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for greater-than, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpgt_epi8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for less-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu16_mask&expand=989)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a <= b` on the unsigned 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32()))
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu16_mask&expand=990)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmple_epu16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for less-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu8_mask&expand=1007)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a <= b` on the unsigned 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64()))
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu8_mask&expand=1008)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmple_epu8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for less-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi16_mask&expand=965)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a <= b` on the signed 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32()))
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi16_mask&expand=966)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmple_epi16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for less-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi8_mask&expand=983)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a <= b` on the signed 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64()))
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for less-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi8_mask&expand=984)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmple_epi8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for greater-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu16_mask&expand=867)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a >= b` on the unsigned 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32()))
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu16_mask&expand=868)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpge_epu16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for greater-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu8_mask&expand=885)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a >= b` on the unsigned 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64()))
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu8_mask&expand=886)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpge_epu8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for greater-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi16_mask&expand=843)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a >= b` on the signed 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32()))
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi16_mask&expand=844)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpge_epi16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for greater-than-or-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi8_mask&expand=861)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a >= b` on the signed 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64()))
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for greater-than-or-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi8_mask&expand=862)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpge_epi8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for equality, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu16_mask&expand=801)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a == b` on the unsigned 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32()))
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpeq_epu16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for equality, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu8_mask&expand=819)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a == b` on the unsigned 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64()))
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpeq_epu8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for equality, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi16_mask&expand=771)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a == b` on the signed 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32()))
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpeq_epi16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for equality, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi8_mask&expand=795)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a == b` on the signed 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64()))
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for equality, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpeq_epi8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for not-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu16_mask&expand=1106)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a != b` on the unsigned 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32()))
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpneq_epu16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for not-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu8_mask&expand=1124)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a != b` on the unsigned 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64()))
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpneq_epu8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for not-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi16_mask&expand=1082)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+    // Lane-wise `a != b` on the signed 16-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32()))
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpneq_epi16_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for not-equal, and return the results as mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi8_mask&expand=1100)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+    // Lane-wise `a != b` on the signed 8-bit view, collapsed into a bitmask by `simd_bitmask`.
+    // The vector type is spelled out; the mask type is inferred from the return type.
+    simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64()))
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` for not-equal, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+    // Compute the full comparison mask, then keep only the bits selected by `k1`.
+    let unmasked = _mm512_cmpneq_epi8_mask(a, b);
+    k1 & unmasked
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu16_mask&expand=715)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask32 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // An all-ones mask argument: no lane is masked off.
+            vpcmpuw(a.as_u16x32(), b.as_u16x32(), $pred, 0xFFFF_FFFF)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Compare packed unsigned 16-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu16_mask&expand=716)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_mask_cmp_epu16_mask(
+    k1: __mmask32,
+    a: __m512i,
+    b: __m512i,
+    imm8: i32,
+) -> __mmask32 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // `k1` is forwarded as the mask argument of the compare.
+            vpcmpuw(a.as_u16x32(), b.as_u16x32(), $pred, k1)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu8_mask&expand=733)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask64 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // An all-ones mask argument: no lane is masked off.
+            vpcmpub(a.as_u8x64(), b.as_u8x64(), $pred, 0xFFFF_FFFF_FFFF_FFFF)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Compare packed unsigned 8-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu8_mask&expand=734)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_mask_cmp_epu8_mask(
+    k1: __mmask64,
+    a: __m512i,
+    b: __m512i,
+    imm8: i32,
+) -> __mmask64 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // `k1` is forwarded as the mask argument of the compare.
+            vpcmpub(a.as_u8x64(), b.as_u8x64(), $pred, k1)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi16_mask&expand=691)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask32 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // An all-ones mask argument: no lane is masked off.
+            vpcmpw(a.as_i16x32(), b.as_i16x32(), $pred, 0xFFFF_FFFF)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Compare packed signed 16-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi16_mask&expand=692)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_mask_cmp_epi16_mask(
+    k1: __mmask32,
+    a: __m512i,
+    b: __m512i,
+    imm8: i32,
+) -> __mmask32 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // `k1` is forwarded as the mask argument of the compare.
+            vpcmpw(a.as_i16x32(), b.as_i16x32(), $pred, k1)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi8_mask&expand=709)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i, imm8: i32) -> __mmask64 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // An all-ones mask argument: no lane is masked off.
+            vpcmpb(a.as_i8x64(), b.as_i8x64(), $pred, 0xFFFF_FFFF_FFFF_FFFF)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Compare packed signed 8-bit integers in `a` and `b` using the comparison predicate selected by `imm8`, and store the results in mask vector `k` using zeromask `k1` (result bits are zeroed out when the corresponding bit of `k1` is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi8_mask&expand=710)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
+pub unsafe fn _mm512_mask_cmp_epi8_mask(
+    k1: __mmask64,
+    a: __m512i,
+    b: __m512i,
+    imm8: i32,
+) -> __mmask64 {
+    // `constify_imm3!` is handed this macro's name and presumably invokes it with a constant predicate chosen from `imm8`.
+    macro_rules! compare {
+        ($pred:expr) => {
+            // `k1` is forwarded as the mask argument of the compare.
+            vpcmpb(a.as_i8x64(), b.as_i8x64(), $pred, k1)
+        };
+    }
+    transmute(constify_imm3!(imm8, compare))
+}
+
+/// Load 512 bits (composed of 32 packed 16-bit integers) from memory into dst. `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi16&expand=3368)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
+    // Reinterpret the element pointer as a whole-vector pointer and do an unaligned read.
+    let src = mem_addr as *const __m512i;
+    src.read_unaligned()
+}
+
+/// Load 512 bits (composed of 64 packed 8-bit integers) from memory into dst. `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi8&expand=3395)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
+    // Reinterpret the element pointer as a whole-vector pointer and do an unaligned read.
+    let src = mem_addr as *const __m512i;
+    src.read_unaligned()
+}
+
+/// Store 512 bits (composed of 32 packed 16-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi16&expand=5622)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
+pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
+    // Reinterpret the element pointer as a whole-vector pointer and do an unaligned write.
+    let dst = mem_addr as *mut __m512i;
+    dst.write_unaligned(a);
+}
+
+/// Store 512 bits (composed of 64 packed 8-bit integers) from `a` into memory. `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi8&expand=5640)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
+pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
+    // Reinterpret the element pointer as a whole-vector pointer and do an unaligned write.
+    let dst = mem_addr as *mut __m512i;
+    dst.write_unaligned(a);
+}
+
+/// Multiply packed signed 16-bit integers in `a` and `b`, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_madd_epi16&expand=3511)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaddwd))]
+pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
+    // Both operands are passed through their signed 16-bit views; the result is cast back to `__m512i`.
+    let sums = vpmaddwd(a.as_i16x32(), b.as_i16x32());
+    transmute(sums)
+}
+
+/// Multiply packed signed 16-bit integers in `a` and `b`, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_madd_epi16&expand=3512)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaddwd))]
+pub unsafe fn _mm512_mask_madd_epi16(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    // Results are 32-bit lanes, so the blend with `src` happens on the i32x16 view.
+    let fallback = src.as_i32x16();
+    let sums = _mm512_madd_epi16(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, sums, fallback))
+}
+
+/// Multiply packed signed 16-bit integers in `a` and `b`, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_madd_epi16&expand=3513)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaddwd))]
+pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    // Results are 32-bit lanes, so the blend with an all-zero vector happens on the i32x16 view.
+    let zeroed = _mm512_setzero_si512().as_i32x16();
+    let sums = _mm512_madd_epi16(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, sums, zeroed))
+}
+
+/// Vertically multiply each unsigned 8-bit integer from `a` with the corresponding signed 8-bit integer from `b`, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maddubs_epi16&expand=3539)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaddubsw))]
+pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
+    // NOTE(review): `a` holds unsigned bytes but is passed through the i8x64 view like `b`;
+    // presumably the `vpmaddubsw` binding is declared on i8x64 and only the bit pattern matters — confirm.
+    let sums = vpmaddubsw(a.as_i8x64(), b.as_i8x64());
+    transmute(sums)
+}
+
+/// Multiplies the packed unsigned 8-bit integers of `a` by the packed signed 8-bit integers of `b`, producing intermediate signed 16-bit integers, horizontally adds adjacent pairs of those intermediates, and packs the saturated results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_maddubs_epi16&expand=3540)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaddubsw))]
+pub unsafe fn _mm512_mask_maddubs_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    // Unmasked result, viewed as thirty-two 16-bit lanes to match the 32-bit mask.
+    let sums = _mm512_maddubs_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    let merged = simd_select_bitmask(k, sums, fallback);
+    transmute(merged)
+}
+
+/// Multiplies the packed unsigned 8-bit integers of `a` by the packed signed 8-bit integers of `b`, producing intermediate signed 16-bit integers, horizontally adds adjacent pairs of those intermediates, and packs the saturated results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_maddubs_epi16&expand=3541)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmaddubsw))]
+pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let sums = _mm512_maddubs_epi16(a, b).as_i16x32();
+    transmute(simd_select_bitmask(k, sums, zeroed))
+}
+
+/// Converts the packed signed 32-bit integers of `a` and `b` to packed 16-bit integers using signed saturation and returns the packed result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi32&expand=4091)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackssdw))]
+pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
+    // Inputs are viewed as sixteen 32-bit lanes each before packing.
+    let lhs = a.as_i32x16();
+    let rhs = b.as_i32x16();
+    transmute(vpackssdw(lhs, rhs))
+}
+
+/// Converts the packed signed 32-bit integers of `a` and `b` to packed 16-bit integers using signed saturation, storing the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi32&expand=4089)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackssdw))]
+pub unsafe fn _mm512_mask_packs_epi32(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    // The packed output holds 16-bit lanes, so the blend works on the i16x32 view.
+    let packed = _mm512_packs_epi32(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    let merged = simd_select_bitmask(k, packed, fallback);
+    transmute(merged)
+}
+
+/// Converts the packed signed 32-bit integers of `a` and `b` to packed 16-bit integers using signed saturation, storing the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi32&expand=4090)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackssdw))]
+pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let packed = _mm512_packs_epi32(a, b).as_i16x32();
+    transmute(simd_select_bitmask(k, packed, zeroed))
+}
+
+/// Converts the packed signed 16-bit integers of `a` and `b` to packed 8-bit integers using signed saturation and returns the packed result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi16&expand=4082)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpacksswb))]
+pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
+    // Inputs are viewed as thirty-two 16-bit lanes each before packing.
+    let lhs = a.as_i16x32();
+    let rhs = b.as_i16x32();
+    transmute(vpacksswb(lhs, rhs))
+}
+
+/// Converts the packed signed 16-bit integers of `a` and `b` to packed 8-bit integers using signed saturation, storing the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi16&expand=4080)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpacksswb))]
+pub unsafe fn _mm512_mask_packs_epi16(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    // The packed output holds 8-bit lanes, so the blend works on the i8x64 view.
+    let packed = _mm512_packs_epi16(a, b).as_i8x64();
+    let fallback = src.as_i8x64();
+    let merged = simd_select_bitmask(k, packed, fallback);
+    transmute(merged)
+}
+
+/// Converts the packed signed 16-bit integers of `a` and `b` to packed 8-bit integers using signed saturation, storing the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi16&expand=4081)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpacksswb))]
+pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let packed = _mm512_packs_epi16(a, b).as_i8x64();
+    transmute(simd_select_bitmask(k, packed, zeroed))
+}
+
+/// Converts the packed signed 32-bit integers of `a` and `b` to packed 16-bit integers using unsigned saturation and returns the packed result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi32&expand=4130)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackusdw))]
+pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
+    // Inputs are viewed as sixteen 32-bit lanes each before packing.
+    let lhs = a.as_i32x16();
+    let rhs = b.as_i32x16();
+    transmute(vpackusdw(lhs, rhs))
+}
+
+/// Converts the packed signed 32-bit integers of `a` and `b` to packed 16-bit integers using unsigned saturation, storing the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi32&expand=4128)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackusdw))]
+pub unsafe fn _mm512_mask_packus_epi32(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    // The packed output holds 16-bit lanes, so the blend works on the i16x32 view.
+    let packed = _mm512_packus_epi32(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    let merged = simd_select_bitmask(k, packed, fallback);
+    transmute(merged)
+}
+
+/// Converts the packed signed 32-bit integers of `a` and `b` to packed 16-bit integers using unsigned saturation, storing the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi32&expand=4129)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackusdw))]
+pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let packed = _mm512_packus_epi32(a, b).as_i16x32();
+    transmute(simd_select_bitmask(k, packed, zeroed))
+}
+
+/// Converts the packed signed 16-bit integers of `a` and `b` to packed 8-bit integers using unsigned saturation and returns the packed result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi16&expand=4121)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackuswb))]
+pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
+    // Inputs are viewed as thirty-two 16-bit lanes each before packing.
+    let lhs = a.as_i16x32();
+    let rhs = b.as_i16x32();
+    transmute(vpackuswb(lhs, rhs))
+}
+
+/// Converts the packed signed 16-bit integers of `a` and `b` to packed 8-bit integers using unsigned saturation, storing the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi16&expand=4119)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackuswb))]
+pub unsafe fn _mm512_mask_packus_epi16(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    // The packed output holds 8-bit lanes, so the blend works on the i8x64 view.
+    let packed = _mm512_packus_epi16(a, b).as_i8x64();
+    let fallback = src.as_i8x64();
+    let merged = simd_select_bitmask(k, packed, fallback);
+    transmute(merged)
+}
+
+/// Converts the packed signed 16-bit integers of `a` and `b` to packed 8-bit integers using unsigned saturation, storing the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi16&expand=4120)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpackuswb))]
+pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let packed = _mm512_packus_epi16(a, b).as_i8x64();
+    transmute(simd_select_bitmask(k, packed, zeroed))
+}
+
+/// Averages the packed unsigned 16-bit integers of `a` and `b` and returns the per-lane results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu16&expand=388)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpavgw))]
+pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
+    // Inputs are viewed as thirty-two unsigned 16-bit lanes each.
+    let lhs = a.as_u16x32();
+    let rhs = b.as_u16x32();
+    transmute(vpavgw(lhs, rhs))
+}
+
+/// Averages the packed unsigned 16-bit integers of `a` and `b`, storing the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu16&expand=389)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpavgw))]
+pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let averaged = _mm512_avg_epu16(a, b).as_u16x32();
+    let fallback = src.as_u16x32();
+    // Each bit of `k` picks between the fresh average and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, averaged, fallback);
+    transmute(merged)
+}
+
+/// Averages the packed unsigned 16-bit integers of `a` and `b`, storing the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu16&expand=390)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpavgw))]
+pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_u16x32();
+    let averaged = _mm512_avg_epu16(a, b).as_u16x32();
+    transmute(simd_select_bitmask(k, averaged, zeroed))
+}
+
+/// Averages the packed unsigned 8-bit integers of `a` and `b` and returns the per-lane results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu8&expand=397)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpavgb))]
+pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
+    // Inputs are viewed as sixty-four unsigned 8-bit lanes each.
+    let lhs = a.as_u8x64();
+    let rhs = b.as_u8x64();
+    transmute(vpavgb(lhs, rhs))
+}
+
+/// Averages the packed unsigned 8-bit integers of `a` and `b`, storing the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu8&expand=398)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpavgb))]
+pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let averaged = _mm512_avg_epu8(a, b).as_u8x64();
+    let fallback = src.as_u8x64();
+    // Each bit of `k` picks between the fresh average and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, averaged, fallback);
+    transmute(merged)
+}
+
+/// Averages the packed unsigned 8-bit integers of `a` and `b`, storing the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu8&expand=399)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpavgb))]
+pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_u8x64();
+    let averaged = _mm512_avg_epu8(a, b).as_u8x64();
+    transmute(simd_select_bitmask(k, averaged, zeroed))
+}
+
+/// Shifts the packed 16-bit integers of `a` left by `count`, shifting in zeros, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi16&expand=5271)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllw))]
+pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
+    let lanes = a.as_i16x32();
+    // The shift amount travels in a 128-bit vector, passed on as an i16x8 view.
+    let amount = count.as_i16x8();
+    transmute(vpsllw(lanes, amount))
+}
+
+/// Shifts the packed 16-bit integers of `a` left by `count`, shifting in zeros, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi16&expand=5269)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllw))]
+pub unsafe fn _mm512_mask_sll_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
+    let shifted = _mm512_sll_epi16(a, count).as_i16x32();
+    let fallback = src.as_i16x32();
+    // Each bit of `k` picks between the shifted lane and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, shifted, fallback);
+    transmute(merged)
+}
+
+/// Shifts the packed 16-bit integers of `a` left by `count`, shifting in zeros, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi16&expand=5270)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllw))]
+pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let shifted = _mm512_sll_epi16(a, count).as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts the packed 16-bit integers of `a` left by the immediate `imm8`, shifting in zeros, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi16&expand=5301)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_slli_epi16(a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_left {
+        ($amount:expr) => {
+            vpslliw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_left);
+    transmute(shifted)
+}
+
+/// Shifts the packed 16-bit integers of `a` left by the immediate `imm8`, shifting in zeros, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi16&expand=5299)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_slli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_left {
+        ($amount:expr) => {
+            vpslliw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_left);
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, fallback))
+}
+
+/// Shifts the packed 16-bit integers of `a` left by the immediate `imm8`, shifting in zeros, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi16&expand=5300)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllw, imm8 = 5))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_left {
+        ($amount:expr) => {
+            vpslliw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_left);
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts each packed 16-bit integer of `a` left by the amount given in the corresponding element of `count`, shifting in zeros, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi16&expand=5333)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllvw))]
+pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
+    // Data and per-lane shift amounts are both viewed as thirty-two 16-bit lanes.
+    let lanes = a.as_i16x32();
+    let amounts = count.as_i16x32();
+    transmute(vpsllvw(lanes, amounts))
+}
+
+/// Shifts each packed 16-bit integer of `a` left by the amount given in the corresponding element of `count`, shifting in zeros, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi16&expand=5331)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllvw))]
+pub unsafe fn _mm512_mask_sllv_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shifted = _mm512_sllv_epi16(a, count).as_i16x32();
+    let fallback = src.as_i16x32();
+    // Each bit of `k` picks between the shifted lane and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, shifted, fallback);
+    transmute(merged)
+}
+
+/// Shifts each packed 16-bit integer of `a` left by the amount given in the corresponding element of `count`, shifting in zeros, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi16&expand=5332)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsllvw))]
+pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let shifted = _mm512_sllv_epi16(a, count).as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by `count`, shifting in zeros, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi16&expand=5483)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlw))]
+pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
+    let lanes = a.as_i16x32();
+    // The shift amount travels in a 128-bit vector, passed on as an i16x8 view.
+    let amount = count.as_i16x8();
+    transmute(vpsrlw(lanes, amount))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by `count`, shifting in zeros, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi16&expand=5481)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlw))]
+pub unsafe fn _mm512_mask_srl_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
+    let shifted = _mm512_srl_epi16(a, count).as_i16x32();
+    let fallback = src.as_i16x32();
+    // Each bit of `k` picks between the shifted lane and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, shifted, fallback);
+    transmute(merged)
+}
+
+/// Shifts the packed 16-bit integers of `a` right by `count`, shifting in zeros, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi16&expand=5482)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlw))]
+pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let shifted = _mm512_srl_epi16(a, count).as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by the immediate `imm8`, shifting in zeros, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi16&expand=5513)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srli_epi16(a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_right_logical {
+        ($amount:expr) => {
+            vpsrliw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_right_logical);
+    transmute(shifted)
+}
+
+/// Shifts the packed 16-bit integers of `a` right by the immediate `imm8`, shifting in zeros, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi16&expand=5511)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srli_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_right_logical {
+        ($amount:expr) => {
+            vpsrliw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_right_logical);
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, fallback))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by the immediate `imm8`, shifting in zeros, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi16&expand=5512)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlw, imm8 = 5))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __m512i {
+    // NOTE: `imm8` is declared as `i32` here, unlike the `u32` taken by the other
+    // immediate right-shift intrinsics nearby. The original author's remark is that it
+    // should be `u32`, and that the document used for verification seems to be incorrect.
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_right_logical {
+        ($amount:expr) => {
+            vpsrliw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_right_logical);
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts each packed 16-bit integer of `a` right by the amount given in the corresponding element of `count`, shifting in zeros, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi16&expand=5545)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlvw))]
+pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
+    // Data and per-lane shift amounts are both viewed as thirty-two 16-bit lanes.
+    let lanes = a.as_i16x32();
+    let amounts = count.as_i16x32();
+    transmute(vpsrlvw(lanes, amounts))
+}
+
+/// Shifts each packed 16-bit integer of `a` right by the amount given in the corresponding element of `count`, shifting in zeros, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi16&expand=5543)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlvw))]
+pub unsafe fn _mm512_mask_srlv_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shifted = _mm512_srlv_epi16(a, count).as_i16x32();
+    let fallback = src.as_i16x32();
+    // Each bit of `k` picks between the shifted lane and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, shifted, fallback);
+    transmute(merged)
+}
+
+/// Shifts each packed 16-bit integer of `a` right by the amount given in the corresponding element of `count`, shifting in zeros, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi16&expand=5544)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsrlvw))]
+pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let shifted = _mm512_srlv_epi16(a, count).as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by `count`, shifting in sign bits, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi16&expand=5398)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsraw))]
+pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
+    let lanes = a.as_i16x32();
+    // The shift amount travels in a 128-bit vector, passed on as an i16x8 view.
+    let amount = count.as_i16x8();
+    transmute(vpsraw(lanes, amount))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by `count`, shifting in sign bits, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi16&expand=5396)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsraw))]
+pub unsafe fn _mm512_mask_sra_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
+    let shifted = _mm512_sra_epi16(a, count).as_i16x32();
+    let fallback = src.as_i16x32();
+    // Each bit of `k` picks between the shifted lane and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, shifted, fallback);
+    transmute(merged)
+}
+
+/// Shifts the packed 16-bit integers of `a` right by `count`, shifting in sign bits, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi16&expand=5397)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsraw))]
+pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let shifted = _mm512_sra_epi16(a, count).as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by the immediate `imm8`, shifting in sign bits, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi16&expand=5427)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srai_epi16(a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_right_arith {
+        ($amount:expr) => {
+            vpsraiw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_right_arith);
+    transmute(shifted)
+}
+
+/// Shifts the packed 16-bit integers of `a` right by the immediate `imm8`, shifting in sign bits, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi16&expand=5425)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srai_epi16(src: __m512i, k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_right_arith {
+        ($amount:expr) => {
+            vpsraiw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_right_arith);
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, fallback))
+}
+
+/// Shifts the packed 16-bit integers of `a` right by the immediate `imm8`, shifting in sign bits, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi16&expand=5426)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsraw, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i, imm8: u32) -> __m512i {
+    let lanes = a.as_i16x32();
+    // `constify_imm8_sae!` invokes this local macro with the shift amount it selects from `imm8`.
+    macro_rules! shift_right_arith {
+        ($amount:expr) => {
+            vpsraiw(lanes, $amount)
+        };
+    }
+    let shifted = constify_imm8_sae!(imm8, shift_right_arith);
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shifts each packed 16-bit integer of `a` right by the amount given in the corresponding element of `count`, shifting in sign bits, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi16&expand=5456)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsravw))]
+pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
+    // Data and per-lane shift amounts are both viewed as thirty-two 16-bit lanes.
+    let lanes = a.as_i16x32();
+    let amounts = count.as_i16x32();
+    transmute(vpsravw(lanes, amounts))
+}
+
+/// Shifts each packed 16-bit integer of `a` right by the amount given in the corresponding element of `count`, shifting in sign bits, and stores the results under writemask `k` (a lane is copied from `src` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi16&expand=5454)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsravw))]
+pub unsafe fn _mm512_mask_srav_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shifted = _mm512_srav_epi16(a, count).as_i16x32();
+    let fallback = src.as_i16x32();
+    // Each bit of `k` picks between the shifted lane and the matching lane of `src`.
+    let merged = simd_select_bitmask(k, shifted, fallback);
+    transmute(merged)
+}
+
+/// Shifts each packed 16-bit integer of `a` right by the amount given in the corresponding element of `count`, shifting in sign bits, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi16&expand=5455)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpsravw))]
+pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let shifted = _mm512_srav_epi16(a, count).as_i16x32();
+    transmute(simd_select_bitmask(k, shifted, zeroed))
+}
+
+/// Shuffles the 16-bit integers of `a` and `b` across lanes using the corresponding selector and index in `idx`, and returns the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_epi16&expand=4226)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vperm))] // matches either vpermi2w or vpermt2w
+pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
+    // All three operands are viewed as thirty-two 16-bit lanes.
+    let first = a.as_i16x32();
+    let selectors = idx.as_i16x32();
+    let second = b.as_i16x32();
+    transmute(vpermi2w(first, selectors, second))
+}
+
+/// Shuffles the 16-bit integers of `a` and `b` across lanes using the corresponding selector and index in `idx`, and stores the results under writemask `k` (a lane is copied from `a` when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_epi16&expand=4223)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpermt2w))]
+pub unsafe fn _mm512_mask_permutex2var_epi16(
+    a: __m512i,
+    k: __mmask32,
+    idx: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let shuffled = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
+    // This variant has no separate `src`: unselected lanes fall back to `a` itself.
+    let fallback = a.as_i16x32();
+    let merged = simd_select_bitmask(k, shuffled, fallback);
+    transmute(merged)
+}
+
+/// Shuffles the 16-bit integers of `a` and `b` across lanes using the corresponding selector and index in `idx`, and stores the results under zeromask `k` (a lane is zeroed when its mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_epi16&expand=4225)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vperm))] // matches either vpermi2w or vpermt2w
+pub unsafe fn _mm512_maskz_permutex2var_epi16(
+    k: __mmask32,
+    a: __m512i,
+    idx: __m512i,
+    b: __m512i,
+) -> __m512i {
+    // All-zero vector supplies the lanes whose mask bit is clear.
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let shuffled = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
+    transmute(simd_select_bitmask(k, shuffled, zeroed))
+}
+
+/// Permutes the 16-bit integers of `a` and `b` across lanes under control of `idx`, then applies writemask `k`: a result lane keeps the permuted value when its mask bit is set and is copied from `idx` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_epi16&expand=4224)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpermi2w))]
+pub unsafe fn _mm512_mask2_permutex2var_epi16(
+    a: __m512i,
+    idx: __m512i,
+    k: __mmask32,
+    b: __m512i,
+) -> __m512i {
+    let shuffled = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
+    // Unlike the `mask` variant, unselected lanes take their value from the index vector.
+    let fallback = idx.as_i16x32();
+    transmute(simd_select_bitmask(k, shuffled, fallback))
+}
+
+/// Permutes the 16-bit integers of `a` across lanes, with each element of `idx` giving the source index for the matching result lane. The permuted vector is returned.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_epi16&expand=4295)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpermw))]
+pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
+    // The underlying binding takes the data vector first and the index vector second.
+    let data = a.as_i16x32();
+    let control = idx.as_i16x32();
+    transmute(vpermw(data, control))
+}
+
+/// Permutes the 16-bit integers of `a` across lanes according to `idx`, then applies writemask `k`: a result lane keeps the permuted value when its mask bit is set and is copied from `src` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_epi16&expand=4293)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpermw))]
+pub unsafe fn _mm512_mask_permutexvar_epi16(
+    src: __m512i,
+    k: __mmask32,
+    idx: __m512i,
+    a: __m512i,
+) -> __m512i {
+    let shuffled = _mm512_permutexvar_epi16(idx, a).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, shuffled, fallback))
+}
+
+/// Permutes the 16-bit integers of `a` across lanes according to `idx`, then applies zeromask `k`: a result lane keeps the permuted value when its mask bit is set and is zeroed otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_epi16&expand=4294)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpermw))]
+pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
+    let shuffled = _mm512_permutexvar_epi16(idx, a).as_i16x32();
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let selected = simd_select_bitmask(k, shuffled, zeroed);
+    transmute(selected)
+}
+
+/// Blends the packed 16-bit integers of `a` and `b` under control mask `k`: a result lane is taken from `b` when its mask bit is set and from `a` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi16&expand=430)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
+pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let when_set = b.as_i16x32();
+    let when_clear = a.as_i16x32();
+    transmute(simd_select_bitmask(k, when_set, when_clear))
+}
+
+/// Blends the packed 8-bit integers of `a` and `b` under control mask `k`: a result lane is taken from `b` when its mask bit is set and from `a` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi8&expand=441)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
+pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let when_set = b.as_i8x64();
+    let when_clear = a.as_i8x64();
+    transmute(simd_select_bitmask(k, when_set, when_clear))
+}
+
+/// Replicates the lowest packed 16-bit integer of `a` into every element of the returned 512-bit vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastw_epi16&expand=587)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastw))]
+pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
+    let widened = _mm512_castsi128_si512(a).as_i16x32();
+    // Every one of the 32 output lanes reads input lane 0.
+    #[rustfmt::skip]
+    let splat: i16x32 = simd_shuffle32(
+        widened,
+        widened,
+        [
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+        ],
+    );
+    transmute(splat)
+}
+
+/// Replicates the lowest packed 16-bit integer of `a` into every element of the result under writemask `k`: lanes whose mask bit is not set are copied from `src` instead.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastw_epi16&expand=588)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastw))]
+pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
+    let splat = _mm512_broadcastw_epi16(a).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, splat, fallback))
+}
+
+/// Replicates the lowest packed 16-bit integer of `a` into every element of the result under zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastw_epi16&expand=589)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastw))]
+pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
+    let splat = _mm512_broadcastw_epi16(a).as_i16x32();
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let selected = simd_select_bitmask(k, splat, zeroed);
+    transmute(selected)
+}
+
+/// Replicates the lowest packed 8-bit integer of `a` into every element of the returned 512-bit vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastb_epi8&expand=536)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastb))]
+pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
+    let widened = _mm512_castsi128_si512(a).as_i8x64();
+    // Every one of the 64 output lanes reads input lane 0.
+    #[rustfmt::skip]
+    let splat: i8x64 = simd_shuffle64(
+        widened,
+        widened,
+        [
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0,
+        ],
+    );
+    transmute(splat)
+}
+
+/// Replicates the lowest packed 8-bit integer of `a` into every element of the result under writemask `k`: lanes whose mask bit is not set are copied from `src` instead.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastb_epi8&expand=537)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastb))]
+pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
+    let splat = _mm512_broadcastb_epi8(a).as_i8x64();
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, splat, fallback))
+}
+
+/// Replicates the lowest packed 8-bit integer of `a` into every element of the result under zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastb_epi8&expand=538)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastb))]
+pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
+    let splat = _mm512_broadcastb_epi8(a).as_i8x64();
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let selected = simd_select_bitmask(k, splat, zeroed);
+    transmute(selected)
+}
+
+/// Interleaves the 16-bit integers taken from the high half of each 128-bit lane of `a` and `b`, and returns the interleaved vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi16&expand=6012)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpckhwd))]
+pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let lhs = a.as_i16x32();
+    let rhs = b.as_i16x32();
+    // Indices below 32 pick from `lhs`; `32 + n` picks element `n` of `rhs`.
+    // One row per 128-bit lane (eight 16-bit elements each).
+    #[rustfmt::skip]
+    let interleaved: i16x32 = simd_shuffle32(
+        lhs,
+        rhs,
+        [
+            4, 32 + 4, 5, 32 + 5, 6, 32 + 6, 7, 32 + 7,
+            12, 32 + 12, 13, 32 + 13, 14, 32 + 14, 15, 32 + 15,
+            20, 32 + 20, 21, 32 + 21, 22, 32 + 22, 23, 32 + 23,
+            28, 32 + 28, 29, 32 + 29, 30, 32 + 30, 31, 32 + 31,
+        ],
+    );
+    transmute(interleaved)
+}
+
+/// Interleaves the 16-bit integers from the high half of each 128-bit lane of `a` and `b`, then applies writemask `k`: lanes whose mask bit is not set are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi16&expand=6010)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpckhwd))]
+pub unsafe fn _mm512_mask_unpackhi_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let interleaved = _mm512_unpackhi_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, interleaved, fallback))
+}
+
+/// Interleaves the 16-bit integers from the high half of each 128-bit lane of `a` and `b`, then applies zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi16&expand=6011)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpckhwd))]
+pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let interleaved = _mm512_unpackhi_epi16(a, b).as_i16x32();
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let selected = simd_select_bitmask(k, interleaved, zeroed);
+    transmute(selected)
+}
+
+/// Interleaves the 8-bit integers taken from the high half of each 128-bit lane of `a` and `b`, and returns the interleaved vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi8&expand=6039)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpckhbw))]
+pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
+    let lhs = a.as_i8x64();
+    let rhs = b.as_i8x64();
+    // Indices below 64 pick from `lhs`; `64+n` picks element `n` of `rhs`.
+    // Two rows per 128-bit lane (sixteen 8-bit elements each).
+    #[rustfmt::skip]
+    let interleaved: i8x64 = simd_shuffle64(
+        lhs,
+        rhs,
+        [
+            8, 64+8, 9, 64+9, 10, 64+10, 11, 64+11,
+            12, 64+12, 13, 64+13, 14, 64+14, 15, 64+15,
+            24, 64+24, 25, 64+25, 26, 64+26, 27, 64+27,
+            28, 64+28, 29, 64+29, 30, 64+30, 31, 64+31,
+            40, 64+40, 41, 64+41, 42, 64+42, 43, 64+43,
+            44, 64+44, 45, 64+45, 46, 64+46, 47, 64+47,
+            56, 64+56, 57, 64+57, 58, 64+58, 59, 64+59,
+            60, 64+60, 61, 64+61, 62, 64+62, 63, 64+63,
+        ],
+    );
+    transmute(interleaved)
+}
+
+/// Interleaves the 8-bit integers from the high half of each 128-bit lane of `a` and `b`, then applies writemask `k`: lanes whose mask bit is not set are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi8&expand=6037)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpckhbw))]
+pub unsafe fn _mm512_mask_unpackhi_epi8(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let interleaved = _mm512_unpackhi_epi8(a, b).as_i8x64();
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, interleaved, fallback))
+}
+
+/// Interleaves the 8-bit integers from the high half of each 128-bit lane of `a` and `b`, then applies zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi8&expand=6038)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpckhbw))]
+pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let interleaved = _mm512_unpackhi_epi8(a, b).as_i8x64();
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let selected = simd_select_bitmask(k, interleaved, zeroed);
+    transmute(selected)
+}
+
+/// Interleaves the 16-bit integers taken from the low half of each 128-bit lane of `a` and `b`, and returns the interleaved vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi16&expand=6069)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpcklwd))]
+pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
+    let lhs = a.as_i16x32();
+    let rhs = b.as_i16x32();
+    // Indices below 32 pick from `lhs`; `32+n` picks element `n` of `rhs`.
+    // One row per 128-bit lane (eight 16-bit elements each).
+    #[rustfmt::skip]
+    let interleaved: i16x32 = simd_shuffle32(
+        lhs,
+        rhs,
+        [
+            0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3,
+            8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11,
+            16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19,
+            24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27,
+        ],
+    );
+    transmute(interleaved)
+}
+
+/// Interleaves the 16-bit integers from the low half of each 128-bit lane of `a` and `b`, then applies writemask `k`: lanes whose mask bit is not set are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi16&expand=6067)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpcklwd))]
+pub unsafe fn _mm512_mask_unpacklo_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let interleaved = _mm512_unpacklo_epi16(a, b).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, interleaved, fallback))
+}
+
+/// Interleaves the 16-bit integers from the low half of each 128-bit lane of `a` and `b`, then applies zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi16&expand=6068)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpcklwd))]
+pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+    let interleaved = _mm512_unpacklo_epi16(a, b).as_i16x32();
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let selected = simd_select_bitmask(k, interleaved, zeroed);
+    transmute(selected)
+}
+
+/// Interleaves the 8-bit integers taken from the low half of each 128-bit lane of `a` and `b`, and returns the interleaved vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi8&expand=6096)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpcklbw))]
+pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
+    let lhs = a.as_i8x64();
+    let rhs = b.as_i8x64();
+    // Indices below 64 pick from `lhs`; `64+n` picks element `n` of `rhs`.
+    // Two rows per 128-bit lane (sixteen 8-bit elements each).
+    #[rustfmt::skip]
+    let interleaved: i8x64 = simd_shuffle64(
+        lhs,
+        rhs,
+        [
+            0, 64+0, 1, 64+1, 2, 64+2, 3, 64+3,
+            4, 64+4, 5, 64+5, 6, 64+6, 7, 64+7,
+            16, 64+16, 17, 64+17, 18, 64+18, 19, 64+19,
+            20, 64+20, 21, 64+21, 22, 64+22, 23, 64+23,
+            32, 64+32, 33, 64+33, 34, 64+34, 35, 64+35,
+            36, 64+36, 37, 64+37, 38, 64+38, 39, 64+39,
+            48, 64+48, 49, 64+49, 50, 64+50, 51, 64+51,
+            52, 64+52, 53, 64+53, 54, 64+54, 55, 64+55,
+        ],
+    );
+    transmute(interleaved)
+}
+
+/// Interleaves the 8-bit integers from the low half of each 128-bit lane of `a` and `b`, then applies writemask `k`: lanes whose mask bit is not set are copied from `src`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi8&expand=6094)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpcklbw))]
+pub unsafe fn _mm512_mask_unpacklo_epi8(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let interleaved = _mm512_unpacklo_epi8(a, b).as_i8x64();
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, interleaved, fallback))
+}
+
+/// Interleaves the 8-bit integers from the low half of each 128-bit lane of `a` and `b`, then applies zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi8&expand=6095)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpunpcklbw))]
+pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+    let interleaved = _mm512_unpacklo_epi8(a, b).as_i8x64();
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let selected = simd_select_bitmask(k, interleaved, zeroed);
+    transmute(selected)
+}
+
+/// Moves the packed 16-bit integers of `a` into the result under writemask `k`: lanes whose mask bit is not set are copied from `src` instead.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi16&expand=3795)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovdqu16))]
+pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+    let incoming = a.as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, incoming, fallback))
+}
+
+/// Moves the packed 16-bit integers of `a` into the result under zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi16&expand=3796)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovdqu16))]
+pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
+    let incoming = a.as_i16x32();
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let selected = simd_select_bitmask(k, incoming, zeroed);
+    transmute(selected)
+}
+
+/// Moves the packed 8-bit integers of `a` into the result under writemask `k`: lanes whose mask bit is not set are copied from `src` instead.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi8&expand=3813)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovdqu8))]
+pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
+    let incoming = a.as_i8x64();
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, incoming, fallback))
+}
+
+/// Moves the packed 8-bit integers of `a` into the result under zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi8&expand=3814)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vmovdqu8))]
+pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
+    let incoming = a.as_i8x64();
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let selected = simd_select_bitmask(k, incoming, zeroed);
+    transmute(selected)
+}
+
+/// Broadcasts the 16-bit integer `a` to every element of the result under writemask `k`: lanes whose mask bit is not set are copied from `src` instead.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi16&expand=4942)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastw))]
+pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
+    let splat = _mm512_set1_epi16(a).as_i16x32();
+    let fallback = src.as_i16x32();
+    transmute(simd_select_bitmask(k, splat, fallback))
+}
+
+/// Broadcasts the 16-bit integer `a` to every element of the result under zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi16&expand=4943)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastw))]
+pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
+    let splat = _mm512_set1_epi16(a).as_i16x32();
+    let zeroed = _mm512_setzero_si512().as_i16x32();
+    let selected = simd_select_bitmask(k, splat, zeroed);
+    transmute(selected)
+}
+
+/// Broadcasts the 8-bit integer `a` to every element of the result under writemask `k`: lanes whose mask bit is not set are copied from `src` instead.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi8&expand=4970)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastb))]
+pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
+    let splat = _mm512_set1_epi8(a).as_i8x64();
+    let fallback = src.as_i8x64();
+    transmute(simd_select_bitmask(k, splat, fallback))
+}
+
+/// Broadcasts the 8-bit integer `a` to every element of the result under zeromask `k`: lanes whose mask bit is not set are zeroed.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi8&expand=4971)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpbroadcastb))]
+pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
+    let splat = _mm512_set1_epi8(a).as_i8x64();
+    let zeroed = _mm512_setzero_si512().as_i8x64();
+    let selected = simd_select_bitmask(k, splat, zeroed);
+    transmute(selected)
+}
+
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflelo_epi16&expand=5221)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 0))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_shufflelo_epi16(a: __m512i, imm8: i32) -> __m512i {
+ // Only the low 8 bits of the control are used; they are read below as four 2-bit selectors.
+ let imm8 = (imm8 & 0xFF) as u8;
+ let a = a.as_i16x32();
+ // Final step of the dispatch: all four selectors are literals here, so the index array is made of constants.
+ // The same pattern repeats at offsets 0, 8, 16 and 24; positions 4-7 of each group of eight keep their own index.
+ macro_rules! shuffle_done {
+ ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
+ #[rustfmt::skip]
+ simd_shuffle32(a, a, [
+ 0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7, 8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15,
+ 16+$x01, 16+$x23, 16+$x45, 16+$x67, 20, 21, 22, 23, 24+$x01, 24+$x23, 24+$x45, 24+$x67, 28, 29, 30, 31,
+ ])
+ };
+ }
+ // Bits 7:6 of imm8 select the source for the fourth shuffled position.
+ macro_rules! shuffle_x67 {
+ ($x01:expr, $x23:expr, $x45:expr) => {
+ match (imm8 >> 6) & 0b11 {
+ 0b00 => shuffle_done!($x01, $x23, $x45, 0),
+ 0b01 => shuffle_done!($x01, $x23, $x45, 1),
+ 0b10 => shuffle_done!($x01, $x23, $x45, 2),
+ _ => shuffle_done!($x01, $x23, $x45, 3),
+ }
+ };
+ }
+ // Bits 5:4 of imm8 select the source for the third shuffled position.
+ macro_rules! shuffle_x45 {
+ ($x01:expr, $x23:expr) => {
+ match (imm8 >> 4) & 0b11 {
+ 0b00 => shuffle_x67!($x01, $x23, 0),
+ 0b01 => shuffle_x67!($x01, $x23, 1),
+ 0b10 => shuffle_x67!($x01, $x23, 2),
+ _ => shuffle_x67!($x01, $x23, 3),
+ }
+ };
+ }
+ // Bits 3:2 of imm8 select the source for the second shuffled position.
+ macro_rules! shuffle_x23 {
+ ($x01:expr) => {
+ match (imm8 >> 2) & 0b11 {
+ 0b00 => shuffle_x45!($x01, 0),
+ 0b01 => shuffle_x45!($x01, 1),
+ 0b10 => shuffle_x45!($x01, 2),
+ _ => shuffle_x45!($x01, 3),
+ }
+ };
+ }
+ // Bits 1:0 of imm8 select the source for the first shuffled position and start the nested dispatch.
+ let r: i16x32 = match imm8 & 0b11 {
+ 0b00 => shuffle_x23!(0),
+ 0b01 => shuffle_x23!(1),
+ 0b10 => shuffle_x23!(2),
+ _ => shuffle_x23!(3),
+ };
+ transmute(r)
+}
+
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflelo_epi16&expand=5219)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 0))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_shufflelo_epi16(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512i,
+ imm8: i32,
+) -> __m512i {
+ // Only the low 8 bits of the control are used; they are read below as four 2-bit selectors.
+ let imm8 = (imm8 & 0xFF) as u8;
+ let a = a.as_i16x32();
+ // Final step of the dispatch: all four selectors are literals here, so the index array is made of constants.
+ // The same pattern repeats at offsets 0, 8, 16 and 24; positions 4-7 of each group of eight keep their own index.
+ macro_rules! shuffle_done {
+ ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
+ #[rustfmt::skip]
+ simd_shuffle32(a, a, [
+ 0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7, 8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15,
+ 16+$x01, 16+$x23, 16+$x45, 16+$x67, 20, 21, 22, 23, 24+$x01, 24+$x23, 24+$x45, 24+$x67, 28, 29, 30, 31,
+ ])
+ };
+ }
+ // Bits 7:6 of imm8 select the source for the fourth shuffled position.
+ macro_rules! shuffle_x67 {
+ ($x01:expr, $x23:expr, $x45:expr) => {
+ match (imm8 >> 6) & 0b11 {
+ 0b00 => shuffle_done!($x01, $x23, $x45, 0),
+ 0b01 => shuffle_done!($x01, $x23, $x45, 1),
+ 0b10 => shuffle_done!($x01, $x23, $x45, 2),
+ _ => shuffle_done!($x01, $x23, $x45, 3),
+ }
+ };
+ }
+ // Bits 5:4 of imm8 select the source for the third shuffled position.
+ macro_rules! shuffle_x45 {
+ ($x01:expr, $x23:expr) => {
+ match (imm8 >> 4) & 0b11 {
+ 0b00 => shuffle_x67!($x01, $x23, 0),
+ 0b01 => shuffle_x67!($x01, $x23, 1),
+ 0b10 => shuffle_x67!($x01, $x23, 2),
+ _ => shuffle_x67!($x01, $x23, 3),
+ }
+ };
+ }
+ // Bits 3:2 of imm8 select the source for the second shuffled position.
+ macro_rules! shuffle_x23 {
+ ($x01:expr) => {
+ match (imm8 >> 2) & 0b11 {
+ 0b00 => shuffle_x45!($x01, 0),
+ 0b01 => shuffle_x45!($x01, 1),
+ 0b10 => shuffle_x45!($x01, 2),
+ _ => shuffle_x45!($x01, 3),
+ }
+ };
+ }
+ // Bits 1:0 of imm8 select the source for the first shuffled position and start the nested dispatch.
+ let r: i16x32 = match imm8 & 0b11 {
+ 0b00 => shuffle_x23!(0),
+ 0b01 => shuffle_x23!(1),
+ 0b10 => shuffle_x23!(2),
+ _ => shuffle_x23!(3),
+ };
+ // Writemask: lanes whose bit in k is not set take their value from src.
+ transmute(simd_select_bitmask(k, r, src.as_i16x32()))
+}
+
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflelo_epi16&expand=5220)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 0))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __m512i {
+ // Only the low 8 bits of the control are used; they are read below as four 2-bit selectors.
+ let imm8 = (imm8 & 0xFF) as u8;
+ let a = a.as_i16x32();
+ // Final step of the dispatch: all four selectors are literals here, so the index array is made of constants.
+ // The same pattern repeats at offsets 0, 8, 16 and 24; positions 4-7 of each group of eight keep their own index.
+ macro_rules! shuffle_done {
+ ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
+ #[rustfmt::skip]
+ simd_shuffle32(a, a, [
+ 0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7, 8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15,
+ 16+$x01, 16+$x23, 16+$x45, 16+$x67, 20, 21, 22, 23, 24+$x01, 24+$x23, 24+$x45, 24+$x67, 28, 29, 30, 31,
+ ])
+ };
+ }
+ // Bits 7:6 of imm8 select the source for the fourth shuffled position.
+ macro_rules! shuffle_x67 {
+ ($x01:expr, $x23:expr, $x45:expr) => {
+ match (imm8 >> 6) & 0b11 {
+ 0b00 => shuffle_done!($x01, $x23, $x45, 0),
+ 0b01 => shuffle_done!($x01, $x23, $x45, 1),
+ 0b10 => shuffle_done!($x01, $x23, $x45, 2),
+ _ => shuffle_done!($x01, $x23, $x45, 3),
+ }
+ };
+ }
+ // Bits 5:4 of imm8 select the source for the third shuffled position.
+ macro_rules! shuffle_x45 {
+ ($x01:expr, $x23:expr) => {
+ match (imm8 >> 4) & 0b11 {
+ 0b00 => shuffle_x67!($x01, $x23, 0),
+ 0b01 => shuffle_x67!($x01, $x23, 1),
+ 0b10 => shuffle_x67!($x01, $x23, 2),
+ _ => shuffle_x67!($x01, $x23, 3),
+ }
+ };
+ }
+ // Bits 3:2 of imm8 select the source for the second shuffled position.
+ macro_rules! shuffle_x23 {
+ ($x01:expr) => {
+ match (imm8 >> 2) & 0b11 {
+ 0b00 => shuffle_x45!($x01, 0),
+ 0b01 => shuffle_x45!($x01, 1),
+ 0b10 => shuffle_x45!($x01, 2),
+ _ => shuffle_x45!($x01, 3),
+ }
+ };
+ }
+ // Bits 1:0 of imm8 select the source for the first shuffled position and start the nested dispatch.
+ let r: i16x32 = match imm8 & 0b11 {
+ 0b00 => shuffle_x23!(0),
+ 0b01 => shuffle_x23!(1),
+ 0b10 => shuffle_x23!(2),
+ _ => shuffle_x23!(3),
+ };
+ // Zeromask: lanes whose bit in k is not set take their value from an all-zero vector.
+ transmute(simd_select_bitmask(
+ k,
+ r,
+ _mm512_setzero_si512().as_i16x32(),
+ ))
+}
+
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflehi_epi16&expand=5212)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 0))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_shufflehi_epi16(a: __m512i, imm8: i32) -> __m512i {
+ // Only the low 8 bits of the control are used; they are read below as four 2-bit selectors.
+ let imm8 = (imm8 & 0xFF) as u8;
+ let a = a.as_i16x32();
+ // Final step of the dispatch: all four selectors are literals here, so the index array is made of constants.
+ // The same pattern repeats for each group of eight; positions 0-3 of each group keep their own index.
+ macro_rules! shuffle_done {
+ ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
+ #[rustfmt::skip]
+ simd_shuffle32(a, a, [
+ 0, 1, 2, 3, 4+$x01, 4+$x23, 4+$x45, 4+$x67, 8, 9, 10, 11, 12+$x01, 12+$x23, 12+$x45, 12+$x67,
+ 16, 17, 18, 19, 20+$x01, 20+$x23, 20+$x45, 20+$x67, 24, 25, 26, 27, 28+$x01, 28+$x23, 28+$x45, 28+$x67,
+ ])
+ };
+ }
+ // Bits 7:6 of imm8 select the source for the fourth shuffled position.
+ macro_rules! shuffle_x67 {
+ ($x01:expr, $x23:expr, $x45:expr) => {
+ match (imm8 >> 6) & 0b11 {
+ 0b00 => shuffle_done!($x01, $x23, $x45, 0),
+ 0b01 => shuffle_done!($x01, $x23, $x45, 1),
+ 0b10 => shuffle_done!($x01, $x23, $x45, 2),
+ _ => shuffle_done!($x01, $x23, $x45, 3),
+ }
+ };
+ }
+ // Bits 5:4 of imm8 select the source for the third shuffled position.
+ macro_rules! shuffle_x45 {
+ ($x01:expr, $x23:expr) => {
+ match (imm8 >> 4) & 0b11 {
+ 0b00 => shuffle_x67!($x01, $x23, 0),
+ 0b01 => shuffle_x67!($x01, $x23, 1),
+ 0b10 => shuffle_x67!($x01, $x23, 2),
+ _ => shuffle_x67!($x01, $x23, 3),
+ }
+ };
+ }
+ // Bits 3:2 of imm8 select the source for the second shuffled position.
+ macro_rules! shuffle_x23 {
+ ($x01:expr) => {
+ match (imm8 >> 2) & 0b11 {
+ 0b00 => shuffle_x45!($x01, 0),
+ 0b01 => shuffle_x45!($x01, 1),
+ 0b10 => shuffle_x45!($x01, 2),
+ _ => shuffle_x45!($x01, 3),
+ }
+ };
+ }
+ // Bits 1:0 of imm8 select the source for the first shuffled position and start the nested dispatch.
+ let r: i16x32 = match imm8 & 0b11 {
+ 0b00 => shuffle_x23!(0),
+ 0b01 => shuffle_x23!(1),
+ 0b10 => shuffle_x23!(2),
+ _ => shuffle_x23!(3),
+ };
+ transmute(r)
+}
+
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflehi_epi16&expand=5210)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 0))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_shufflehi_epi16(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512i,
+ imm8: i32,
+) -> __m512i {
+ // Only the low 8 bits of the control are used; they are read below as four 2-bit selectors.
+ let imm8 = (imm8 & 0xFF) as u8;
+ let a = a.as_i16x32();
+ // Final step of the dispatch: all four selectors are literals here, so the index array is made of constants.
+ // The same pattern repeats for each group of eight; positions 0-3 of each group keep their own index.
+ macro_rules! shuffle_done {
+ ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
+ #[rustfmt::skip]
+ simd_shuffle32(a, a, [
+ 0, 1, 2, 3, 4+$x01, 4+$x23, 4+$x45, 4+$x67, 8, 9, 10, 11, 12+$x01, 12+$x23, 12+$x45, 12+$x67,
+ 16, 17, 18, 19, 20+$x01, 20+$x23, 20+$x45, 20+$x67, 24, 25, 26, 27, 28+$x01, 28+$x23, 28+$x45, 28+$x67,
+ ])
+ };
+ }
+ // Bits 7:6 of imm8 select the source for the fourth shuffled position.
+ macro_rules! shuffle_x67 {
+ ($x01:expr, $x23:expr, $x45:expr) => {
+ match (imm8 >> 6) & 0b11 {
+ 0b00 => shuffle_done!($x01, $x23, $x45, 0),
+ 0b01 => shuffle_done!($x01, $x23, $x45, 1),
+ 0b10 => shuffle_done!($x01, $x23, $x45, 2),
+ _ => shuffle_done!($x01, $x23, $x45, 3),
+ }
+ };
+ }
+ // Bits 5:4 of imm8 select the source for the third shuffled position.
+ macro_rules! shuffle_x45 {
+ ($x01:expr, $x23:expr) => {
+ match (imm8 >> 4) & 0b11 {
+ 0b00 => shuffle_x67!($x01, $x23, 0),
+ 0b01 => shuffle_x67!($x01, $x23, 1),
+ 0b10 => shuffle_x67!($x01, $x23, 2),
+ _ => shuffle_x67!($x01, $x23, 3),
+ }
+ };
+ }
+ // Bits 3:2 of imm8 select the source for the second shuffled position.
+ macro_rules! shuffle_x23 {
+ ($x01:expr) => {
+ match (imm8 >> 2) & 0b11 {
+ 0b00 => shuffle_x45!($x01, 0),
+ 0b01 => shuffle_x45!($x01, 1),
+ 0b10 => shuffle_x45!($x01, 2),
+ _ => shuffle_x45!($x01, 3),
+ }
+ };
+ }
+ // Bits 1:0 of imm8 select the source for the first shuffled position and start the nested dispatch.
+ let r: i16x32 = match imm8 & 0b11 {
+ 0b00 => shuffle_x23!(0),
+ 0b01 => shuffle_x23!(1),
+ 0b10 => shuffle_x23!(2),
+ _ => shuffle_x23!(3),
+ };
+ // Writemask: lanes whose bit in k is not set take their value from src.
+ transmute(simd_select_bitmask(k, r, src.as_i16x32()))
+}
+
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflehi_epi16&expand=5211)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 0))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i, imm8: i32) -> __m512i {
+ // Only the low 8 bits of the control are used; they are read below as four 2-bit selectors.
+ let imm8 = (imm8 & 0xFF) as u8;
+ let a = a.as_i16x32();
+ // Final step of the dispatch: all four selectors are literals here, so the index array is made of constants.
+ // The same pattern repeats for each group of eight; positions 0-3 of each group keep their own index.
+ macro_rules! shuffle_done {
+ ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
+ #[rustfmt::skip]
+ simd_shuffle32(a, a, [
+ 0, 1, 2, 3, 4+$x01, 4+$x23, 4+$x45, 4+$x67, 8, 9, 10, 11, 12+$x01, 12+$x23, 12+$x45, 12+$x67,
+ 16, 17, 18, 19, 20+$x01, 20+$x23, 20+$x45, 20+$x67, 24, 25, 26, 27, 28+$x01, 28+$x23, 28+$x45, 28+$x67,
+ ])
+ };
+ }
+ // Bits 7:6 of imm8 select the source for the fourth shuffled position.
+ macro_rules! shuffle_x67 {
+ ($x01:expr, $x23:expr, $x45:expr) => {
+ match (imm8 >> 6) & 0b11 {
+ 0b00 => shuffle_done!($x01, $x23, $x45, 0),
+ 0b01 => shuffle_done!($x01, $x23, $x45, 1),
+ 0b10 => shuffle_done!($x01, $x23, $x45, 2),
+ _ => shuffle_done!($x01, $x23, $x45, 3),
+ }
+ };
+ }
+ // Bits 5:4 of imm8 select the source for the third shuffled position.
+ macro_rules! shuffle_x45 {
+ ($x01:expr, $x23:expr) => {
+ match (imm8 >> 4) & 0b11 {
+ 0b00 => shuffle_x67!($x01, $x23, 0),
+ 0b01 => shuffle_x67!($x01, $x23, 1),
+ 0b10 => shuffle_x67!($x01, $x23, 2),
+ _ => shuffle_x67!($x01, $x23, 3),
+ }
+ };
+ }
+ // Bits 3:2 of imm8 select the source for the second shuffled position.
+ macro_rules! shuffle_x23 {
+ ($x01:expr) => {
+ match (imm8 >> 2) & 0b11 {
+ 0b00 => shuffle_x45!($x01, 0),
+ 0b01 => shuffle_x45!($x01, 1),
+ 0b10 => shuffle_x45!($x01, 2),
+ _ => shuffle_x45!($x01, 3),
+ }
+ };
+ }
+ // Bits 1:0 of imm8 select the source for the first shuffled position and start the nested dispatch.
+ let r: i16x32 = match imm8 & 0b11 {
+ 0b00 => shuffle_x23!(0),
+ 0b01 => shuffle_x23!(1),
+ 0b10 => shuffle_x23!(2),
+ _ => shuffle_x23!(3),
+ };
+ // Zeromask: lanes whose bit in k is not set take their value from an all-zero vector.
+ transmute(simd_select_bitmask(
+ k,
+ r,
+ _mm512_setzero_si512().as_i16x32(),
+ ))
+}
+
+// Bindings to LLVM's x86 AVX-512 intrinsics, selected through `link_name`.
+// NOTE(review): the `improper_ctypes` allow is presumably needed because SIMD vector
+// types are used in `extern "C"` signatures -- confirm.
+#[allow(improper_ctypes)]
+extern "C" {
+ // paddus / padds (word and byte): two operands plus a `src` vector and a bit mask.
+ #[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
+ fn vpaddusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
+ #[link_name = "llvm.x86.avx512.mask.paddus.b.512"]
+ fn vpaddusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
+ #[link_name = "llvm.x86.avx512.mask.padds.w.512"]
+ fn vpaddsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.mask.padds.b.512"]
+ fn vpaddsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
+
+ // psubus / psubs (word and byte): same signature shape as the additions above.
+ #[link_name = "llvm.x86.avx512.mask.psubus.w.512"]
+ fn vpsubusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
+ #[link_name = "llvm.x86.avx512.mask.psubus.b.512"]
+ fn vpsubusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
+ #[link_name = "llvm.x86.avx512.mask.psubs.w.512"]
+ fn vpsubsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.mask.psubs.b.512"]
+ fn vpsubsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
+
+ // pmulhu / pmulh / pmul.hr.sw on 16-bit lanes; unmasked, two operands.
+ #[link_name = "llvm.x86.avx512.pmulhu.w.512"]
+ fn vpmulhuw(a: u16x32, b: u16x32) -> u16x32;
+ #[link_name = "llvm.x86.avx512.pmulh.w.512"]
+ fn vpmulhw(a: i16x32, b: i16x32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
+ fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
+
+ // Comparisons: `op` selects the predicate; the result is a bit mask, not a vector.
+ #[link_name = "llvm.x86.avx512.mask.ucmp.w.512"]
+ fn vpcmpuw(a: u16x32, b: u16x32, op: i32, mask: u32) -> u32;
+ #[link_name = "llvm.x86.avx512.mask.ucmp.b.512"]
+ fn vpcmpub(a: u8x64, b: u8x64, op: i32, mask: u64) -> u64;
+ #[link_name = "llvm.x86.avx512.mask.cmp.w.512"]
+ fn vpcmpw(a: i16x32, b: i16x32, op: i32, mask: u32) -> u32;
+ #[link_name = "llvm.x86.avx512.mask.cmp.b.512"]
+ fn vpcmpb(a: i8x64, b: i8x64, op: i32, mask: u64) -> u64;
+
+ // pmaxu / pmaxs. The link names carry `mask`, yet the signatures declared here
+ // take only the two operands.
+ #[link_name = "llvm.x86.avx512.mask.pmaxu.w.512"]
+ fn vpmaxuw(a: u16x32, b: u16x32) -> u16x32;
+ #[link_name = "llvm.x86.avx512.mask.pmaxu.b.512"]
+ fn vpmaxub(a: u8x64, b: u8x64) -> u8x64;
+ #[link_name = "llvm.x86.avx512.mask.pmaxs.w.512"]
+ fn vpmaxsw(a: i16x32, b: i16x32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.mask.pmaxs.b.512"]
+ fn vpmaxsb(a: i8x64, b: i8x64) -> i8x64;
+
+ // pminu / pmins, declared the same way as the maxima above.
+ #[link_name = "llvm.x86.avx512.mask.pminu.w.512"]
+ fn vpminuw(a: u16x32, b: u16x32) -> u16x32;
+ #[link_name = "llvm.x86.avx512.mask.pminu.b.512"]
+ fn vpminub(a: u8x64, b: u8x64) -> u8x64;
+ #[link_name = "llvm.x86.avx512.mask.pmins.w.512"]
+ fn vpminsw(a: i16x32, b: i16x32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.mask.pmins.b.512"]
+ fn vpminsb(a: i8x64, b: i8x64) -> i8x64;
+
+ // pmaddw.d / pmaddubs.w: the result lanes are wider than the input lanes.
+ #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
+ fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
+ fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
+
+ // Pack operations: the result lanes are narrower than the input lanes.
+ #[link_name = "llvm.x86.avx512.packssdw.512"]
+ fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
+ #[link_name = "llvm.x86.avx512.packsswb.512"]
+ fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
+ #[link_name = "llvm.x86.avx512.packusdw.512"]
+ fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
+ #[link_name = "llvm.x86.avx512.packuswb.512"]
+ fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;
+
+ // pavg on unsigned word and byte lanes.
+ #[link_name = "llvm.x86.avx512.pavg.w.512"]
+ fn vpavgw(a: u16x32, b: u16x32) -> u16x32;
+ #[link_name = "llvm.x86.avx512.pavg.b.512"]
+ fn vpavgb(a: u8x64, b: u8x64) -> u8x64;
+
+ // psll / psrl on 16-bit lanes. Three forms each: count in an `i16x8` vector,
+ // immediate count (`imm8`), and a full per-lane vector (`*v`).
+ #[link_name = "llvm.x86.avx512.psll.w.512"]
+ fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
+ #[link_name = "llvm.x86.avx512.pslli.w.512"]
+ fn vpslliw(a: i16x32, imm8: u32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.psllv.w.512"]
+ fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.psrl.w.512"]
+ fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
+ #[link_name = "llvm.x86.avx512.psrli.w.512"]
+ fn vpsrliw(a: i16x32, imm8: u32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.psrlv.w.512"]
+ fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
+
+ // psra on 16-bit lanes, in the same three forms.
+ #[link_name = "llvm.x86.avx512.psra.w.512"]
+ fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
+ #[link_name = "llvm.x86.avx512.psrai.w.512"]
+ fn vpsraiw(a: i16x32, imm8: u32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.psrav.w.512"]
+ fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
+
+ // Permutes of 16-bit lanes driven by an index vector (`idx`).
+ #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
+ fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
+ #[link_name = "llvm.x86.avx512.permvar.hi.512"]
+ fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
+
+ // pshuf.b on byte lanes.
+ #[link_name = "llvm.x86.avx512.pshuf.b.512"]
+ fn vpshufb(a: i8x64, b: i8x64) -> i8x64;
+}
+
+#[cfg(test)]
+mod tests {
+
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86::*;
+ use crate::hint::black_box;
+ use crate::mem::{self};
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_abs_epi16() {
+        // Every 16-bit lane holds -1; the result is compared against all ones.
+        let input = _mm512_set1_epi16(-1);
+        let expected = _mm512_set1_epi16(1);
+        assert_eq_m512i(_mm512_abs_epi16(input), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_abs_epi16() {
+        let neg = _mm512_set1_epi16(-1);
+        // All-zero mask: the result must equal the source operand.
+        assert_eq_m512i(_mm512_mask_abs_epi16(neg, 0, neg), neg);
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_abs_epi16(neg, 0x00FF_00FF, neg);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
+            -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_abs_epi16() {
+        let neg = _mm512_set1_epi16(-1);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_abs_epi16(0, neg), _mm512_setzero_si512());
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_abs_epi16(0x00FF_00FF, neg);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_abs_epi8() {
+        // Every 8-bit lane holds -1; the result is compared against all ones.
+        let input = _mm512_set1_epi8(-1);
+        let expected = _mm512_set1_epi8(1);
+        assert_eq_m512i(_mm512_abs_epi8(input), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_abs_epi8() {
+        let neg = _mm512_set1_epi8(-1);
+        // All-zero mask: the result must equal the source operand.
+        assert_eq_m512i(_mm512_mask_abs_epi8(neg, 0, neg), neg);
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_abs_epi8(neg, 0x00FF_00FF_00FF_00FF, neg);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
+            -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
+            -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
+            -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_abs_epi8() {
+        let neg = _mm512_set1_epi8(-1);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_abs_epi8(0, neg), _mm512_setzero_si512());
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_abs_epi8(0x00FF_00FF_00FF_00FF, neg);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+            0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_add_epi16() {
+        // 1 + 2 in every 16-bit lane is checked against 3.
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(2);
+        let expected = _mm512_set1_epi16(3);
+        assert_eq_m512i(_mm512_add_epi16(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_add_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(2);
+        // All-zero mask: the result must equal the source operand (`lhs`).
+        assert_eq_m512i(_mm512_mask_add_epi16(lhs, 0, lhs, rhs), lhs);
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_add_epi16(lhs, 0x00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
+            1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_add_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(2);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_add_epi16(0, lhs, rhs), _mm512_setzero_si512());
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_add_epi16(0x00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
+            0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_add_epi8() {
+        // 1 + 2 in every 8-bit lane is checked against 3.
+        let lhs = _mm512_set1_epi8(1);
+        let rhs = _mm512_set1_epi8(2);
+        let expected = _mm512_set1_epi8(3);
+        assert_eq_m512i(_mm512_add_epi8(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_add_epi8() {
+        let lhs = _mm512_set1_epi8(1);
+        let rhs = _mm512_set1_epi8(2);
+        // All-zero mask: the result must equal the source operand (`lhs`).
+        assert_eq_m512i(_mm512_mask_add_epi8(lhs, 0, lhs, rhs), lhs);
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_add_epi8(lhs, 0x00FF_00FF_00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
+            1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
+            1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
+            1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_add_epi8() {
+        let lhs = _mm512_set1_epi8(1);
+        let rhs = _mm512_set1_epi8(2);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_add_epi8(0, lhs, rhs), _mm512_setzero_si512());
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_add_epi8(0x00FF_00FF_00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
+            0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
+            0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
+            0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_adds_epu16() {
+        // Adding 1 to u16::MAX is expected to leave u16::MAX in every lane.
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(u16::MAX as i16);
+        let expected = _mm512_set1_epi16(u16::MAX as i16);
+        assert_eq_m512i(_mm512_adds_epu16(one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_adds_epu16() {
+        // Bit pattern of u16::MAX reinterpreted as i16, as the constructors take i16.
+        let sat = u16::MAX as i16;
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(sat);
+        // All-zero mask: the result must equal the source operand (`one`).
+        assert_eq_m512i(_mm512_mask_adds_epu16(one, 0, one, max), one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_adds_epu16(one, 0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, sat, sat, sat, sat,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_adds_epu16() {
+        // Bit pattern of u16::MAX reinterpreted as i16, as the constructors take i16.
+        let sat = u16::MAX as i16;
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(sat);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_adds_epu16(0, one, max), _mm512_setzero_si512());
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_adds_epu16(0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, sat, sat, sat, sat,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_adds_epu8() {
+        // Adding 1 to u8::MAX is expected to leave u8::MAX in every lane.
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(u8::MAX as i8);
+        let expected = _mm512_set1_epi8(u8::MAX as i8);
+        assert_eq_m512i(_mm512_adds_epu8(one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_adds_epu8() {
+        // Bit pattern of u8::MAX reinterpreted as i8, as the constructors take i8.
+        let sat = u8::MAX as i8;
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(sat);
+        // All-zero mask: the result must equal the source operand (`one`).
+        assert_eq_m512i(_mm512_mask_adds_epu8(one, 0, one, max), one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_adds_epu8(one, 0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, sat, sat, sat, sat,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_adds_epu8() {
+        // Bit pattern of u8::MAX reinterpreted as i8, as the constructors take i8.
+        let sat = u8::MAX as i8;
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(sat);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_adds_epu8(0, one, max), _mm512_setzero_si512());
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_adds_epu8(0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, sat, sat, sat, sat,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_adds_epi16() {
+        // Adding 1 to i16::MAX is expected to leave i16::MAX in every lane.
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(i16::MAX);
+        let expected = _mm512_set1_epi16(i16::MAX);
+        assert_eq_m512i(_mm512_adds_epi16(one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_adds_epi16() {
+        let top = i16::MAX;
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(top);
+        // All-zero mask: the result must equal the source operand (`one`).
+        assert_eq_m512i(_mm512_mask_adds_epi16(one, 0, one, max), one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_adds_epi16(one, 0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, top, top, top, top,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_adds_epi16() {
+        let top = i16::MAX;
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(top);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_adds_epi16(0, one, max), _mm512_setzero_si512());
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_adds_epi16(0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, top, top, top, top,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_adds_epi8() {
+        // Adding 1 to i8::MAX is expected to leave i8::MAX in every lane.
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(i8::MAX);
+        let expected = _mm512_set1_epi8(i8::MAX);
+        assert_eq_m512i(_mm512_adds_epi8(one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_adds_epi8() {
+        let top = i8::MAX;
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(top);
+        // All-zero mask: the result must equal the source operand (`one`).
+        assert_eq_m512i(_mm512_mask_adds_epi8(one, 0, one, max), one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_adds_epi8(one, 0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, top, top, top, top,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_adds_epi8() {
+        let top = i8::MAX;
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(top);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_adds_epi8(0, one, max), _mm512_setzero_si512());
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_adds_epi8(0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, top, top, top, top,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_sub_epi16() {
+        // 1 - 2 in every 16-bit lane is checked against -1.
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(2);
+        let expected = _mm512_set1_epi16(-1);
+        assert_eq_m512i(_mm512_sub_epi16(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_sub_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(2);
+        // All-zero mask: the result must equal the source operand (`lhs`).
+        assert_eq_m512i(_mm512_mask_sub_epi16(lhs, 0, lhs, rhs), lhs);
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_sub_epi16(lhs, 0x00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
+            1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_sub_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(2);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_sub_epi16(0, lhs, rhs), _mm512_setzero_si512());
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_sub_epi16(0x00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
+            0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_sub_epi8() {
+        // 1 - 2 in every 8-bit lane is checked against -1.
+        let lhs = _mm512_set1_epi8(1);
+        let rhs = _mm512_set1_epi8(2);
+        let expected = _mm512_set1_epi8(-1);
+        assert_eq_m512i(_mm512_sub_epi8(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_sub_epi8() {
+        let lhs = _mm512_set1_epi8(1);
+        let rhs = _mm512_set1_epi8(2);
+        // All-zero mask: the result must equal the source operand (`lhs`).
+        assert_eq_m512i(_mm512_mask_sub_epi8(lhs, 0, lhs, rhs), lhs);
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_sub_epi8(lhs, 0x00FF_00FF_00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
+            1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
+            1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
+            1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_sub_epi8() {
+        let lhs = _mm512_set1_epi8(1);
+        let rhs = _mm512_set1_epi8(2);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_sub_epi8(0, lhs, rhs), _mm512_setzero_si512());
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_sub_epi8(0x00FF_00FF_00FF_00FF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
+            0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
+            0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
+            0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_subs_epu16() {
+        // 1 minus u16::MAX is expected to give 0 in every lane.
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(u16::MAX as i16);
+        let expected = _mm512_set1_epi16(0);
+        assert_eq_m512i(_mm512_subs_epu16(one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_subs_epu16() {
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(u16::MAX as i16);
+        // All-zero mask: the result must equal the source operand (`one`).
+        assert_eq_m512i(_mm512_mask_subs_epu16(one, 0, one, max), one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_subs_epu16(one, 0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_subs_epu16() {
+        let one = _mm512_set1_epi16(1);
+        let max = _mm512_set1_epi16(u16::MAX as i16);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_subs_epu16(0, one, max), _mm512_setzero_si512());
+        // Low four mask bits set: the expected vector written out below is all zeros.
+        let got = _mm512_maskz_subs_epu16(0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_subs_epu8() {
+        // 1 minus u8::MAX is expected to give 0 in every lane.
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(u8::MAX as i8);
+        let expected = _mm512_set1_epi8(0);
+        assert_eq_m512i(_mm512_subs_epu8(one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_subs_epu8() {
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(u8::MAX as i8);
+        // All-zero mask: the result must equal the source operand (`one`).
+        assert_eq_m512i(_mm512_mask_subs_epu8(one, 0, one, max), one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_subs_epu8(one, 0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_subs_epu8() {
+        let one = _mm512_set1_epi8(1);
+        let max = _mm512_set1_epi8(u8::MAX as i8);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_subs_epu8(0, one, max), _mm512_setzero_si512());
+        // Low four mask bits set: the expected vector written out below is all zeros.
+        let got = _mm512_maskz_subs_epu8(0xF, one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_subs_epi16() {
+        // -1 minus i16::MAX is checked against i16::MIN in every lane.
+        let minus_one = _mm512_set1_epi16(-1);
+        let max = _mm512_set1_epi16(i16::MAX);
+        let expected = _mm512_set1_epi16(i16::MIN);
+        assert_eq_m512i(_mm512_subs_epi16(minus_one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_subs_epi16() {
+        let low = i16::MIN;
+        let minus_one = _mm512_set1_epi16(-1);
+        let max = _mm512_set1_epi16(i16::MAX);
+        // All-zero mask: the result must equal the source operand (`minus_one`).
+        assert_eq_m512i(_mm512_mask_subs_epi16(minus_one, 0, minus_one, max), minus_one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_subs_epi16(minus_one, 0xF, minus_one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, low, low, low, low,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_subs_epi16() {
+        let low = i16::MIN;
+        let minus_one = _mm512_set1_epi16(-1);
+        let max = _mm512_set1_epi16(i16::MAX);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_subs_epi16(0, minus_one, max), _mm512_setzero_si512());
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_subs_epi16(0xF, minus_one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, low, low, low, low,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_subs_epi8() {
+        // -1 minus i8::MAX is checked against i8::MIN in every lane.
+        let minus_one = _mm512_set1_epi8(-1);
+        let max = _mm512_set1_epi8(i8::MAX);
+        let expected = _mm512_set1_epi8(i8::MIN);
+        assert_eq_m512i(_mm512_subs_epi8(minus_one, max), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_subs_epi8() {
+        let low = i8::MIN;
+        let minus_one = _mm512_set1_epi8(-1);
+        let max = _mm512_set1_epi8(i8::MAX);
+        // All-zero mask: the result must equal the source operand (`minus_one`).
+        assert_eq_m512i(_mm512_mask_subs_epi8(minus_one, 0, minus_one, max), minus_one);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_subs_epi8(minus_one, 0xF, minus_one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, low, low, low, low,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_subs_epi8() {
+        let low = i8::MIN;
+        let minus_one = _mm512_set1_epi8(-1);
+        let max = _mm512_set1_epi8(i8::MAX);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_subs_epi8(0, minus_one, max), _mm512_setzero_si512());
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_subs_epi8(0xF, minus_one, max);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, low, low, low, low,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mulhi_epu16() {
+        // Both operands are 1 in every lane; the result is checked against 0.
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        let expected = _mm512_set1_epi16(0);
+        assert_eq_m512i(_mm512_mulhi_epu16(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_mulhi_epu16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        // All-zero mask: the result must equal the source operand (`lhs`).
+        assert_eq_m512i(_mm512_mask_mulhi_epu16(lhs, 0, lhs, rhs), lhs);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_mulhi_epu16(lhs, 0xF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_mulhi_epu16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_mulhi_epu16(0, lhs, rhs), _mm512_setzero_si512());
+        // Low four mask bits set: the expected vector written out below is all zeros.
+        let got = _mm512_maskz_mulhi_epu16(0xF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mulhi_epi16() {
+        // Both operands are 1 in every lane; the result is checked against 0.
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        let expected = _mm512_set1_epi16(0);
+        assert_eq_m512i(_mm512_mulhi_epi16(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_mulhi_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        // All-zero mask: the result must equal the source operand (`lhs`).
+        assert_eq_m512i(_mm512_mask_mulhi_epi16(lhs, 0, lhs, rhs), lhs);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_mulhi_epi16(lhs, 0xF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_mulhi_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_mulhi_epi16(0, lhs, rhs), _mm512_setzero_si512());
+        // Low four mask bits set: the expected vector written out below is all zeros.
+        let got = _mm512_maskz_mulhi_epi16(0xF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mulhrs_epi16() {
+        // Both operands are 1 in every lane; the result is checked against 0.
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        let expected = _mm512_set1_epi16(0);
+        assert_eq_m512i(_mm512_mulhrs_epi16(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_mulhrs_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        // All-zero mask: the result must equal the source operand (`lhs`).
+        assert_eq_m512i(_mm512_mask_mulhrs_epi16(lhs, 0, lhs, rhs), lhs);
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_mask_mulhrs_epi16(lhs, 0xF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_mulhrs_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_mulhrs_epi16(0, lhs, rhs), _mm512_setzero_si512());
+        // Low four mask bits set: the expected vector written out below is all zeros.
+        let got = _mm512_maskz_mulhrs_epi16(0xF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mullo_epi16() {
+        // Both operands are 1 in every lane; the result is checked against 1.
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        let expected = _mm512_set1_epi16(1);
+        assert_eq_m512i(_mm512_mullo_epi16(lhs, rhs), expected);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_mullo_epi16() {
+        // `b` is 2 rather than 1 so that a selected lane (1 * 2 = 2) differs from the
+        // source value (1). With b = 1 every lane would be 1 whatever the mask did,
+        // and a wrongly applied mask could not be detected.
+        let a = _mm512_set1_epi16(1);
+        let b = _mm512_set1_epi16(2);
+        // All-zero mask: the result must equal the source operand (`a`).
+        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        // Low four mask bits set: only the last four listed lanes take the product.
+        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_mullo_epi16() {
+        let lhs = _mm512_set1_epi16(1);
+        let rhs = _mm512_set1_epi16(1);
+        // All-zero mask: the result must be the zero vector.
+        assert_eq_m512i(_mm512_maskz_mullo_epi16(0, lhs, rhs), _mm512_setzero_si512());
+        // Low four mask bits set: compare against an explicitly spelled-out vector.
+        let got = _mm512_maskz_mullo_epi16(0xF, lhs, rhs);
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+        );
+        assert_eq_m512i(got, want);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_max_epu16() {
+        // One operand lists 0..=15 twice, the other lists 15 down to 0 twice.
+        #[rustfmt::skip]
+        let rising = _mm512_set_epi16(
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+        );
+        #[rustfmt::skip]
+        let falling = _mm512_set_epi16(
+            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+        );
+        #[rustfmt::skip]
+        let want = _mm512_set_epi16(
+            15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+            15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+        );
+        assert_eq_m512i(_mm512_max_epu16(rising, falling), want);
+    }
+
+    // Gated on `avx512bw`, like `test_mm512_max_epu16`: the intrinsic under test needs
+    // that feature, and a gate of `avx512f` alone would let the test run on a CPU
+    // that cannot execute it.
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_max_epu16() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        // All-zero mask: the result must equal the source operand (`a`).
+        let r = _mm512_mask_max_epu16(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        // Partial mask: the expected vector written out below is identical to `a`.
+        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        assert_eq_m512i(r, e);
+    }
+
+    // Gated on `avx512bw`, like `test_mm512_max_epu16`: the intrinsic under test needs
+    // that feature, and a gate of `avx512f` alone would let the test run on a CPU
+    // that cannot execute it.
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_maskz_max_epu16() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        #[rustfmt::skip]
+        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        // All-zero mask: the result must be the zero vector.
+        let r = _mm512_maskz_max_epu16(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        // Partial mask: compare against an explicitly spelled-out vector.
+        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
+        assert_eq_m512i(r, e);
+    }
+
+ // Unsigned 8-bit lane-wise max of an ascending and a descending pattern,
+ // compared against a hand-written expected vector.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_max_epu8() {
+     #[rustfmt::skip]
+     let ascending = _mm512_set_epi8(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     #[rustfmt::skip]
+     let descending = _mm512_set_epi8(
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     #[rustfmt::skip]
+     let expected = _mm512_set_epi8(
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     assert_eq_m512i(_mm512_max_epu8(ascending, descending), expected);
+ }
+
+ // Masked unsigned 8-bit max. With an all-zero mask the result must equal the
+ // first argument (`a`); with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_max_epu8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_max_epu8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_mask_max_epu8(a, 0, a, b);
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_max_epu8(
+         a,
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     assert_eq_m512i(r, e);
+ }
+
+ // Zero-masked unsigned 8-bit max. With an all-zero mask the result must be
+ // all zeros; with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_max_epu8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_max_epu8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_maskz_max_epu8(0, a, b);
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_max_epu8(
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
+     assert_eq_m512i(r, e);
+ }
+
+ // Signed 16-bit lane-wise max of an ascending and a descending pattern,
+ // compared against a hand-written expected vector.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_max_epi16() {
+     #[rustfmt::skip]
+     let ascending = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     #[rustfmt::skip]
+     let descending = _mm512_set_epi16(
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     #[rustfmt::skip]
+     let expected = _mm512_set_epi16(
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     assert_eq_m512i(_mm512_max_epi16(ascending, descending), expected);
+ }
+
+ // Masked signed 16-bit max. With an all-zero mask the result must equal the
+ // first argument (`a`); with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_max_epi16`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_max_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                              15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_mask_max_epi16(a, 0, a, b);
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
+     #[rustfmt::skip]
+     let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     assert_eq_m512i(r, e);
+ }
+
+ // Zero-masked signed 16-bit max. With an all-zero mask the result must be
+ // all zeros; with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_max_epi16`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_max_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                              15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_maskz_max_epi16(0, a, b);
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
+     #[rustfmt::skip]
+     let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
+     assert_eq_m512i(r, e);
+ }
+
+ // Signed 8-bit lane-wise max of an ascending and a descending pattern,
+ // compared against a hand-written expected vector.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_max_epi8() {
+     #[rustfmt::skip]
+     let ascending = _mm512_set_epi8(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     #[rustfmt::skip]
+     let descending = _mm512_set_epi8(
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     #[rustfmt::skip]
+     let expected = _mm512_set_epi8(
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+         15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     assert_eq_m512i(_mm512_max_epi8(ascending, descending), expected);
+ }
+
+ // Masked signed 8-bit max. With an all-zero mask the result must equal the
+ // first argument (`a`); with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_max_epi8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_max_epi8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_mask_max_epi8(a, 0, a, b);
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_max_epi8(
+         a,
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     assert_eq_m512i(r, e);
+ }
+
+ // Zero-masked signed 8-bit max. With an all-zero mask the result must be
+ // all zeros; with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_max_epi8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_max_epi8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_maskz_max_epi8(0, a, b);
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_max_epi8(
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
+     assert_eq_m512i(r, e);
+ }
+
+ // Unsigned 16-bit lane-wise min of an ascending and a descending pattern,
+ // compared against a hand-written expected vector.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_min_epu16() {
+     #[rustfmt::skip]
+     let ascending = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     #[rustfmt::skip]
+     let descending = _mm512_set_epi16(
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     #[rustfmt::skip]
+     let expected = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     assert_eq_m512i(_mm512_min_epu16(ascending, descending), expected);
+ }
+
+ // Masked unsigned 16-bit min. With an all-zero mask the result must equal the
+ // first argument (`a`); with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epu16`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_min_epu16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                              15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_mask_min_epu16(a, 0, a, b);
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
+     #[rustfmt::skip]
+     let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                              0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Zero-masked unsigned 16-bit min. With an all-zero mask the result must be
+ // all zeros; with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epu16`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_min_epu16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                              15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_maskz_min_epu16(0, a, b);
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
+     #[rustfmt::skip]
+     let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                              0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Unsigned 8-bit lane-wise min of an ascending and a descending pattern,
+ // compared against a hand-written expected vector.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_min_epu8() {
+     #[rustfmt::skip]
+     let ascending = _mm512_set_epi8(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     #[rustfmt::skip]
+     let descending = _mm512_set_epi8(
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     #[rustfmt::skip]
+     let expected = _mm512_set_epi8(
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     assert_eq_m512i(_mm512_min_epu8(ascending, descending), expected);
+ }
+
+ // Masked unsigned 8-bit min. With an all-zero mask the result must equal the
+ // first argument (`a`); with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epu8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_min_epu8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_mask_min_epu8(a, 0, a, b);
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_min_epu8(
+         a,
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Zero-masked unsigned 8-bit min. With an all-zero mask the result must be
+ // all zeros; with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epu8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_min_epu8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_maskz_min_epu8(0, a, b);
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_min_epu8(
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Signed 16-bit lane-wise min of an ascending and a descending pattern,
+ // compared against a hand-written expected vector.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_min_epi16() {
+     #[rustfmt::skip]
+     let ascending = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     #[rustfmt::skip]
+     let descending = _mm512_set_epi16(
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     #[rustfmt::skip]
+     let expected = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     assert_eq_m512i(_mm512_min_epi16(ascending, descending), expected);
+ }
+
+ // Masked signed 16-bit min. With an all-zero mask the result must equal the
+ // first argument (`a`); with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epi16`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_min_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                              15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_mask_min_epi16(a, 0, a, b);
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
+     #[rustfmt::skip]
+     let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                              0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Zero-masked signed 16-bit min. With an all-zero mask the result must be
+ // all zeros; with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epi16`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_min_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                              0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                              15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_maskz_min_epi16(0, a, b);
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
+     #[rustfmt::skip]
+     let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                              0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Signed 8-bit lane-wise min of an ascending and a descending pattern,
+ // compared against a hand-written expected vector.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_min_epi8() {
+     #[rustfmt::skip]
+     let ascending = _mm512_set_epi8(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+     );
+     #[rustfmt::skip]
+     let descending = _mm512_set_epi8(
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+         15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     #[rustfmt::skip]
+     let expected = _mm512_set_epi8(
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+         0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+     );
+     assert_eq_m512i(_mm512_min_epi8(ascending, descending), expected);
+ }
+
+ // Masked signed 8-bit min. With an all-zero mask the result must equal the
+ // first argument (`a`); with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epi8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_min_epi8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_mask_min_epi8(a, 0, a, b);
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_min_epi8(
+         a,
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Zero-masked signed 8-bit min. With an all-zero mask the result must be
+ // all zeros; with the second mask it must equal `e`.
+ // Gated on `avx512bw`, the same feature the unmasked `test_mm512_min_epi8`
+ // requires, so the test is skipped on CPUs that only report `avx512f`.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_min_epi8() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+     let r = _mm512_maskz_min_epi8(0, a, b);
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_min_epi8(
+         0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
+     assert_eq_m512i(r, e);
+ }
+
+ // Broadcasts -2 and -1 into every 16-bit lane and expects the unsigned
+ // less-than compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmplt_epu16_mask() {
+     let lhs = _mm512_set1_epi16(-2);
+     let rhs = _mm512_set1_epi16(-1);
+     assert_eq!(_mm512_cmplt_epu16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked unsigned less-than on broadcast -2 vs. -1: the result is expected
+ // to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmplt_epu16_mask() {
+     let lhs = _mm512_set1_epi16(-2);
+     let rhs = _mm512_set1_epi16(-1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmplt_epu16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts -2 and -1 into every 8-bit lane and expects the unsigned
+ // less-than compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmplt_epu8_mask() {
+     let lhs = _mm512_set1_epi8(-2);
+     let rhs = _mm512_set1_epi8(-1);
+     assert_eq!(_mm512_cmplt_epu8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked unsigned less-than on broadcast -2 vs. -1 (8-bit lanes): the result
+ // is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmplt_epu8_mask() {
+     let lhs = _mm512_set1_epi8(-2);
+     let rhs = _mm512_set1_epi8(-1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmplt_epu8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts -2 and -1 into every 16-bit lane and expects the signed
+ // less-than compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmplt_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-2);
+     let rhs = _mm512_set1_epi16(-1);
+     assert_eq!(_mm512_cmplt_epi16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked signed less-than on broadcast -2 vs. -1: the result is expected
+ // to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmplt_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-2);
+     let rhs = _mm512_set1_epi16(-1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmplt_epi16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts -2 and -1 into every 8-bit lane and expects the signed
+ // less-than compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmplt_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-2);
+     let rhs = _mm512_set1_epi8(-1);
+     assert_eq!(_mm512_cmplt_epi8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked signed less-than on broadcast -2 vs. -1 (8-bit lanes): the result
+ // is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmplt_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-2);
+     let rhs = _mm512_set1_epi8(-1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmplt_epi8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts 2 and 1 into every 16-bit lane and expects the unsigned
+ // greater-than compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpgt_epu16_mask() {
+     let lhs = _mm512_set1_epi16(2);
+     let rhs = _mm512_set1_epi16(1);
+     assert_eq!(_mm512_cmpgt_epu16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked unsigned greater-than on broadcast 2 vs. 1: the result is expected
+ // to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
+     let lhs = _mm512_set1_epi16(2);
+     let rhs = _mm512_set1_epi16(1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmpgt_epu16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts 2 and 1 into every 8-bit lane and expects the unsigned
+ // greater-than compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpgt_epu8_mask() {
+     let lhs = _mm512_set1_epi8(2);
+     let rhs = _mm512_set1_epi8(1);
+     assert_eq!(_mm512_cmpgt_epu8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked unsigned greater-than on broadcast 2 vs. 1 (8-bit lanes): the
+ // result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
+     let lhs = _mm512_set1_epi8(2);
+     let rhs = _mm512_set1_epi8(1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpgt_epu8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts 2 and -1 into every 16-bit lane and expects the signed
+ // greater-than compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpgt_epi16_mask() {
+     let lhs = _mm512_set1_epi16(2);
+     let rhs = _mm512_set1_epi16(-1);
+     assert_eq!(_mm512_cmpgt_epi16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked signed greater-than on broadcast 2 vs. -1: the result is expected
+ // to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
+     let lhs = _mm512_set1_epi16(2);
+     let rhs = _mm512_set1_epi16(-1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmpgt_epi16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts 2 and -1 into every 8-bit lane and expects the signed
+ // greater-than compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpgt_epi8_mask() {
+     let lhs = _mm512_set1_epi8(2);
+     let rhs = _mm512_set1_epi8(-1);
+     assert_eq!(_mm512_cmpgt_epi8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked signed greater-than on broadcast 2 vs. -1 (8-bit lanes): the
+ // result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
+     let lhs = _mm512_set1_epi8(2);
+     let rhs = _mm512_set1_epi8(-1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpgt_epi8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (16-bit lanes) and expects the unsigned
+ // less-than-or-equal compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmple_epu16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     assert_eq!(_mm512_cmple_epu16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked unsigned less-than-or-equal on equal operands (-1 broadcast): the
+ // result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmple_epu16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmple_epu16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (8-bit lanes) and expects the unsigned
+ // less-than-or-equal compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmple_epu8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     assert_eq!(_mm512_cmple_epu8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked unsigned less-than-or-equal on equal operands (-1 broadcast, 8-bit
+ // lanes): the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmple_epu8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmple_epu8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (16-bit lanes) and expects the signed
+ // less-than-or-equal compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmple_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     assert_eq!(_mm512_cmple_epi16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked signed less-than-or-equal on equal operands (-1 broadcast): the
+ // result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmple_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmple_epi16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (8-bit lanes) and expects the signed
+ // less-than-or-equal compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmple_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     assert_eq!(_mm512_cmple_epi8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked signed less-than-or-equal on equal operands (-1 broadcast, 8-bit
+ // lanes): the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmple_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmple_epi8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts 1 into both operands (16-bit lanes) and expects the unsigned
+ // greater-than-or-equal compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpge_epu16_mask() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     assert_eq!(_mm512_cmpge_epu16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked unsigned greater-than-or-equal on equal operands (1 broadcast):
+ // the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpge_epu16_mask() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmpge_epu16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts 1 into both operands (8-bit lanes) and expects the unsigned
+ // greater-than-or-equal compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpge_epu8_mask() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     assert_eq!(_mm512_cmpge_epu8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked unsigned greater-than-or-equal on equal operands (1 broadcast,
+ // 8-bit lanes): the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpge_epu8_mask() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpge_epu8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (16-bit lanes) and expects the signed
+ // greater-than-or-equal compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpge_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     assert_eq!(_mm512_cmpge_epi16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked signed greater-than-or-equal on equal operands (-1 broadcast):
+ // the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpge_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmpge_epi16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (8-bit lanes) and expects the signed
+ // greater-than-or-equal compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpge_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     assert_eq!(_mm512_cmpge_epi8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked signed greater-than-or-equal on equal operands (-1 broadcast,
+ // 8-bit lanes): the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpge_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpge_epi8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts 1 into both operands (16-bit lanes) and expects the unsigned
+ // equality compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpeq_epu16_mask() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     assert_eq!(_mm512_cmpeq_epu16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked unsigned equality on equal operands (1 broadcast): the result is
+ // expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmpeq_epu16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts 1 into both operands (8-bit lanes) and expects the unsigned
+ // equality compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpeq_epu8_mask() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     assert_eq!(_mm512_cmpeq_epu8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked unsigned equality on equal operands (1 broadcast, 8-bit lanes):
+ // the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpeq_epu8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (16-bit lanes) and expects the signed
+ // equality compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpeq_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     assert_eq!(_mm512_cmpeq_epi16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ // Masked signed equality on equal operands (-1 broadcast): the result is
+ // expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(-1);
+     let k1 = 0x5555_5555;
+     let got = _mm512_mask_cmpeq_epi16_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555);
+ }
+
+ // Broadcasts -1 into both operands (8-bit lanes) and expects the signed
+ // equality compare to set all 64 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpeq_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     assert_eq!(_mm512_cmpeq_epi8_mask(lhs, rhs), 0xFFFF_FFFF_FFFF_FFFF);
+ }
+
+ // Masked signed equality on equal operands (-1 broadcast, 8-bit lanes):
+ // the result is expected to equal the alternating input mask.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
+     let lhs = _mm512_set1_epi8(-1);
+     let rhs = _mm512_set1_epi8(-1);
+     let k1 = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpeq_epi8_mask(k1, lhs, rhs);
+     assert_eq!(got, 0x5555_5555_5555_5555);
+ }
+
+ // Broadcasts 2 and 1 into every 16-bit lane and expects the unsigned
+ // not-equal compare to set all 32 result bits.
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpneq_epu16_mask() {
+     let lhs = _mm512_set1_epi16(2);
+     let rhs = _mm512_set1_epi16(1);
+     assert_eq!(_mm512_cmpneq_epu16_mask(lhs, rhs), 0xFFFF_FFFF);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
+     let lhs = _mm512_set1_epi16(2);
+     let rhs = _mm512_set1_epi16(1);
+     let keep = 0x5555_5555; // every even-numbered bit set
+     let got = _mm512_mask_cmpneq_epu16_mask(keep, lhs, rhs);
+     assert_eq!(got, keep); // 2 != 1 everywhere: result is the write mask
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpneq_epu8_mask() {
+     let lhs = _mm512_set1_epi8(2);
+     let rhs = _mm512_set1_epi8(1);
+     let got = _mm512_cmpneq_epu8_mask(lhs, rhs);
+     // The operands differ in every byte (2 vs 1), so all 64 bits of
+     // the comparison mask are expected to be set.
+     let want = 0xFFFF_FFFF_FFFF_FFFF;
+     assert_eq!(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
+     let lhs = _mm512_set1_epi8(2);
+     let rhs = _mm512_set1_epi8(1);
+     // Write mask with every even-numbered bit set.
+     let keep = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpneq_epu8_mask(keep, lhs, rhs);
+     // The operands differ in every byte, so the expected result is
+     // the write mask itself.
+     assert_eq!(got, keep);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpneq_epi16_mask() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(-1);
+     let got = _mm512_cmpneq_epi16_mask(lhs, rhs);
+     assert_eq!(got, 0xFFFF_FFFF); // 1 != -1 everywhere: 32 one-bits expected
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(-1);
+     let keep = 0x5555_5555; // every even-numbered bit set
+     let got = _mm512_mask_cmpneq_epi16_mask(keep, lhs, rhs);
+     assert_eq!(got, keep); // 1 != -1 everywhere: result is the write mask
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmpneq_epi8_mask() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(-1);
+     let got = _mm512_cmpneq_epi8_mask(lhs, rhs);
+     // The operands differ in every byte (1 vs -1), so all 64 bits of
+     // the comparison mask are expected to be set.
+     let want = 0xFFFF_FFFF_FFFF_FFFF;
+     assert_eq!(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(-1);
+     // Write mask with every even-numbered bit set.
+     let keep = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmpneq_epi8_mask(keep, lhs, rhs);
+     // The operands differ in every byte, so the expected result is
+     // the write mask itself.
+     assert_eq!(got, keep);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmp_epu16_mask() {
+     let lhs = _mm512_set1_epi16(0);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_cmp_epu16_mask(lhs, rhs, _MM_CMPINT_LT);
+     assert_eq!(got, 0xFFFF_FFFF); // 0 < 1 everywhere: 32 one-bits expected
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmp_epu16_mask() {
+     let lhs = _mm512_set1_epi16(0);
+     let rhs = _mm512_set1_epi16(1);
+     let keep = 0x5555_5555; // every even-numbered bit set
+     let got = _mm512_mask_cmp_epu16_mask(keep, lhs, rhs, _MM_CMPINT_LT);
+     assert_eq!(got, keep); // 0 < 1 everywhere: result is the write mask
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmp_epu8_mask() {
+     let lhs = _mm512_set1_epi8(0);
+     let rhs = _mm512_set1_epi8(1);
+     let got = _mm512_cmp_epu8_mask(lhs, rhs, _MM_CMPINT_LT);
+     // 0 is less than 1 in every byte, so all 64 bits of the
+     // comparison mask are expected to be set.
+     let want = 0xFFFF_FFFF_FFFF_FFFF;
+     assert_eq!(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmp_epu8_mask() {
+     let lhs = _mm512_set1_epi8(0);
+     let rhs = _mm512_set1_epi8(1);
+     // Write mask with every even-numbered bit set.
+     let keep = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmp_epu8_mask(keep, lhs, rhs, _MM_CMPINT_LT);
+     // 0 is less than 1 in every byte, so the expected result is the
+     // write mask itself.
+     assert_eq!(got, keep);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmp_epi16_mask() {
+     let lhs = _mm512_set1_epi16(0);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_cmp_epi16_mask(lhs, rhs, _MM_CMPINT_LT);
+     assert_eq!(got, 0xFFFF_FFFF); // 0 < 1 everywhere: 32 one-bits expected
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmp_epi16_mask() {
+     let lhs = _mm512_set1_epi16(0);
+     let rhs = _mm512_set1_epi16(1);
+     let keep = 0x5555_5555; // every even-numbered bit set
+     let got = _mm512_mask_cmp_epi16_mask(keep, lhs, rhs, _MM_CMPINT_LT);
+     assert_eq!(got, keep); // 0 < 1 everywhere: result is the write mask
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_cmp_epi8_mask() {
+     let lhs = _mm512_set1_epi8(0);
+     let rhs = _mm512_set1_epi8(1);
+     let got = _mm512_cmp_epi8_mask(lhs, rhs, _MM_CMPINT_LT);
+     // 0 is less than 1 in every byte, so all 64 bits of the
+     // comparison mask are expected to be set.
+     let want = 0xFFFF_FFFF_FFFF_FFFF;
+     assert_eq!(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_cmp_epi8_mask() {
+     let lhs = _mm512_set1_epi8(0);
+     let rhs = _mm512_set1_epi8(1);
+     // Write mask with every even-numbered bit set.
+     let keep = 0x5555_5555_5555_5555;
+     let got = _mm512_mask_cmp_epi8_mask(keep, lhs, rhs, _MM_CMPINT_LT);
+     // 0 is less than 1 in every byte, so the expected result is the
+     // write mask itself.
+     assert_eq!(got, keep);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_loadu_epi16() {
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+     #[rustfmt::skip]
+     let words: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
+     let loaded = _mm512_loadu_epi16(&words[0]); // array order is the reverse of the `set` argument order
+     assert_eq_m512i(loaded, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_loadu_epi8() {
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
+                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+     #[rustfmt::skip]
+     let bytes: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
+     let loaded = _mm512_loadu_epi8(&bytes[0]); // array order is the reverse of the `set` argument order
+     assert_eq_m512i(loaded, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_storeu_epi16() {
+     let src = _mm512_set1_epi16(9);
+     let mut dst = _mm512_undefined_epi32(); // placeholder, overwritten by the store
+     _mm512_storeu_epi16(&mut dst as *mut _ as *mut i16, src);
+     assert_eq_m512i(dst, src); // the stored vector must read back unchanged
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_storeu_epi8() {
+     let src = _mm512_set1_epi8(9);
+     let mut dst = _mm512_undefined_epi32(); // placeholder, overwritten by the store
+     _mm512_storeu_epi8(&mut dst as *mut _ as *mut i8, src);
+     assert_eq_m512i(dst, src); // the stored vector must read back unchanged
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_madd_epi16() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_madd_epi16(lhs, rhs);
+     let want = _mm512_set1_epi32(2); // 1 * 1 + 1 * 1 per 32-bit element
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_madd_epi16() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_mask_madd_epi16(lhs, 0, lhs, rhs); // zero mask: source comes back
+     assert_eq_m512i(got, lhs);
+     let got = _mm512_mask_madd_epi16(lhs, 0x000F, lhs, rhs);
+     let want = _mm512_set_epi32(
+         (1 << 16) | 1, // source bits kept: two 16-bit ones seen as one i32
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         (1 << 16) | 1,
+         2, // 1 * 1 + 1 * 1 for the four elements selected by the mask
+         2,
+         2,
+         2,
+     );
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_madd_epi16() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_maskz_madd_epi16(0, lhs, rhs); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_madd_epi16(0x000F, lhs, rhs); // four low mask bits set
+     let want = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maddubs_epi16() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     let got = _mm512_maddubs_epi16(lhs, rhs);
+     let want = _mm512_set1_epi16(2); // 1 * 1 + 1 * 1 per 16-bit element
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_maddubs_epi16() {
+     let a = _mm512_set1_epi8(1);
+     let b = _mm512_set1_epi8(1);
+     let src = _mm512_set1_epi16(1);
+     let r = _mm512_mask_maddubs_epi16(src, 0, a, b); // zero mask: src must come back unchanged
+     assert_eq_m512i(r, src);
+     let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
+     #[rustfmt::skip]
+     let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                              1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2); // only the last-listed element is written: 1 * 1 + 1 * 1
+     assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_maddubs_epi16() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     let got = _mm512_maskz_maddubs_epi16(0, lhs, rhs); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_maddubs_epi16(0x00FF_00FF, lhs, rhs);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_packs_epi32() {
+     let lhs = _mm512_set1_epi32(i32::MAX);
+     let rhs = _mm512_set1_epi32(1);
+     let got = _mm512_packs_epi32(lhs, rhs); // `i32::MAX` inputs are expected to read `i16::MAX`
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
+                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_packs_epi32() {
+     let lhs = _mm512_set1_epi32(i32::MAX);
+     let rhs = _mm512_set1_epi32(1 << 16 | 1);
+     let got = _mm512_mask_packs_epi32(lhs, 0, lhs, rhs); // zero mask: source comes back
+     assert_eq_m512i(got, lhs);
+     let got = _mm512_mask_packs_epi32(rhs, 0x0000_000F, lhs, rhs); // source `rhs` is all 16-bit ones
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_packs_epi32() {
+     let lhs = _mm512_set1_epi32(i32::MAX);
+     let rhs = _mm512_set1_epi32(1);
+     let got = _mm512_maskz_packs_epi32(0, lhs, rhs); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_packs_epi32(0x0000_000F, lhs, rhs); // four low mask bits set
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_packs_epi16() {
+     let lhs = _mm512_set1_epi16(i16::MAX);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_packs_epi16(lhs, rhs); // `i16::MAX` inputs are expected to read `i8::MAX`
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_packs_epi16() {
+     let lhs = _mm512_set1_epi16(i16::MAX);
+     let rhs = _mm512_set1_epi16(1 << 8 | 1);
+     // Zero write mask: the source operand must come back unchanged.
+     let got = _mm512_mask_packs_epi16(lhs, 0, lhs, rhs);
+     assert_eq_m512i(got, lhs);
+     // Only the four low mask bits are set; the remaining bytes are
+     // expected to keep the source (`rhs`), whose bytes are all 1.
+     let keep = 0x0000_0000_0000_000F;
+     let got = _mm512_mask_packs_epi16(rhs, keep, lhs, rhs);
+     // The four selected bytes are expected to read `i8::MAX`.
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_packs_epi16() {
+     let lhs = _mm512_set1_epi16(i16::MAX);
+     let rhs = _mm512_set1_epi16(1);
+     // Zero mask: the whole result must be zero.
+     let got = _mm512_maskz_packs_epi16(0, lhs, rhs);
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     // Only the four low mask bits are set; those bytes are expected
+     // to read `i8::MAX`, every other byte zero.
+     let keep = 0x0000_0000_0000_000F;
+     let got = _mm512_maskz_packs_epi16(keep, lhs, rhs);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_packus_epi32() {
+     let lhs = _mm512_set1_epi32(-1);
+     let rhs = _mm512_set1_epi32(1);
+     let got = _mm512_packus_epi32(lhs, rhs); // the -1 inputs are expected to read 0
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
+                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_packus_epi32() {
+     let lhs = _mm512_set1_epi32(-1);
+     let rhs = _mm512_set1_epi32(1 << 16 | 1);
+     let got = _mm512_mask_packus_epi32(lhs, 0, lhs, rhs); // zero mask: source comes back
+     assert_eq_m512i(got, lhs);
+     let got = _mm512_mask_packus_epi32(rhs, 0x0000_000F, lhs, rhs); // source `rhs` is all 16-bit ones
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_packus_epi32() {
+     let lhs = _mm512_set1_epi32(-1);
+     let rhs = _mm512_set1_epi32(1);
+     let got = _mm512_maskz_packus_epi32(0, lhs, rhs); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_packus_epi32(0x0000_000F, lhs, rhs); // selected elements also expected 0
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_packus_epi16() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_packus_epi16(lhs, rhs); // the -1 inputs are expected to read 0
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
+                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_packus_epi16() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(1 << 8 | 1);
+     // Zero write mask: the source operand must come back unchanged.
+     let got = _mm512_mask_packus_epi16(lhs, 0, lhs, rhs);
+     assert_eq_m512i(got, lhs);
+     // Only the four low mask bits are set; the remaining bytes are
+     // expected to keep the source (`rhs`), whose bytes are all 1.
+     let keep = 0x0000_0000_0000_000F;
+     let got = _mm512_mask_packus_epi16(rhs, keep, lhs, rhs);
+     // The four selected bytes are expected to read 0.
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_packus_epi16() {
+     let lhs = _mm512_set1_epi16(-1);
+     let rhs = _mm512_set1_epi16(1);
+     // Zero mask: the whole result must be zero.
+     let got = _mm512_maskz_packus_epi16(0, lhs, rhs);
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     // Four low mask bits set; the selected bytes are also expected
+     // to be 0, so the full expected vector is zero.
+     let keep = 0x0000_0000_0000_000F;
+     let got = _mm512_maskz_packus_epi16(keep, lhs, rhs);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_avg_epu16() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_avg_epu16(lhs, rhs);
+     let want = _mm512_set1_epi16(1); // average of 1 and 1
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_avg_epu16() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_mask_avg_epu16(lhs, 0, lhs, rhs); // zero mask: source comes back
+     assert_eq_m512i(got, lhs);
+     let got = _mm512_mask_avg_epu16(lhs, 0x0000_000F, lhs, rhs); // source and average are both 1
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_avg_epu16() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(1);
+     let got = _mm512_maskz_avg_epu16(0, lhs, rhs); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_avg_epu16(0x0000_000F, lhs, rhs); // four low mask bits set
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_avg_epu8() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     let got = _mm512_avg_epu8(lhs, rhs);
+     let want = _mm512_set1_epi8(1); // average of 1 and 1
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_avg_epu8() {
+     let lhs = _mm512_set1_epi8(1);
+     let rhs = _mm512_set1_epi8(1);
+     // Zero write mask: the source operand must come back unchanged.
+     let got = _mm512_mask_avg_epu8(lhs, 0, lhs, rhs);
+     assert_eq_m512i(got, lhs);
+     // Four low mask bits set. The source bytes and the expected
+     // average are both 1, so the expected vector is all ones.
+     let keep = 0x0000_0000_0000_000F;
+     let got = _mm512_mask_avg_epu8(lhs, keep, lhs, rhs);
+     // Expected result, listed from the highest byte down.
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_avg_epu8() {
+     let a = _mm512_set1_epi8(1);
+     let b = _mm512_set1_epi8(1);
+     let r = _mm512_maskz_avg_epu8(0, a, b); // zero mask: the whole result must be zero
+     assert_eq_m512i(r, _mm512_setzero_si512());
+     let r = _mm512_maskz_avg_epu8(
+         0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
+         a,
+         b,
+     );
+     #[rustfmt::skip]
+     let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); // average 1 only in the four selected bytes
+     assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_sll_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let shift = _mm_set1_epi16(2);
+     let got = _mm512_sll_epi16(input, shift);
+     let want = _mm512_set1_epi16(0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_sll_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let shift = _mm_set1_epi16(2);
+     let got = _mm512_mask_sll_epi16(input, 0, input, shift); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_sll_epi16(input, 0xFFFF_FFFF, input, shift);
+     let want = _mm512_set1_epi16(0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_sll_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let shift = _mm_set1_epi16(2);
+     let got = _mm512_maskz_sll_epi16(0, input, shift); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_sll_epi16(0xFFFF_FFFF, input, shift);
+     let want = _mm512_set1_epi16(0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_slli_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let got = _mm512_slli_epi16(input, 1);
+     let want = _mm512_set1_epi16(0); // the single set bit is shifted out
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_slli_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let got = _mm512_mask_slli_epi16(input, 0, input, 1); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_slli_epi16(input, 0xFFFF_FFFF, input, 1);
+     let want = _mm512_set1_epi16(0); // the single set bit is shifted out
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_slli_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let got = _mm512_maskz_slli_epi16(0, input, 1); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_slli_epi16(0xFFFF_FFFF, input, 1);
+     let want = _mm512_set1_epi16(0); // the single set bit is shifted out
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_sllv_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_sllv_epi16(input, shifts);
+     let want = _mm512_set1_epi16(0); // the single set bit is shifted out
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_sllv_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_mask_sllv_epi16(input, 0, input, shifts); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_sllv_epi16(input, 0xFFFF_FFFF, input, shifts);
+     let want = _mm512_set1_epi16(0); // the single set bit is shifted out
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_sllv_epi16() {
+     let input = _mm512_set1_epi16(1 << 15); // only the top bit of each element is set
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_maskz_sllv_epi16(0, input, shifts); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_sllv_epi16(0xFFFF_FFFF, input, shifts);
+     let want = _mm512_set1_epi16(0); // the single set bit is shifted out
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_srl_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let shift = _mm_set1_epi16(2);
+     let got = _mm512_srl_epi16(input, shift);
+     let want = _mm512_set1_epi16(0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_srl_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let shift = _mm_set1_epi16(2);
+     let got = _mm512_mask_srl_epi16(input, 0, input, shift); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_srl_epi16(input, 0xFFFF_FFFF, input, shift);
+     let want = _mm512_set1_epi16(0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_srl_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let shift = _mm_set1_epi16(2);
+     let got = _mm512_maskz_srl_epi16(0, input, shift); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_srl_epi16(0xFFFF_FFFF, input, shift);
+     let want = _mm512_set1_epi16(0);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_srli_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let got = _mm512_srli_epi16(input, 2);
+     let want = _mm512_set1_epi16(0); // shifting right by 2 drops the set bit
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_srli_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let got = _mm512_mask_srli_epi16(input, 0, input, 2); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_srli_epi16(input, 0xFFFF_FFFF, input, 2);
+     let want = _mm512_set1_epi16(0); // shifting right by 2 drops the set bit
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_srli_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let got = _mm512_maskz_srli_epi16(0, input, 2); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_srli_epi16(0xFFFF_FFFF, input, 2);
+     let want = _mm512_set1_epi16(0); // shifting right by 2 drops the set bit
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_srlv_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_srlv_epi16(input, shifts);
+     let want = _mm512_set1_epi16(0); // shifting right by 2 drops the set bit
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_srlv_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_mask_srlv_epi16(input, 0, input, shifts); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_srlv_epi16(input, 0xFFFF_FFFF, input, shifts);
+     let want = _mm512_set1_epi16(0); // shifting right by 2 drops the set bit
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_srlv_epi16() {
+     let input = _mm512_set1_epi16(1 << 1); // only bit 1 of each element is set
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_maskz_srlv_epi16(0, input, shifts); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_srlv_epi16(0xFFFF_FFFF, input, shifts);
+     let want = _mm512_set1_epi16(0); // shifting right by 2 drops the set bit
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_sra_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let shift = _mm_set1_epi16(1);
+     let got = _mm512_sra_epi16(input, shift);
+     let want = _mm512_set1_epi16(0); // NOTE(review): 8 -> 0 implies an effective count above 3 — confirm `shift`
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_sra_epi16() {
+     let a = _mm512_set1_epi16(8);
+     let count = _mm_set1_epi16(1);
+     let r = _mm512_mask_sra_epi16(a, 0, a, count); // zero mask: src must come back unchanged
+     assert_eq_m512i(r, a);
+     let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
+     let e = _mm512_set1_epi16(0); // NOTE(review): 8 -> 0 implies an effective count above 3 — confirm `count`
+     assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_sra_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let shift = _mm_set1_epi16(1);
+     let got = _mm512_maskz_sra_epi16(0, input, shift); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_sra_epi16(0xFFFF_FFFF, input, shift);
+     let want = _mm512_set1_epi16(0); // NOTE(review): 8 -> 0 implies an effective count above 3 — confirm `shift`
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_srai_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let got = _mm512_srai_epi16(input, 2);
+     let want = _mm512_set1_epi16(2); // 8 >> 2
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_srai_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let got = _mm512_mask_srai_epi16(input, 0, input, 2); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_srai_epi16(input, 0xFFFF_FFFF, input, 2);
+     let want = _mm512_set1_epi16(2); // 8 >> 2
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_srai_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let got = _mm512_maskz_srai_epi16(0, input, 2); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_srai_epi16(0xFFFF_FFFF, input, 2);
+     let want = _mm512_set1_epi16(2); // 8 >> 2
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_srav_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_srav_epi16(input, shifts);
+     let want = _mm512_set1_epi16(2); // 8 >> 2
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_srav_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_mask_srav_epi16(input, 0, input, shifts); // zero mask: source comes back
+     assert_eq_m512i(got, input);
+     let got = _mm512_mask_srav_epi16(input, 0xFFFF_FFFF, input, shifts);
+     let want = _mm512_set1_epi16(2); // 8 >> 2
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_srav_epi16() {
+     let input = _mm512_set1_epi16(8);
+     let shifts = _mm512_set1_epi16(2);
+     let got = _mm512_maskz_srav_epi16(0, input, shifts); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_srav_epi16(0xFFFF_FFFF, input, shifts);
+     let want = _mm512_set1_epi16(2); // 8 >> 2
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_permutex2var_epi16() {
+     #[rustfmt::skip]
+     let first = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+     #[rustfmt::skip]
+     let sel = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
+                                9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
+     let second = _mm512_set1_epi16(100); // entries with `1<<5` are expected to yield this 100
+     let got = _mm512_permutex2var_epi16(first, sel, second);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
+         22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
+     );
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_permutex2var_epi16() {
+     #[rustfmt::skip]
+     let first = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+     #[rustfmt::skip]
+     let sel = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
+                                9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
+     let second = _mm512_set1_epi16(100); // entries with `1<<5` are expected to yield this 100
+     let got = _mm512_mask_permutex2var_epi16(first, 0, sel, second); // zero mask: `first` comes back
+     assert_eq_m512i(got, first);
+     let got = _mm512_mask_permutex2var_epi16(first, 0xFFFF_FFFF, sel, second);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
+         22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
+     );
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_permutex2var_epi16() {
+     #[rustfmt::skip]
+     let first = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+     #[rustfmt::skip]
+     let sel = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
+                                9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
+     let second = _mm512_set1_epi16(100); // entries with `1<<5` are expected to yield this 100
+     let got = _mm512_maskz_permutex2var_epi16(0, first, sel, second); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_permutex2var_epi16(0xFFFF_FFFF, first, sel, second);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
+         22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
+     );
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask2_permutex2var_epi16() {
+     #[rustfmt::skip]
+     let first = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+     #[rustfmt::skip]
+     let sel = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
+                                9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
+     let second = _mm512_set1_epi16(100); // entries with `1<<5` are expected to yield this 100
+     let got = _mm512_mask2_permutex2var_epi16(first, sel, 0, second); // zero mask: `sel` comes back
+     assert_eq_m512i(got, sel);
+     let got = _mm512_mask2_permutex2var_epi16(first, sel, 0xFFFF_FFFF, second);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
+         22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
+     );
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_permutexvar_epi16() {
+     let sel = _mm512_set1_epi16(1); // every element asks for index 1
+     #[rustfmt::skip]
+     let table = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+     let got = _mm512_permutexvar_epi16(sel, table);
+     let want = _mm512_set1_epi16(30); // the second-to-last `set` argument
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_permutexvar_epi16() {
+     let sel = _mm512_set1_epi16(1); // every element asks for index 1
+     #[rustfmt::skip]
+     let table = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+     let got = _mm512_mask_permutexvar_epi16(table, 0, sel, table); // zero mask: source comes back
+     assert_eq_m512i(got, table);
+     let got = _mm512_mask_permutexvar_epi16(table, 0xFFFF_FFFF, sel, table);
+     let want = _mm512_set1_epi16(30); // the second-to-last `set` argument
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_permutexvar_epi16() {
+     let sel = _mm512_set1_epi16(1); // every element asks for index 1
+     #[rustfmt::skip]
+     let table = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+     let got = _mm512_maskz_permutexvar_epi16(0, sel, table); // zero mask: everything zeroed
+     assert_eq_m512i(got, _mm512_setzero_si512());
+     let got = _mm512_maskz_permutexvar_epi16(0xFFFF_FFFF, sel, table);
+     let want = _mm512_set1_epi16(30); // the second-to-last `set` argument
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_blend_epi16() {
+     let lhs = _mm512_set1_epi16(1);
+     let rhs = _mm512_set1_epi16(2);
+     let got = _mm512_mask_blend_epi16(0xFF00_FF00, lhs, rhs); // set bits are expected to pick `rhs`
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_blend_epi8() {
+     // The mask alternates between all-ones and all-zeros bytes. A set mask
+     // bit selects the lane from the second operand (2); a clear bit selects it
+     // from the first operand (1). The most significant mask byte lines up with
+     // the first eight values passed to `_mm512_set_epi8`.
+     let ones = _mm512_set1_epi8(1);
+     let twos = _mm512_set1_epi8(2);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
+     let got = _mm512_mask_blend_epi8(0xFF00_FF00_FF00_FF00, ones, twos);
+     assert_eq_m512i(got, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_broadcastw_epi16() {
+     // The broadcast value is the last argument passed to `_mm_set_epi16` (24).
+     let words = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
+     let got = _mm512_broadcastw_epi16(words);
+     assert_eq_m512i(got, _mm512_set1_epi16(24));
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_broadcastw_epi16() {
+     let src = _mm512_set1_epi16(1);
+     let words = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
+     // All-zero mask: the result must equal `src`.
+     let kept = _mm512_mask_broadcastw_epi16(src, 0, words);
+     assert_eq_m512i(kept, src);
+     let full = _mm512_mask_broadcastw_epi16(src, 0xFFFF_FFFF, words);
+     assert_eq_m512i(full, _mm512_set1_epi16(24));
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_broadcastw_epi16() {
+     let words = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_broadcastw_epi16(0, words);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     let full = _mm512_maskz_broadcastw_epi16(0xFFFF_FFFF, words);
+     assert_eq_m512i(full, _mm512_set1_epi16(24));
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_broadcastb_epi8() {
+     // The broadcast value is the last argument passed to `_mm_set_epi8` (32).
+     let bytes = _mm_set_epi8(
+         17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+     );
+     let got = _mm512_broadcastb_epi8(bytes);
+     assert_eq_m512i(got, _mm512_set1_epi8(32));
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_broadcastb_epi8() {
+     // The value expected in every selected byte is 32, the last argument
+     // passed to `_mm_set_epi8` below.
+     let src = _mm512_set1_epi8(1);
+     let bytes = _mm_set_epi8(
+         17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+     );
+     // All-zero mask: the result must equal `src`.
+     let kept = _mm512_mask_broadcastb_epi8(src, 0, bytes);
+     assert_eq_m512i(kept, src);
+     // All 64 mask bits set: every byte must be the broadcast value.
+     let full = _mm512_mask_broadcastb_epi8(src, 0xFFFF_FFFF_FFFF_FFFF, bytes);
+     let want = _mm512_set1_epi8(32);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_broadcastb_epi8() {
+     // The value expected in every selected byte is 32, the last argument
+     // passed to `_mm_set_epi8` below.
+     let bytes = _mm_set_epi8(
+         17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+     );
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_broadcastb_epi8(0, bytes);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     let full = _mm512_maskz_broadcastb_epi8(0xFFFF_FFFF_FFFF_FFFF, bytes);
+     let want = _mm512_set1_epi8(32);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_unpackhi_epi16() {
+     // Per group of eight words, the four highest words of `b` and `a` are interleaved.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                              49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
+                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
+     assert_eq_m512i(_mm512_unpackhi_epi16(a, b), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_unpackhi_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                              49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
+                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
+     // All-zero mask: the result must equal the first argument (`a`).
+     assert_eq_m512i(_mm512_mask_unpackhi_epi16(a, 0, a, b), a);
+     let full = _mm512_mask_unpackhi_epi16(a, 0xFFFF_FFFF, a, b);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_unpackhi_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                              49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
+                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_unpackhi_epi16(0, a, b);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     assert_eq_m512i(_mm512_maskz_unpackhi_epi16(0xFFFF_FFFF, a, b), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_unpackhi_epi8() {
+     // Per group of sixteen bytes, the eight highest bytes of `b` and `a` are interleaved.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                             81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
+                             97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
+                             113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
+                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
+                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
+                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
+     assert_eq_m512i(_mm512_unpackhi_epi8(a, b), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_unpackhi_epi8() {
+     // Per group of sixteen bytes, the eight highest bytes of `b` and `a` are
+     // interleaved; `want` below spells out that result for the whole vector.
+     // The masked form is checked with no mask bits set and with all of them set.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                             81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
+                             97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
+                             113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
+                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
+                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
+                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
+     // All-zero mask: the result must equal the first argument (`a`).
+     let kept = _mm512_mask_unpackhi_epi8(a, 0, a, b);
+     assert_eq_m512i(kept, a);
+     // All 64 mask bits set: every byte must match `want`.
+     let full = _mm512_mask_unpackhi_epi8(a, 0xFFFF_FFFF_FFFF_FFFF, a, b);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_unpackhi_epi8() {
+     // Per group of sixteen bytes, the eight highest bytes of `b` and `a` are
+     // interleaved; `want` below spells out that result for the whole vector.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                             81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
+                             97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
+                             113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
+                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
+                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
+                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_unpackhi_epi8(0, a, b);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     // All 64 mask bits set: every byte must match `want`.
+     let full = _mm512_maskz_unpackhi_epi8(0xFFFF_FFFF_FFFF_FFFF, a, b);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_unpacklo_epi16() {
+     // Per group of eight words, the four lowest words of `b` and `a` are interleaved.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                              49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
+                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
+     assert_eq_m512i(_mm512_unpacklo_epi16(a, b), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_unpacklo_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                              49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
+                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
+     // All-zero mask: the result must equal the first argument (`a`).
+     assert_eq_m512i(_mm512_mask_unpacklo_epi16(a, 0, a, b), a);
+     let full = _mm512_mask_unpacklo_epi16(a, 0xFFFF_FFFF, a, b);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_unpacklo_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                              49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
+                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_unpacklo_epi16(0, a, b);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     assert_eq_m512i(_mm512_maskz_unpacklo_epi16(0xFFFF_FFFF, a, b), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_unpacklo_epi8() {
+     // Per group of sixteen bytes, the eight lowest bytes of `b` and `a` are interleaved.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                             81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
+                             97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
+                             113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
+                                89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32,
+                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
+                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64);
+     assert_eq_m512i(_mm512_unpacklo_epi8(a, b), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_unpacklo_epi8() {
+     // Per group of sixteen bytes, the eight lowest bytes of `b` and `a` are
+     // interleaved; `want` below spells out that result for the whole vector.
+     // The masked form is checked with no mask bits set and with all of them set.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                             81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
+                             97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
+                             113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
+                                89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32,
+                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
+                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64);
+     // All-zero mask: the result must equal the first argument (`a`).
+     let kept = _mm512_mask_unpacklo_epi8(a, 0, a, b);
+     assert_eq_m512i(kept, a);
+     // All 64 mask bits set: every byte must match `want`.
+     let full = _mm512_mask_unpacklo_epi8(a, 0xFFFF_FFFF_FFFF_FFFF, a, b);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_unpacklo_epi8() {
+     // Per group of sixteen bytes, the eight lowest bytes of `b` and `a` are
+     // interleaved; `want` below spells out that result for the whole vector.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
+     #[rustfmt::skip]
+     let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                             81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
+                             97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
+                             113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
+     #[rustfmt::skip]
+     let want = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
+                                89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32,
+                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
+                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64);
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_unpacklo_epi8(0, a, b);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     // All 64 mask bits set: every byte must match `want`.
+     let full = _mm512_maskz_unpacklo_epi8(0xFFFF_FFFF_FFFF_FFFF, a, b);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_mov_epi16() {
+     let src = _mm512_set1_epi16(1);
+     let a = _mm512_set1_epi16(2);
+     // All-zero mask keeps `src`; all 32 mask bits set yields `a`.
+     assert_eq_m512i(_mm512_mask_mov_epi16(src, 0, a), src);
+     let moved = _mm512_mask_mov_epi16(src, 0xFFFF_FFFF, a);
+     assert_eq_m512i(moved, a);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_mov_epi16() {
+     let a = _mm512_set1_epi16(2);
+     // All-zero mask yields zeros; all 32 mask bits set yields `a`.
+     assert_eq_m512i(_mm512_maskz_mov_epi16(0, a), _mm512_setzero_si512());
+     let moved = _mm512_maskz_mov_epi16(0xFFFF_FFFF, a);
+     assert_eq_m512i(moved, a);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_mov_epi8() {
+     // Checks the two extreme masks: no bits set and all 64 bits set.
+     // `src` and `a` hold different constants so the two outcomes differ.
+     let src = _mm512_set1_epi8(1);
+     let a = _mm512_set1_epi8(2);
+     // All-zero mask: the result must equal `src`.
+     let kept = _mm512_mask_mov_epi8(src, 0, a);
+     assert_eq_m512i(kept, src);
+     // All 64 mask bits set: the result must equal `a`.
+     let moved = _mm512_mask_mov_epi8(src, 0xFFFF_FFFF_FFFF_FFFF, a);
+     assert_eq_m512i(moved, a);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_mov_epi8() {
+     // Checks the two extreme masks: no bits set and all 64 bits set.
+     let a = _mm512_set1_epi8(2);
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_mov_epi8(0, a);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     // All 64 mask bits set: the result must equal `a`.
+     let moved = _mm512_maskz_mov_epi8(0xFFFF_FFFF_FFFF_FFFF, a);
+     assert_eq_m512i(moved, a);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_set1_epi16() {
+     let src = _mm512_set1_epi16(2);
+     let value: i16 = 11;
+     // All-zero mask: the result must equal `src`.
+     let kept = _mm512_mask_set1_epi16(src, 0, value);
+     assert_eq_m512i(kept, src);
+     let filled = _mm512_mask_set1_epi16(src, 0xFFFF_FFFF, value);
+     assert_eq_m512i(filled, _mm512_set1_epi16(11));
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_set1_epi16() {
+     let value: i16 = 11;
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_set1_epi16(0, value);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     let filled = _mm512_maskz_set1_epi16(0xFFFF_FFFF, value);
+     assert_eq_m512i(filled, _mm512_set1_epi16(11));
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_set1_epi8() {
+     // Checks the two extreme masks: no bits set and all 64 bits set.
+     // `src` holds 2 and the scalar is 11, so the two outcomes differ.
+     let src = _mm512_set1_epi8(2);
+     let value: i8 = 11;
+     // All-zero mask: the result must equal `src`.
+     let kept = _mm512_mask_set1_epi8(src, 0, value);
+     assert_eq_m512i(kept, src);
+     // All 64 mask bits set: every byte must be 11.
+     let filled = _mm512_mask_set1_epi8(src, 0xFFFF_FFFF_FFFF_FFFF, value);
+     let want = _mm512_set1_epi8(11);
+     assert_eq_m512i(filled, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_set1_epi8() {
+     // Checks the two extreme masks: no bits set and all 64 bits set.
+     let value: i8 = 11;
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_set1_epi8(0, value);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     // All 64 mask bits set: every byte must be 11.
+     let filled = _mm512_maskz_set1_epi8(0xFFFF_FFFF_FFFF_FFFF, value);
+     let want = _mm512_set1_epi8(11);
+     assert_eq_m512i(filled, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_shufflelo_epi16() {
+     // Only the four lowest words of each eight-word group are rearranged.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+     );
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
+         16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
+     );
+     assert_eq_m512i(_mm512_shufflelo_epi16(a, 0b00_01_01_11), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_shufflelo_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+     );
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
+         16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
+     );
+     // All-zero mask: the result must equal the first argument (`a`).
+     let kept = _mm512_mask_shufflelo_epi16(a, 0, a, 0b00_01_01_11);
+     assert_eq_m512i(kept, a);
+     let full = _mm512_mask_shufflelo_epi16(a, 0xFFFF_FFFF, a, 0b00_01_01_11);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_shufflelo_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+     );
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
+         16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
+     );
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_shufflelo_epi16(0, a, 0b00_01_01_11);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     let full = _mm512_maskz_shufflelo_epi16(0xFFFF_FFFF, a, 0b00_01_01_11);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_shufflehi_epi16() {
+     // Only the four highest words of each eight-word group are rearranged.
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+     );
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
+         19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
+     );
+     assert_eq_m512i(_mm512_shufflehi_epi16(a, 0b00_01_01_11), want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_mask_shufflehi_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+     );
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
+         19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
+     );
+     // All-zero mask: the result must equal the first argument (`a`).
+     let kept = _mm512_mask_shufflehi_epi16(a, 0, a, 0b00_01_01_11);
+     assert_eq_m512i(kept, a);
+     let full = _mm512_mask_shufflehi_epi16(a, 0xFFFF_FFFF, a, 0b00_01_01_11);
+     assert_eq_m512i(full, want);
+ }
+
+ #[simd_test(enable = "avx512bw")]
+ unsafe fn test_mm512_maskz_shufflehi_epi16() {
+     #[rustfmt::skip]
+     let a = _mm512_set_epi16(
+         0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+     );
+     #[rustfmt::skip]
+     let want = _mm512_set_epi16(
+         3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
+         19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
+     );
+     // All-zero mask: the result must be all zeros.
+     let zeroed = _mm512_maskz_shufflehi_epi16(0, a, 0b00_01_01_11);
+     assert_eq_m512i(zeroed, _mm512_setzero_si512());
+     let full = _mm512_maskz_shufflehi_epi16(0xFFFF_FFFF, a, 0b00_01_01_11);
+     assert_eq_m512i(full, want);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
index 11b83c537961..1d05e1a68404 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
@@ -9033,10 +9033,10 @@ pub unsafe fn _mm512_set_epi8(
e0: i8,
) -> __m512i {
let r = i8x64(
- e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46,
- e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28,
- e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10,
- e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
+ e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37,
+ e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55,
+ e56, e57, e58, e59, e60, e61, e62, e63,
);
transmute(r)
}
@@ -9081,8 +9081,8 @@ pub unsafe fn _mm512_set_epi16(
e0: i16,
) -> __m512i {
let r = i16x32(
- e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14,
- e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
+ e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
+ e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
);
transmute(r)
}
diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs
index a6878d527da1..75b4ab2662cd 100644
--- a/library/stdarch/crates/core_arch/src/x86/mod.rs
+++ b/library/stdarch/crates/core_arch/src/x86/mod.rs
@@ -298,6 +298,14 @@ types! {
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
}
+/// The `__mmask64` type used in AVX-512 intrinsics: an alias for `u64`, a 64-bit unsigned integer.
+#[allow(non_camel_case_types)]
+pub type __mmask64 = u64;
+
+/// The `__mmask32` type used in AVX-512 intrinsics: an alias for `u32`, a 32-bit unsigned integer.
+#[allow(non_camel_case_types)]
+pub type __mmask32 = u32;
+
/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer
#[allow(non_camel_case_types)]
pub type __mmask16 = u16;
@@ -492,11 +500,21 @@ impl m256Ext for __m256 {
pub(crate) trait m512iExt: Sized {
fn as_m512i(self) -> __m512i;
+ #[inline]
+ fn as_u8x64(self) -> crate::core_arch::simd::u8x64 {
+ unsafe { transmute(self.as_m512i()) } // reinterprets the `__m512i` from `as_m512i` as a `u8x64`
+ }
+
#[inline]
fn as_i8x64(self) -> crate::core_arch::simd::i8x64 {
unsafe { transmute(self.as_m512i()) }
}
+ #[inline]
+ fn as_u16x32(self) -> crate::core_arch::simd::u16x32 {
+ unsafe { transmute(self.as_m512i()) } // reinterprets the `__m512i` from `as_m512i` as a `u16x32`
+ }
+
#[inline]
fn as_i16x32(self) -> crate::core_arch::simd::i16x32 {
unsafe { transmute(self.as_m512i()) }
@@ -648,6 +666,9 @@ pub unsafe fn ud2() -> ! {
mod avx512f;
pub use self::avx512f::*;
+mod avx512bw;
+pub use self::avx512bw::*;
+
mod avx512ifma;
pub use self::avx512ifma::*;
diff --git a/library/stdarch/crates/stdarch-verify/src/lib.rs b/library/stdarch/crates/stdarch-verify/src/lib.rs
index 412e7f6c8b5d..acf22d19da02 100644
--- a/library/stdarch/crates/stdarch-verify/src/lib.rs
+++ b/library/stdarch/crates/stdarch-verify/src/lib.rs
@@ -147,6 +147,8 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"__m512i" => quote! { &M512I },
"__mmask8" => quote! { &MMASK8 },
"__mmask16" => quote! { &MMASK16 },
+ "__mmask32" => quote! { &MMASK32 },
+ "__mmask64" => quote! { &MMASK64 },
"_MM_CMPINT_ENUM" => quote! { &MM_CMPINT_ENUM },
"_MM_MANTISSA_NORM_ENUM" => quote! { &MM_MANTISSA_NORM_ENUM },
"_MM_MANTISSA_SIGN_ENUM" => quote! { &MM_MANTISSA_SIGN_ENUM },
diff --git a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
index 3f01a699ddd1..20041997abc8 100644
--- a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
+++ b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs
@@ -32,15 +32,15 @@ struct Function {
static F32: Type = Type::PrimFloat(32);
static F64: Type = Type::PrimFloat(64);
+static I8: Type = Type::PrimSigned(8);
static I16: Type = Type::PrimSigned(16);
static I32: Type = Type::PrimSigned(32);
static I64: Type = Type::PrimSigned(64);
-static I8: Type = Type::PrimSigned(8);
+static U8: Type = Type::PrimUnsigned(8);
static U16: Type = Type::PrimUnsigned(16);
static U32: Type = Type::PrimUnsigned(32);
static U64: Type = Type::PrimUnsigned(64);
static U128: Type = Type::PrimUnsigned(128);
-static U8: Type = Type::PrimUnsigned(8);
static ORDERING: Type = Type::Ordering;
static M64: Type = Type::M64;
@@ -55,6 +55,8 @@ static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
static MMASK8: Type = Type::MMASK8;
static MMASK16: Type = Type::MMASK16;
+static MMASK32: Type = Type::MMASK32;
+static MMASK64: Type = Type::MMASK64;
static MM_CMPINT_ENUM: Type = Type::MM_CMPINT_ENUM;
static MM_MANTISSA_NORM_ENUM: Type = Type::MM_MANTISSA_NORM_ENUM;
static MM_MANTISSA_SIGN_ENUM: Type = Type::MM_MANTISSA_SIGN_ENUM;
@@ -83,6 +85,8 @@ enum Type {
M512I,
MMASK8,
MMASK16,
+ MMASK32,
+ MMASK64,
MM_CMPINT_ENUM,
MM_MANTISSA_NORM_ENUM,
MM_MANTISSA_SIGN_ENUM,
@@ -691,6 +695,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
(&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
(&Type::MutPtr(&Type::PrimSigned(32)), "void*") => {}
+ (&Type::MutPtr(&Type::PrimSigned(16)), "void*") => {}
+ (&Type::MutPtr(&Type::PrimSigned(8)), "void*") => {}
(&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
(&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
(&Type::MutPtr(&Type::PrimSigned(64)), "void*") => {}
@@ -717,9 +723,11 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
+ (&Type::ConstPtr(&Type::PrimSigned(8)), "void const*") => {}
+ (&Type::ConstPtr(&Type::PrimSigned(16)), "void const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(32)), "void const*") => {}
- (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(64)), "void const*") => {}
+ (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(8)), "char const*") => {}
(&Type::ConstPtr(&Type::PrimUnsigned(16)), "unsigned short const*") => {}
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "unsigned int const*") => {}
@@ -738,6 +746,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::MMASK8, "__mmask8") => {}
(&Type::MMASK16, "__mmask16") => {}
+ (&Type::MMASK32, "__mmask32") => {}
+ (&Type::MMASK64, "__mmask64") => {}
(&Type::MM_CMPINT_ENUM, "_MM_CMPINT_ENUM") => {}
(&Type::MM_MANTISSA_NORM_ENUM, "_MM_MANTISSA_NORM_ENUM") => {}
(&Type::MM_MANTISSA_SIGN_ENUM, "_MM_MANTISSA_SIGN_ENUM") => {}