diff --git a/library/stdarch/crates/core_arch/avx512f.md b/library/stdarch/crates/core_arch/avx512f.md
index 6dd08b67f6ba..341e45e45167 100644
--- a/library/stdarch/crates/core_arch/avx512f.md
+++ b/library/stdarch/crates/core_arch/avx512f.md
@@ -1,24 +1,165 @@
 <summary>["AVX512F"]</summary><p>
  
-  * [x] [`_mm512_abs_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi32&expand=5236)
-  * [x] [`_mm512_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi64&expand=5236)
-  * [x] [`_mm512_abs_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_pd&expand=5236)
-  * [x] [`_mm512_abs_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ps&expand=5236)
-  * [x] [`_mm512_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi32&expand=5236)
-  * [x] [`_mm512_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi64&expand=5236)
-  * [x] [`_mm512_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_pd&expand=5236)
-  * [x] [`_mm512_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ps&expand=5236)
-  * [x] [`_mm512_add_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_pd&expand=5236)
-  * [x] [`_mm512_add_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ps&expand=5236)
-  * [x] [`_mm512_alignr_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi32&expand=5236)
-  * [x] [`_mm512_alignr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi64&expand=5236)
-  * [x] [`_mm512_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi32&expand=5236)
-  * [x] [`_mm512_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi64&expand=5236)
-  * [x] [`_mm512_and_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_si512&expand=5236)
-  * [x] [`_mm512_andnot_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_andnot_epi32&expand=5236)
-  * [x] [`_mm512_andnot_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_andnot_epi64&expand=5236)
-  * [x] [`_mm512_andnot_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_andnot_si512&expand=5236)
-  * [x] [`_mm512_broadcast_f32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_f32x4&expand=5236)
+  * [x] [`_mm512_abs_epi32`]
+  * [x] [`_mm512_mask_abs_epi32`]
+  * [x] [`_mm512_maskz_abs_epi32`]
+  * [x] [`_mm_mask_abs_epi32`]
+  * [x] [`_mm_maskz_abs_epi32`]
+  * [x] [`_mm256_mask_abs_epi32`]
+  * [x] [`_mm256_maskz_abs_epi32`]
+  * [x] [`_mm512_abs_epi64`]
+  * [x] [`_mm512_mask_abs_epi64`]
+  * [x] [`_mm512_maskz_abs_epi64`]
+  * [x] [`_mm_abs_epi64`]
+  * [x] [`_mm_mask_abs_epi64`]
+  * [x] [`_mm_maskz_abs_epi64`]
+  * [x] [`_mm256_abs_epi64`]
+  * [x] [`_mm256_mask_abs_epi64`]
+  * [x] [`_mm256_maskz_abs_epi64`]
+  * [x] [`_mm512_abs_pd`]
+  * [x] [`_mm512_mask_abs_pd`]
+  * [x] [`_mm512_abs_ps`]
+  * [x] [`_mm512_mask_abs_ps`]
+  * [x] [`_mm512_add_epi32`]
+  * [x] [`_mm512_mask_add_epi32`]
+  * [x] [`_mm512_maskz_add_epi32`]
+  * [x] [`_mm_mask_add_epi32`]
+  * [x] [`_mm_maskz_add_epi32`]
+  * [x] [`_mm256_mask_add_epi32`]
+  * [x] [`_mm256_maskz_add_epi32`]
+  * [x] [`_mm512_add_epi64`]
+  * [x] [`_mm512_mask_add_epi64`]
+  * [x] [`_mm512_maskz_add_epi64`]
+  * [x] [`_mm_mask_add_epi64`]
+  * [x] [`_mm_maskz_add_epi64`]
+  * [x] [`_mm256_mask_add_epi64`]
+  * [x] [`_mm256_maskz_add_epi64`]
+  * [x] [`_mm512_add_ps`]
+  * [x] [`_mm512_mask_add_ps`]
+  * [x] [`_mm512_maskz_add_ps`]
+  * [x] [`_mm_mask_add_ps`]
+  * [x] [`_mm_maskz_add_ps`]
+  * [x] [`_mm256_mask_add_ps`]
+  * [x] [`_mm256_maskz_add_ps`]
+  * [x] [`_mm512_add_pd`]
+  * [x] [`_mm512_mask_add_pd`]
+  * [x] [`_mm512_maskz_add_pd`]
+  * [x] [`_mm_mask_add_pd`]
+  * [x] [`_mm_maskz_add_pd`]
+  * [x] [`_mm256_mask_add_pd`]
+  * [x] [`_mm256_maskz_add_pd`]
+  * [x] [`_mm512_add_round_ps`]
+  * [x] [`_mm512_mask_add_round_ps`]
+  * [x] [`_mm512_maskz_add_round_ps`]
+  * [x] [`_mm512_add_round_pd`]
+  * [x] [`_mm512_mask_add_round_pd`]
+  * [x] [`_mm512_maskz_add_round_pd`]
+  * [x] [`_mm512_sub_epi32`]
+  * [x] [`_mm512_mask_sub_epi32`]
+  * [x] [`_mm512_maskz_sub_epi32`]
+  * [x] [`_mm_mask_sub_epi32`]
+  * [x] [`_mm_maskz_sub_epi32`]
+  * [x] [`_mm256_mask_sub_epi32`]
+  * [x] [`_mm256_maskz_sub_epi32`]
+  * [x] [`_mm512_sub_epi64`]
+  * [x] [`_mm512_mask_sub_epi64`]
+  * [x] [`_mm512_maskz_sub_epi64`]
+  * [x] [`_mm_mask_sub_epi64`]
+  * [x] [`_mm_maskz_sub_epi64`]
+  * [x] [`_mm256_mask_sub_epi64`]
+  * [x] [`_mm256_maskz_sub_epi64`]
+  * [x] [`_mm512_sub_ps`]
+  * [x] [`_mm512_mask_sub_ps`]
+  * [x] [`_mm512_maskz_sub_ps`]
+  * [x] [`_mm_mask_sub_ps`]
+  * [x] [`_mm_maskz_sub_ps`]
+  * [x] [`_mm256_mask_sub_ps`]
+  * [x] [`_mm256_maskz_sub_ps`]
+  * [x] [`_mm512_sub_pd`]
+  * [x] [`_mm512_mask_sub_pd`]
+  * [x] [`_mm512_maskz_sub_pd`]
+  * [x] [`_mm_mask_sub_pd`]
+  * [x] [`_mm_maskz_sub_pd`]
+  * [x] [`_mm256_mask_sub_pd`]
+  * [x] [`_mm256_maskz_sub_pd`]
+  * [x] [`_mm512_sub_round_ps`]
+  * [x] [`_mm512_mask_sub_round_ps`]
+  * [x] [`_mm512_maskz_sub_round_ps`]
+  * [x] [`_mm512_sub_round_pd`]
+  * [x] [`_mm512_mask_sub_round_pd`]
+  * [x] [`_mm512_maskz_sub_round_pd`]
+  * [x] [`_mm512_mul_epi32`]
+  * [x] [`_mm512_mask_mul_epi32`]
+  * [x] [`_mm512_maskz_mul_epi32`]
+  * [x] [`_mm_mask_mul_epi32`]
+  * [x] [`_mm_maskz_mul_epi32`]
+  * [x] [`_mm256_mask_mul_epi32`]
+  * [x] [`_mm256_maskz_mul_epi32`]
+  * [x] [`_mm512_mul_epu32`]
+  * [x] [`_mm512_mask_mul_epu32`]
+  * [x] [`_mm512_maskz_mul_epu32`]
+  * [x] [`_mm_mask_mul_epu32`]
+  * [x] [`_mm_maskz_mul_epu32`]
+  * [x] [`_mm256_mask_mul_epu32`]
+  * [x] [`_mm256_maskz_mul_epu32`]
+  * [x] [`_mm512_mul_ps`]
+  * [x] [`_mm512_mask_mul_ps`]
+  * [x] [`_mm512_maskz_mul_ps`]
+  * [x] [`_mm_mask_mul_ps`]
+  * [x] [`_mm_maskz_mul_ps`]
+  * [x] [`_mm256_mask_mul_ps`]
+  * [x] [`_mm256_maskz_mul_ps`]
+  * [x] [`_mm512_mul_pd`]
+  * [x] [`_mm512_mask_mul_pd`]
+  * [x] [`_mm512_maskz_mul_pd`]
+  * [x] [`_mm_mask_mul_pd`]
+  * [x] [`_mm_maskz_mul_pd`]
+  * [x] [`_mm256_mask_mul_pd`]
+  * [x] [`_mm256_maskz_mul_pd`]
+  * [x] [`_mm512_mul_round_ps`]
+  * [x] [`_mm512_mask_mul_round_ps`]
+  * [x] [`_mm512_maskz_mul_round_ps`]
+  * [x] [`_mm512_mul_round_pd`]
+  * [x] [`_mm512_mask_mul_round_pd`]
+  * [x] [`_mm512_maskz_mul_round_pd`]
+  * [x] [`_mm512_mullo_epi32`]
+  * [x] [`_mm512_mask_mullo_epi32`]
+  * [x] [`_mm512_maskz_mullo_epi32`]
+  * [x] [`_mm_mask_mullo_epi32`]
+  * [x] [`_mm_maskz_mullo_epi32`]
+  * [x] [`_mm256_mask_mullo_epi32`]
+  * [x] [`_mm256_maskz_mullo_epi32`]
+  * [x] [`_mm512_mullox_epi64`]
+  * [x] [`_mm512_mask_mullox_epi64`]
+  * [x] [`_mm512_div_ps`]
+  * [x] [`_mm512_mask_div_ps`]
+  * [x] [`_mm512_maskz_div_ps`]
+  * [x] [`_mm_mask_div_ps`]
+  * [x] [`_mm_maskz_div_ps`]
+  * [x] [`_mm256_mask_div_ps`]
+  * [x] [`_mm256_maskz_div_ps`]
+  * [x] [`_mm512_div_pd`]
+  * [x] [`_mm512_mask_div_pd`]
+  * [x] [`_mm512_maskz_div_pd`]
+  * [x] [`_mm_mask_div_pd`]
+  * [x] [`_mm_maskz_div_pd`]
+  * [x] [`_mm256_mask_div_pd`]
+  * [x] [`_mm256_maskz_div_pd`]
+  * [x] [`_mm512_div_round_ps`]
+  * [x] [`_mm512_mask_div_round_ps`]
+  * [x] [`_mm512_maskz_div_round_ps`]
+  * [x] [`_mm512_div_round_pd`]
+  * [x] [`_mm512_mask_div_round_pd`]
+  * [x] [`_mm512_maskz_div_round_pd`]
+  * [x] [`_mm512_alignr_epi32`]
+  * [x] [`_mm512_alignr_epi64`]
+  * [x] [`_mm512_and_epi32`]
+  * [x] [`_mm512_and_epi64`]
+  * [x] [`_mm512_and_si512`]
+  * [x] [`_mm512_andnot_epi32`]
+  * [x] [`_mm512_andnot_epi64`]
+  * [x] [`_mm512_andnot_si512`]
+  * [x] [`_mm512_broadcast_f32x4`]
   * [x] [`_mm512_broadcast_f64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_f64x4&expand=5236)
   * [x] [`_mm512_broadcast_i32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_i32x4&expand=5236)
   * [x] [`_mm512_broadcast_i64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_i64x4&expand=5236)
@@ -151,10 +292,6 @@
   * [x] [`_mm512_cvtusepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi16&expand=5236)
   * [x] [`_mm512_cvtusepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi32&expand=5236)
   * [x] [`_mm512_cvtusepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi8&expand=5236)
-  * [x] [`_mm512_div_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_pd&expand=5236)
-  * [x] [`_mm512_div_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ps&expand=5236)
-  * [x] [`_mm512_div_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_pd&expand=5236)
-  * [x] [`_mm512_div_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ps&expand=5236)
   * [x] [`_mm512_extractf32x4_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extractf32x4_ps&expand=5236)
   * [x] [`_mm512_extractf64x4_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extractf64x4_pd&expand=5236)
   * [x] [`_mm512_extracti32x4_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extracti32x4_epi32&expand=5236)
@@ -276,16 +413,6 @@
   * [x] [`_mm512_mask3_fnmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ps&expand=5236)
   * [x] [`_mm512_mask3_fnmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_pd&expand=5236)
   * [x] [`_mm512_mask3_fnmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ps&expand=5236)
-  * [x] [`_mm512_mask_abs_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi32&expand=5236)
-  * [x] [`_mm512_mask_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi64&expand=5236)
-  * [x] [`_mm512_mask_abs_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_pd&expand=5236)
-  * [x] [`_mm512_mask_abs_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_ps&expand=5236)
-  * [x] [`_mm512_mask_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi32&expand=5236)
-  * [x] [`_mm512_mask_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi64&expand=5236)
-  * [x] [`_mm512_mask_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_pd&expand=5236)
-  * [x] [`_mm512_mask_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ps&expand=5236)
-  * [x] [`_mm512_mask_add_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_pd&expand=5236)
-  * [x] [`_mm512_mask_add_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ps&expand=5236)
   * [x] [`_mm512_mask_alignr_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi32&expand=5236)
   * [x] [`_mm512_mask_alignr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi64&expand=5236)
   * [x] [`_mm512_mask_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi32&expand=5236)
@@ -434,10 +561,6 @@
   * [ ] [`_mm512_mask_cvtusepi64_storeu_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=5236)
   * [ ] [`_mm512_mask_cvtusepi64_storeu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=5236)
   * [ ] [`_mm512_mask_cvtusepi64_storeu_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=5236)
-  * [x] [`_mm512_mask_div_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_pd&expand=5236)
-  * [x] [`_mm512_mask_div_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ps&expand=5236)
-  * [x] [`_mm512_mask_div_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_pd&expand=5236)
-  * [x] [`_mm512_mask_div_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ps&expand=5236)
   * [x] [`_mm512_mask_expand_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi32&expand=5236)
   * [x] [`_mm512_mask_expand_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi64&expand=5236)
   * [x] [`_mm512_mask_expand_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_pd&expand=5236)
@@ -548,14 +671,6 @@
   * [x] [`_mm512_mask_movedup_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_movedup_pd&expand=5236)
   * [x] [`_mm512_mask_movehdup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_movehdup_ps&expand=5236)
   * [x] [`_mm512_mask_moveldup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_moveldup_ps&expand=5236)
-  * [x] [`_mm512_mask_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_epi32&expand=5236)
-  * [x] [`_mm512_mask_mul_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_epu32&expand=5236)
-  * [x] [`_mm512_mask_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pd&expand=5236)
-  * [x] [`_mm512_mask_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ps&expand=5236)
-  * [x] [`_mm512_mask_mul_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pd&expand=5236)
-  * [x] [`_mm512_mask_mul_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ps&expand=5236)
-  * [x] [`_mm512_mask_mullo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullo_epi32&expand=5236)
-  * [x] [`_mm512_mask_mullox_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullox_epi64&expand=5236)
   * [x] [`_mm512_mask_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi32&expand=5236)
   * [x] [`_mm512_mask_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi64&expand=5236)
   * [x] [`_mm512_mask_permute_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permute_pd&expand=5236)
@@ -575,30 +690,30 @@
   * [x] [`_mm512_mask_permutexvar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_ps&expand=5236)
   * [x] [`_mm512_mask_rcp14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp14_pd&expand=5236)
   * [x] [`_mm512_mask_rcp14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp14_ps&expand=5236)
-  * [x] [`_mm512_mask_reduce_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_epi32&expand=5236)
-  * [x] [`_mm512_mask_reduce_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_epi64&expand=5236)
-  * [x] [`_mm512_mask_reduce_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_pd&expand=5236)
-  * [x] [`_mm512_mask_reduce_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_ps&expand=5236)
-  * [x] [`_mm512_mask_reduce_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_and_epi32&expand=5236)
-  * [x] [`_mm512_mask_reduce_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_and_epi64&expand=5236)
-  * [x] [`_mm512_mask_reduce_max_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epi32&expand=5236)
-  * [x] [`_mm512_mask_reduce_max_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epi64&expand=5236)
-  * [x] [`_mm512_mask_reduce_max_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epu32&expand=5236)
-  * [x] [`_mm512_mask_reduce_max_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epu64&expand=5236)
-  * [x] [`_mm512_mask_reduce_max_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_pd&expand=5236)
-  * [x] [`_mm512_mask_reduce_max_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_ps&expand=5236)
-  * [x] [`_mm512_mask_reduce_min_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epi32&expand=5236)
-  * [x] [`_mm512_mask_reduce_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epi64&expand=5236)
-  * [x] [`_mm512_mask_reduce_min_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epu32&expand=5236)
-  * [x] [`_mm512_mask_reduce_min_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epu64&expand=5236)
-  * [x] [`_mm512_mask_reduce_min_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_pd&expand=5236)
-  * [x] [`_mm512_mask_reduce_min_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_ps&expand=5236)
-  * [x] [`_mm512_mask_reduce_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_epi32&expand=5236)
-  * [x] [`_mm512_mask_reduce_mul_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_epi64&expand=5236)
-  * [x] [`_mm512_mask_reduce_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_pd&expand=5236)
-  * [x] [`_mm512_mask_reduce_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_ps&expand=5236)
-  * [x] [`_mm512_mask_reduce_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_or_epi32&expand=5236)
-  * [x] [`_mm512_mask_reduce_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_or_epi64&expand=5236)
+  * [x] [`_mm512_mask_reduce_add_epi32`]
+  * [x] [`_mm512_mask_reduce_add_epi64`]
+  * [x] [`_mm512_mask_reduce_add_pd`]
+  * [x] [`_mm512_mask_reduce_add_ps`]
+  * [x] [`_mm512_mask_reduce_and_epi32`]
+  * [x] [`_mm512_mask_reduce_and_epi64`]
+  * [x] [`_mm512_mask_reduce_max_epi32`]
+  * [x] [`_mm512_mask_reduce_max_epi64`]
+  * [x] [`_mm512_mask_reduce_max_epu32`]
+  * [x] [`_mm512_mask_reduce_max_epu64`]
+  * [x] [`_mm512_mask_reduce_max_pd`]
+  * [x] [`_mm512_mask_reduce_max_ps`]
+  * [x] [`_mm512_mask_reduce_min_epi32`]
+  * [x] [`_mm512_mask_reduce_min_epi64`]
+  * [x] [`_mm512_mask_reduce_min_epu32`]
+  * [x] [`_mm512_mask_reduce_min_epu64`]
+  * [x] [`_mm512_mask_reduce_min_pd`]
+  * [x] [`_mm512_mask_reduce_min_ps`]
+  * [x] [`_mm512_mask_reduce_mul_epi32`]
+  * [x] [`_mm512_mask_reduce_mul_epi64`]
+  * [x] [`_mm512_mask_reduce_mul_pd`]
+  * [x] [`_mm512_mask_reduce_mul_ps`]
+  * [x] [`_mm512_mask_reduce_or_epi32`]
+  * [x] [`_mm512_mask_reduce_or_epi64`]
   * [x] [`_mm512_mask_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=5236)
   * [x] [`_mm512_mask_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=5236)
   * [x] [`_mm512_mask_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi32&expand=5236)
@@ -656,12 +771,6 @@
   * [ ] [`_mm512_mask_storeu_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_epi64&expand=5236)
   * [ ] [`_mm512_mask_storeu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_pd&expand=5236)
   * [ ] [`_mm512_mask_storeu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_ps&expand=5236)
-  * [x] [`_mm512_mask_sub_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi32&expand=5236)
-  * [x] [`_mm512_mask_sub_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi64&expand=5236)
-  * [x] [`_mm512_mask_sub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_pd&expand=5236)
-  * [x] [`_mm512_mask_sub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ps&expand=5236)
-  * [x] [`_mm512_mask_sub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_pd&expand=5236)
-  * [x] [`_mm512_mask_sub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ps&expand=5236)
   * [x] [`_mm512_mask_ternarylogic_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ternarylogic_epi32&expand=5236)
   * [x] [`_mm512_mask_ternarylogic_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ternarylogic_epi64&expand=5236)
   * [x] [`_mm512_mask_test_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi32_mask&expand=5236)
@@ -678,14 +787,6 @@
   * [x] [`_mm512_mask_unpacklo_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_ps&expand=5236)
   * [x] [`_mm512_mask_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi32&expand=5236)
   * [x] [`_mm512_mask_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi64&expand=5236)
-  * [x] [`_mm512_maskz_abs_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi32&expand=5236)
-  * [x] [`_mm512_maskz_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi64&expand=5236)
-  * [x] [`_mm512_maskz_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi32&expand=5236)
-  * [x] [`_mm512_maskz_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi64&expand=5236)
-  * [x] [`_mm512_maskz_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_pd&expand=5236)
-  * [x] [`_mm512_maskz_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ps&expand=5236)
-  * [x] [`_mm512_maskz_add_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_pd&expand=5236)
-  * [x] [`_mm512_maskz_add_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ps&expand=5236)
   * [x] [`_mm512_maskz_alignr_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi32&expand=5236)
   * [x] [`_mm512_maskz_alignr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi64&expand=5236)
   * [x] [`_mm512_maskz_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi32&expand=5236)
@@ -759,10 +860,6 @@
   * [x] [`_mm512_maskz_cvtusepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi64_epi16&expand=5236)
   * [x] [`_mm512_maskz_cvtusepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi64_epi32&expand=5236)
   * [x] [`_mm512_maskz_cvtusepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi64_epi8&expand=5236)
-  * [x] [`_mm512_maskz_div_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_pd&expand=5236)
-  * [x] [`_mm512_maskz_div_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ps&expand=5236)
-  * [x] [`_mm512_maskz_div_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_pd&expand=5236)
-  * [x] [`_mm512_maskz_div_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ps&expand=5236)
   * [x] [`_mm512_maskz_expand_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_epi32&expand=5236)
   * [x] [`_mm512_maskz_expand_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_epi64&expand=5236)
   * [x] [`_mm512_maskz_expand_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_pd&expand=5236)
@@ -846,13 +943,6 @@
   * [x] [`_mm512_maskz_movedup_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_movedup_pd&expand=5236)
   * [x] [`_mm512_maskz_movehdup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_movehdup_ps&expand=5236)
   * [x] [`_mm512_maskz_moveldup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_moveldup_ps&expand=5236)
-  * [x] [`_mm512_maskz_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_epi32&expand=5236)
-  * [x] [`_mm512_maskz_mul_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_epu32&expand=5236)
-  * [x] [`_mm512_maskz_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pd&expand=5236)
-  * [x] [`_mm512_maskz_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ps&expand=5236)
-  * [x] [`_mm512_maskz_mul_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pd&expand=5236)
-  * [x] [`_mm512_maskz_mul_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ps&expand=5236)
-  * [x] [`_mm512_maskz_mullo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mullo_epi32&expand=5236)
   * [x] [`_mm512_maskz_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi32&expand=5236)
   * [x] [`_mm512_maskz_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi64&expand=5236)
   * [x] [`_mm512_maskz_permute_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permute_pd&expand=5236)
@@ -920,12 +1010,6 @@
   * [x] [`_mm512_maskz_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi64&expand=5236)
   * [x] [`_mm512_maskz_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi32&expand=5236)
   * [x] [`_mm512_maskz_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi64&expand=5236)
-  * [x] [`_mm512_maskz_sub_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi32&expand=5236)
-  * [x] [`_mm512_maskz_sub_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi64&expand=5236)
-  * [x] [`_mm512_maskz_sub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_pd&expand=5236)
-  * [x] [`_mm512_maskz_sub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ps&expand=5236)
-  * [x] [`_mm512_maskz_sub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_pd&expand=5236)
-  * [x] [`_mm512_maskz_sub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ps&expand=5236)
   * [x] [`_mm512_maskz_ternarylogic_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ternarylogic_epi32&expand=5236)
   * [x] [`_mm512_maskz_ternarylogic_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ternarylogic_epi64&expand=5236)
   * [x] [`_mm512_maskz_unpackhi_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi32&expand=5236)
@@ -957,14 +1041,6 @@
   * [x] [`_mm512_movedup_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movedup_pd&expand=5236)
   * [x] [`_mm512_movehdup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movehdup_ps&expand=5236)
   * [x] [`_mm512_moveldup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_moveldup_ps&expand=5236)
-  * [x] [`_mm512_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_epi32&expand=5236)
-  * [x] [`_mm512_mul_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_epu32&expand=5236)
-  * [x] [`_mm512_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pd&expand=5236)
-  * [x] [`_mm512_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ps&expand=5236)
-  * [x] [`_mm512_mul_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pd&expand=5236)
-  * [x] [`_mm512_mul_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ps&expand=5236)
-  * [x] [`_mm512_mullo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullo_epi32&expand=5236)
-  * [x] [`_mm512_mullox_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullox_epi64&expand=5236)
   * [x] [`_mm512_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi32&expand=5236)
   * [x] [`_mm512_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi64&expand=5236)
   * [x] [`_mm512_or_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_si512&expand=5236)
@@ -985,30 +1061,30 @@
   * [x] [`_mm512_permutexvar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ps&expand=5236)
   * [x] [`_mm512_rcp14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp14_pd&expand=5236)
   * [x] [`_mm512_rcp14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp14_ps&expand=5236)
-  * [x] [`_mm512_reduce_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_epi32&expand=5236)
-  * [x] [`_mm512_reduce_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_epi64&expand=5236)
-  * [x] [`_mm512_reduce_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_pd&expand=5236)
-  * [x] [`_mm512_reduce_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ps&expand=5236)
-  * [x] [`_mm512_reduce_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_and_epi32&expand=5236)
-  * [x] [`_mm512_reduce_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_and_epi64&expand=5236)
-  * [x] [`_mm512_reduce_max_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epi32&expand=5236)
-  * [x] [`_mm512_reduce_max_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epi64&expand=5236)
-  * [x] [`_mm512_reduce_max_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epu32&expand=5236)
-  * [x] [`_mm512_reduce_max_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epu64&expand=5236)
-  * [x] [`_mm512_reduce_max_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_pd&expand=5236)
-  * [x] [`_mm512_reduce_max_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ps&expand=5236)
-  * [x] [`_mm512_reduce_min_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epi32&expand=5236)
-  * [x] [`_mm512_reduce_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epi64&expand=5236)
-  * [x] [`_mm512_reduce_min_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epu32&expand=5236)
-  * [x] [`_mm512_reduce_min_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epu64&expand=5236)
-  * [x] [`_mm512_reduce_min_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_pd&expand=5236)
-  * [x] [`_mm512_reduce_min_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ps&expand=5236)
-  * [x] [`_mm512_reduce_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_epi32&expand=5236)
-  * [x] [`_mm512_reduce_mul_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_epi64&expand=5236)
-  * [x] [`_mm512_reduce_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_pd&expand=5236)
-  * [x] [`_mm512_reduce_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ps&expand=5236)
-  * [x] [`_mm512_reduce_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_or_epi32&expand=5236)
-  * [x] [`_mm512_reduce_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_or_epi64&expand=5236)
+  * [x] [`_mm512_reduce_add_epi32`]
+  * [x] [`_mm512_reduce_add_epi64`]
+  * [x] [`_mm512_reduce_add_pd`]
+  * [x] [`_mm512_reduce_add_ps`]
+  * [x] [`_mm512_reduce_and_epi32`]
+  * [x] [`_mm512_reduce_and_epi64`]
+  * [x] [`_mm512_reduce_max_epi32`]
+  * [x] [`_mm512_reduce_max_epi64`]
+  * [x] [`_mm512_reduce_max_epu32`]
+  * [x] [`_mm512_reduce_max_epu64`]
+  * [x] [`_mm512_reduce_max_pd`]
+  * [x] [`_mm512_reduce_max_ps`]
+  * [x] [`_mm512_reduce_min_epi32`]
+  * [x] [`_mm512_reduce_min_epi64`]
+  * [x] [`_mm512_reduce_min_epu32`]
+  * [x] [`_mm512_reduce_min_epu64`]
+  * [x] [`_mm512_reduce_min_pd`]
+  * [x] [`_mm512_reduce_min_ps`]
+  * [x] [`_mm512_reduce_mul_epi32`]
+  * [x] [`_mm512_reduce_mul_epi64`]
+  * [x] [`_mm512_reduce_mul_pd`]
+  * [x] [`_mm512_reduce_mul_ps`]
+  * [x] [`_mm512_reduce_or_epi32`]
+  * [x] [`_mm512_reduce_or_epi64`]
   * [x] [`_mm512_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=5236)
   * [x] [`_mm512_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=5236)
   * [x] [`_mm512_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi32&expand=5236)
@@ -1099,12 +1175,6 @@
   * [x] [`_mm512_stream_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_pd&expand=5236)
   * [x] [`_mm512_stream_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_ps&expand=5236)
   * [x] [`_mm512_stream_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_si512&expand=5236)
-  * [x] [`_mm512_sub_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi32&expand=5236)
-  * [x] [`_mm512_sub_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi64&expand=5236)
-  * [x] [`_mm512_sub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_pd&expand=5236)
-  * [x] [`_mm512_sub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ps&expand=5236)
-  * [x] [`_mm512_sub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_pd&expand=5236)
-  * [x] [`_mm512_sub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ps&expand=5236)
   * [ ] [`_mm512_svml_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_svml_round_pd&expand=5236)
   * [x] [`_mm512_ternarylogic_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ternarylogic_epi32&expand=5236)
   * [x] [`_mm512_ternarylogic_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ternarylogic_epi64&expand=5236)
diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs
index 6108bc40dee8..a2e393ab3290 100644
--- a/library/stdarch/crates/core_arch/src/simd.rs
+++ b/library/stdarch/crates/core_arch/src/simd.rs
@@ -245,6 +245,7 @@ simd_ty!(i64x2[i64]: i64, i64 | x0, x1);
 
 simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3);
 simd_ty!(f64x2[f64]: f64, f64 | x0, x1);
+simd_ty!(f64x4[f64]: f64, f64, f64, f64 | x0, x1, x2, x3);
 
 simd_m_ty!(
     m8x16[i8]: i8,
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
index 6a0426bd0b66..73572c9cf94c 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
@@ -9,7 +9,7 @@ use stdarch_test::assert_instr;
 
 /// Computes the absolute values of packed 32-bit integers in `a`.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi32&expand=39)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpabsd))]
@@ -26,7 +26,7 @@ pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
 /// unsigned results in `dst` using writemask `k` (elements are copied from
 /// `src` when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi32&expand=40)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpabsd))]
@@ -39,7 +39,7 @@ pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _
 /// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
 /// the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi32&expand=41)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpabsd))]
@@ -49,9 +49,55 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
     transmute(simd_select_bitmask(k, abs, zero))
 }
 
+/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi32&expand=37)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpabsd))]
+pub unsafe fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+    let abs = _mm256_abs_epi32(a).as_i32x8();
+    transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
+}
+
+/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_abs_epi32&expand=38)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpabsd))]
+pub unsafe fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
+    let abs = _mm256_abs_epi32(a).as_i32x8();
+    let zero = _mm256_setzero_si256().as_i32x8();
+    transmute(simd_select_bitmask(k, abs, zero))
+}
+
+/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi32&expand=34)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpabsd))]
+pub unsafe fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    let abs = _mm_abs_epi32(a).as_i32x4();
+    transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
+}
+
+/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi32&expand=35)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpabsd))]
+pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
+    let abs = _mm_abs_epi32(a).as_i32x4();
+    let zero = _mm_setzero_si128().as_i32x4();
+    transmute(simd_select_bitmask(k, abs, zero))
+}
+
 /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_epi64&expand=48)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi64&expand=48)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpabsq))]
@@ -66,7 +112,7 @@ pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
 
 /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_epi64&expand=49)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi64&expand=49)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpabsq))]
@@ -77,7 +123,7 @@ pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __
 
 /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_abs_epi64&expand=50)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi64&expand=50)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpabsq))]
@@ -87,9 +133,47 @@ pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
     transmute(simd_select_bitmask(k, abs, zero))
 }
 
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi64&expand=45)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i {
+    let a = a.as_i64x4();
+    // all-0 is a properly initialized i64x4
+    let zero: i64x4 = mem::zeroed();
+    let sub = simd_sub(zero, a);
+    let cmp: i64x4 = simd_gt(a, zero);
+    transmute(simd_select(cmp, a, sub))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi64&expand=46)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+    let abs = _mm256_abs_epi64(a).as_i64x4();
+    transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
+}
+
+/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi64&expand=45)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpabsq))]
+pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
+    let abs = _mm256_abs_epi64(a).as_i64x4();
+    let zero = _mm256_setzero_si256().as_i64x4();
+    transmute(simd_select_bitmask(k, abs, zero))
+}
+
 /// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_ps&expand=65)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ps&expand=65)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpandq))]
@@ -102,7 +186,7 @@ pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
 
 /// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_ps&expand=66)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_ps&expand=66)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpandd))]
@@ -113,7 +197,7 @@ pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m51
 
 /// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_pd&expand=60)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_pd&expand=60)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpandq))]
@@ -126,7 +210,7 @@ pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
 
 /// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_pd&expand=61)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_pd&expand=61)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpandq))]
@@ -137,7 +221,7 @@ pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m5
 
 /// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_epi32&expand=3801)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi32&expand=3801)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
@@ -148,7 +232,7 @@ pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _
 
 /// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_epi32&expand=3802)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi32&expand=3802)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
@@ -160,7 +244,7 @@ pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
 
 /// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_epi64&expand=3807)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi64&expand=3807)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
@@ -229,7 +313,7 @@ pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
 
 /// Add packed 32-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi32&expand=100)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi32&expand=100)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpaddd))]
@@ -239,7 +323,7 @@ pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
 
 /// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi32&expand=101)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi32&expand=101)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpaddd))]
@@ -250,7 +334,7 @@ pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _
 
 /// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi32&expand=102)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi32&expand=102)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpaddd))]
@@ -260,9 +344,55 @@ pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __
     transmute(simd_select_bitmask(k, add, zero))
 }
 
+/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi32&expand=98)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddd))]
+pub unsafe fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let add = _mm256_add_epi32(a, b).as_i32x8();
+    transmute(simd_select_bitmask(k, add, src.as_i32x8()))
+}
+
+/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi32&expand=99)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddd))]
+pub unsafe fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let add = _mm256_add_epi32(a, b).as_i32x8();
+    let zero = _mm256_setzero_si256().as_i32x8();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
+/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi32&expand=95)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddd))]
+pub unsafe fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let add = _mm_add_epi32(a, b).as_i32x4();
+    transmute(simd_select_bitmask(k, add, src.as_i32x4()))
+}
+
+/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi32&expand=96)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddd))]
+pub unsafe fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let add = _mm_add_epi32(a, b).as_i32x4();
+    let zero = _mm_setzero_si128().as_i32x4();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
 /// Add packed 64-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi64&expand=109)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi64&expand=109)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpaddq))]
@@ -272,7 +402,7 @@ pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
 
 /// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi64&expand=110)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi64&expand=110)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpaddq))]
@@ -283,7 +413,7 @@ pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __
 
 /// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi64&expand=111)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi64&expand=111)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpaddq))]
@@ -293,9 +423,55 @@ pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m
     transmute(simd_select_bitmask(k, add, zero))
 }
 
+/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi64&expand=107)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddq))]
+pub unsafe fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let add = _mm256_add_epi64(a, b).as_i64x4();
+    transmute(simd_select_bitmask(k, add, src.as_i64x4()))
+}
+
+/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi64&expand=108)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddq))]
+pub unsafe fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let add = _mm256_add_epi64(a, b).as_i64x4();
+    let zero = _mm256_setzero_si256().as_i64x4();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
+/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi64&expand=104)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddq))]
+pub unsafe fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let add = _mm_add_epi64(a, b).as_i64x2();
+    transmute(simd_select_bitmask(k, add, src.as_i64x2()))
+}
+
+/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi64&expand=105)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpaddq))]
+pub unsafe fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let add = _mm_add_epi64(a, b).as_i64x2();
+    let zero = _mm_setzero_si128().as_i64x2();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
 /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_ps&expand=139)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ps&expand=139)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vaddps))]
@@ -305,7 +481,7 @@ pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
 
 /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_ps&expand=140)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ps&expand=140)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vaddps))]
@@ -316,7 +492,7 @@ pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512
 
 /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_ps&expand=141)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ps&expand=141)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vaddps))]
@@ -326,9 +502,55 @@ pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512
     transmute(simd_select_bitmask(k, add, zero))
 }
 
+/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_ps&expand=137)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddps))]
+pub unsafe fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let add = _mm256_add_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, add, src.as_f32x8()))
+}
+
+/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_ps&expand=138)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddps))]
+pub unsafe fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let add = _mm256_add_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
+/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ps&expand=134)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddps))]
+pub unsafe fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let add = _mm_add_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, add, src.as_f32x4()))
+}
+
+/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ps&expand=135)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddps))]
+pub unsafe fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let add = _mm_add_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
 /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_pd&expand=127)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_pd&expand=127)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vaddpd))]
@@ -338,7 +560,7 @@ pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
 
 /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_pd&expand=128)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_pd&expand=128)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vaddpd))]
@@ -349,7 +571,7 @@ pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51
 
 /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_pd&expand=129)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_pd&expand=129)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vaddpd))]
@@ -359,9 +581,55 @@ pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512
     transmute(simd_select_bitmask(k, add, zero))
 }
 
+/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_pd&expand=125)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddpd))]
+pub unsafe fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let add = _mm256_add_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, add, src.as_f64x4()))
+}
+
+/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_pd&expand=126)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddpd))]
+pub unsafe fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let add = _mm256_add_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
+/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_pd&expand=122)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddpd))]
+pub unsafe fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let add = _mm_add_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, add, src.as_f64x2()))
+}
+
+/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_pd&expand=123)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vaddpd))]
+pub unsafe fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let add = _mm_add_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, add, zero))
+}
+
 /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi32&expand=5694)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi32&expand=5694)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsubd))]
@@ -371,7 +639,7 @@ pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
 
 /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi32&expand=5692)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi32&expand=5692)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsubd))]
@@ -382,7 +650,7 @@ pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _
 
 /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi32&expand=5693)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi32&expand=5693)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsubd))]
@@ -392,9 +660,55 @@ pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __
     transmute(simd_select_bitmask(k, sub, zero))
 }
 
+/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi32&expand=5689)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubd))]
+pub unsafe fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let sub = _mm256_sub_epi32(a, b).as_i32x8();
+    transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
+}
+
+/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi32&expand=5690)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubd))]
+pub unsafe fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let sub = _mm256_sub_epi32(a, b).as_i32x8();
+    let zero = _mm256_setzero_si256().as_i32x8();
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
+/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi32&expand=5686)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubd))]
+pub unsafe fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let sub = _mm_sub_epi32(a, b).as_i32x4();
+    transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
+}
+
+/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi32&expand=5687)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubd))]
+pub unsafe fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let sub = _mm_sub_epi32(a, b).as_i32x4();
+    let zero = _mm_setzero_si128().as_i32x4();
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
 /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi64&expand=5703)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi64&expand=5703)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsubq))]
@@ -404,7 +718,7 @@ pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
 
 /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi64&expand=5701)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi64&expand=5701)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsubq))]
@@ -415,19 +729,65 @@ pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __
 
 /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi64&expand=5702)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi64&expand=5702)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpsubq))]
 pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
-    let add = _mm512_sub_epi64(a, b).as_i64x8();
+    let sub = _mm512_sub_epi64(a, b).as_i64x8();
     let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, add, zero))
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
+/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi64&expand=5698)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubq))]
+pub unsafe fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let sub = _mm256_sub_epi64(a, b).as_i64x4();
+    transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
+}
+
+/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi64&expand=5699)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubq))]
+pub unsafe fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let sub = _mm256_sub_epi64(a, b).as_i64x4();
+    let zero = _mm256_setzero_si256().as_i64x4();
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
+/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi64&expand=5695)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubq))]
+pub unsafe fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let sub = _mm_sub_epi64(a, b).as_i64x2();
+    transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
+}
+
+/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi64&expand=5696)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpsubq))]
+pub unsafe fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let sub = _mm_sub_epi64(a, b).as_i64x2();
+    let zero = _mm_setzero_si128().as_i64x2();
+    transmute(simd_select_bitmask(k, sub, zero))
 }
 
 /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_ps&expand=5733)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ps&expand=5733)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vsubps))]
@@ -437,7 +797,7 @@ pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
 
 /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_ps&expand=5731)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ps&expand=5731)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vsubps))]
@@ -448,7 +808,7 @@ pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512
 
 /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_ps&expand=5732)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ps&expand=5732)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vsubps))]
@@ -458,9 +818,55 @@ pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512
     transmute(simd_select_bitmask(k, sub, zero))
 }
 
+/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_ps&expand=5728)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubps))]
+pub unsafe fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let sub = _mm256_sub_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
+}
+
+/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_ps&expand=5729)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubps))]
+pub unsafe fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let sub = _mm256_sub_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
+/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ps&expand=5725)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubps))]
+pub unsafe fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let sub = _mm_sub_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
+}
+
+/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ps&expand=5726)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubps))]
+pub unsafe fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let sub = _mm_sub_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
 /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_pd&expand=5721)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_pd&expand=5721)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vsubpd))]
@@ -470,7 +876,7 @@ pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
 
 /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_pd&expand=5719)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_pd&expand=5719)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vsubpd))]
@@ -481,7 +887,7 @@ pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51
 
 /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_pd&expand=5720)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_pd&expand=5720)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vsubpd))]
@@ -491,9 +897,55 @@ pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512
     transmute(simd_select_bitmask(k, sub, zero))
 }
 
+/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_pd&expand=5716)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubpd))]
+pub unsafe fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let sub = _mm256_sub_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
+}
+
+/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_pd&expand=5717)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubpd))]
+pub unsafe fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let sub = _mm256_sub_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
+/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_pd&expand=5713)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubpd))]
+pub unsafe fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let sub = _mm_sub_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
+}
+
+/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_pd&expand=5714)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vsubpd))]
+pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let sub = _mm_sub_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, sub, zero))
+}
+
 /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epi32&expand=3907)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_epi32&expand=3907)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
@@ -503,7 +955,7 @@ pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
 
 /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epi32&expand=3905)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_epi32&expand=3905)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
@@ -514,7 +966,7 @@ pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __
 
 /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epi32&expand=3906)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_epi32&expand=3906)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
@@ -524,9 +976,55 @@ pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m
     transmute(simd_select_bitmask(k, mul, zero))
 }
 
+/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_epi32&expand=3902)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuldq))]
+pub unsafe fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let mul = _mm256_mul_epi32(a, b).as_i64x4();
+    transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
+}
+
+/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_epi32&expand=3903)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuldq))]
+pub unsafe fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let mul = _mm256_mul_epi32(a, b).as_i64x4();
+    let zero = _mm256_setzero_si256().as_i64x4();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
+/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_epi32&expand=3899)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuldq))]
+pub unsafe fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let mul = _mm_mul_epi32(a, b).as_i64x2();
+    transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
+}
+
+/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_epi32&expand=3900)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuldq))]
+pub unsafe fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let mul = _mm_mul_epi32(a, b).as_i64x2();
+    let zero = _mm_setzero_si128().as_i64x2();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
 /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullo_epi&expand=4005)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullo_epi&expand=4005)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpmulld))]
@@ -536,7 +1034,7 @@ pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
 
 /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullo_epi32&expand=4003)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullo_epi32&expand=4003)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpmulld))]
@@ -552,7 +1050,7 @@ pub unsafe fn _mm512_mask_mullo_epi32(
 
 /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mullo_epi32&expand=4004)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mullo_epi32&expand=4004)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vpmulld))]
@@ -562,6 +1060,57 @@ pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) ->
     transmute(simd_select_bitmask(k, mul, zero))
 }
 
+/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mullo_epi32&expand=4000)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmulld))]
+pub unsafe fn _mm256_mask_mullo_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
+    let mul = _mm256_mullo_epi32(a, b).as_i32x8();
+    transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
+}
+
+/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mullo_epi32&expand=4001)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmulld))]
+pub unsafe fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let mul = _mm256_mullo_epi32(a, b).as_i32x8();
+    let zero = _mm256_setzero_si256().as_i32x8();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
+/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mullo_epi32&expand=3997)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmulld))]
+pub unsafe fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let mul = _mm_mullo_epi32(a, b).as_i32x4();
+    transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
+}
+
+/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mullo_epi32&expand=3998)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmulld))]
+pub unsafe fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let mul = _mm_mullo_epi32(a, b).as_i32x4();
+    let zero = _mm_setzero_si128().as_i32x4();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
 /// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullox_epi64&expand=4017)
@@ -623,9 +1172,55 @@ pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m
     transmute(simd_select_bitmask(k, mul, zero))
 }
 
+/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_epu32&expand=3911)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuludq))]
+pub unsafe fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let mul = _mm256_mul_epu32(a, b).as_u64x4();
+    transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
+}
+
+/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_epu32&expand=3912)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuludq))]
+pub unsafe fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+    let mul = _mm256_mul_epu32(a, b).as_u64x4();
+    let zero = _mm256_setzero_si256().as_u64x4();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
+/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_epu32&expand=3908)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuludq))]
+pub unsafe fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let mul = _mm_mul_epu32(a, b).as_u64x2();
+    transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
+}
+
+/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_epu32&expand=3909)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmuludq))]
+pub unsafe fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+    let mul = _mm_mul_epu32(a, b).as_u64x2();
+    let zero = _mm_setzero_si128().as_u64x2();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm512_mul_ps&expand=3934)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ps&expand=3934)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmulps))]
@@ -633,9 +1228,9 @@ pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
     transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
 }
 
-/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). RM.
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_ps&expand=3932)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ps&expand=3932)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmulps))]
@@ -646,7 +1241,7 @@ pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512
 
 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_ps&expand=3933)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ps&expand=3933)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmulps))]
@@ -656,9 +1251,55 @@ pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512
     transmute(simd_select_bitmask(k, mul, zero))
 }
 
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_ps&expand=3929)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulps))]
+pub unsafe fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let mul = _mm256_mul_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_ps&expand=3930)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulps))]
+pub unsafe fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let mul = _mm256_mul_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ps&expand=3926)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulps))]
+pub unsafe fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let mul = _mm_mul_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
+}
+
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ps&expand=3927)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulps))]
+pub unsafe fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let mul = _mm_mul_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_pd&expand=3925)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pd&expand=3925)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmulpd))]
@@ -666,9 +1307,9 @@ pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
     transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
 }
 
-/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). RM.
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_pd&expand=3923)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pd&expand=3923)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmulpd))]
@@ -679,7 +1320,7 @@ pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51
 
 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_pd&expand=3924)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pd&expand=3924)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmulpd))]
@@ -689,9 +1330,55 @@ pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512
     transmute(simd_select_bitmask(k, mul, zero))
 }
 
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mul_pd&expand=3920)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulpd))]
+pub unsafe fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let mul = _mm256_mul_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mul_pd&expand=3921)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulpd))]
+pub unsafe fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let mul = _mm256_mul_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_pd&expand=3917)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulpd))]
+pub unsafe fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let mul = _mm_mul_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
+}
+
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_pd&expand=3918)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmulpd))]
+pub unsafe fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let mul = _mm_mul_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, mul, zero))
+}
+
 /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_ps&expand=2162)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ps&expand=2162)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vdivps))]
@@ -701,7 +1388,7 @@ pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
 
 /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_ps&expand=2163)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ps&expand=2163)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vdivps))]
@@ -712,7 +1399,7 @@ pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512
 
 /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_ps&expand=2164)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ps&expand=2164)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vdivps))]
@@ -722,6 +1409,52 @@ pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512
     transmute(simd_select_bitmask(k, div, zero))
 }
 
+/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_ps&expand=2160)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivps))]
+pub unsafe fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let div = _mm256_div_ps(a, b).as_f32x8();
+    transmute(simd_select_bitmask(k, div, src.as_f32x8()))
+}
+
+/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_ps&expand=2161)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivps))]
+pub unsafe fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+    let div = _mm256_div_ps(a, b).as_f32x8();
+    let zero = _mm256_setzero_ps().as_f32x8();
+    transmute(simd_select_bitmask(k, div, zero))
+}
+
+/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ps&expand=2157)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivps))]
+pub unsafe fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let div = _mm_div_ps(a, b).as_f32x4();
+    transmute(simd_select_bitmask(k, div, src.as_f32x4()))
+}
+
+/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ps&expand=2158)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivps))]
+pub unsafe fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+    let div = _mm_div_ps(a, b).as_f32x4();
+    let zero = _mm_setzero_ps().as_f32x4();
+    transmute(simd_select_bitmask(k, div, zero))
+}
+
 /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_pd&expand=2153)
@@ -734,7 +1467,7 @@ pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
 
 /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_pd&expand=2154)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_pd&expand=2154)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vdivpd))]
@@ -745,7 +1478,7 @@ pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51
 
 /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_pd&expand=2155)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_pd&expand=2155)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vdivpd))]
@@ -755,6 +1488,52 @@ pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512
     transmute(simd_select_bitmask(k, div, zero))
 }
 
+/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_div_pd&expand=2151)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivpd))]
+pub unsafe fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let div = _mm256_div_pd(a, b).as_f64x4();
+    transmute(simd_select_bitmask(k, div, src.as_f64x4()))
+}
+
+/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_div_pd&expand=2152)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivpd))]
+pub unsafe fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+    let div = _mm256_div_pd(a, b).as_f64x4();
+    let zero = _mm256_setzero_pd().as_f64x4();
+    transmute(simd_select_bitmask(k, div, zero))
+}
+
+/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_pd&expand=2148)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivpd))]
+pub unsafe fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let div = _mm_div_pd(a, b).as_f64x2();
+    transmute(simd_select_bitmask(k, div, src.as_f64x2()))
+}
+
+/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_pd&expand=2149)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vdivpd))]
+pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+    let div = _mm_div_pd(a, b).as_f64x2();
+    let zero = _mm_setzero_pd().as_f64x2();
+    transmute(simd_select_bitmask(k, div, zero))
+}
+
 /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epi32&expand=3582)
@@ -25080,23 +25859,12 @@ mod tests {
             i32::MIN, 100, -100, -32,
         );
         let r = _mm512_abs_epi32(a);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            0,
-            1,
-            1,
-            i32::MAX,
-            i32::MAX.wrapping_add(1),
-            100,
-            100,
-            32,
-            0,
-            1,
-            1,
-            i32::MAX,
-            i32::MAX.wrapping_add(1),
-            100,
-            100,
-            32,
+            0, 1, 1, i32::MAX,
+            i32::MAX.wrapping_add(1), 100, 100, 32,
+            0, 1, 1, i32::MAX,
+            i32::MAX.wrapping_add(1), 100, 100, 32,
         );
         assert_eq_m512i(r, e);
     }
@@ -25113,23 +25881,12 @@ mod tests {
         let r = _mm512_mask_abs_epi32(a, 0, a);
         assert_eq_m512i(r, a);
         let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            0,
-            1,
-            1,
-            i32::MAX,
-            i32::MAX.wrapping_add(1),
-            100,
-            100,
-            32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, 1, i32::MAX,
+            i32::MAX.wrapping_add(1), 100, 100, 32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         assert_eq_m512i(r, e);
     }
@@ -25146,27 +25903,72 @@ mod tests {
         let r = _mm512_maskz_abs_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
         let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            0,
-            1,
-            1,
-            i32::MAX,
-            i32::MAX.wrapping_add(1),
-            100,
-            100,
-            32,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
+            0, 1, 1, i32::MAX,
+            i32::MAX.wrapping_add(1), 100, 100, 32,
+            0, 0, 0, 0,
+            0, 0, 0, 0,
         );
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_abs_epi32() {
+        #[rustfmt::skip]
+        let a = _mm256_setr_epi32(
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+        );
+        let r = _mm256_mask_abs_epi32(a, 0, a);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
+        #[rustfmt::skip]
+        let e = _mm256_setr_epi32(
+            0, 1, 1, i32::MAX,
+            i32::MAX.wrapping_add(1), 100, -100, -32,
+        );
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_abs_epi32() {
+        #[rustfmt::skip]
+        let a = _mm256_setr_epi32(
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+        );
+        let r = _mm256_maskz_abs_epi32(0, a);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_abs_epi32(0b00001111, a);
+        #[rustfmt::skip]
+        let e = _mm256_setr_epi32(
+            0, 1, 1, i32::MAX,
+            0, 0, 0, 0,
+        );
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_abs_epi32() {
+        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
+        let r = _mm_mask_abs_epi32(a, 0, a);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
+        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_abs_epi32() {
+        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
+        let r = _mm_maskz_abs_epi32(0, a);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_abs_epi32(0b00001111, a);
+        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_abs_ps() {
         #[rustfmt::skip]
@@ -25282,43 +26084,21 @@ mod tests {
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_add_epi32() {
+        #[rustfmt::skip]
         let a = _mm512_setr_epi32(
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         let b = _mm512_set1_epi32(1);
         let r = _mm512_add_epi32(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            1,
-            2,
-            0,
-            i32::MIN,
-            i32::MIN + 1,
-            101,
-            -99,
-            -31,
-            1,
-            2,
-            0,
-            i32::MIN,
-            i32::MIN + 1,
-            101,
-            -99,
-            -31,
+            1, 2, 0, i32::MIN,
+            i32::MIN + 1, 101, -99, -31,
+            1, 2, 0, i32::MIN,
+            i32::MIN + 1, 101, -99, -31,
         );
         assert_eq_m512i(r, e);
     }
@@ -25336,23 +26116,12 @@ mod tests {
         let r = _mm512_mask_add_epi32(a, 0, a, b);
         assert_eq_m512i(r, a);
         let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            1,
-            2,
-            0,
-            i32::MIN,
-            i32::MIN + 1,
-            101,
-            -99,
-            -31,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            1, 2, 0, i32::MIN,
+            i32::MIN + 1, 101, -99, -31,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         assert_eq_m512i(r, e);
     }
@@ -25370,445 +26139,401 @@ mod tests {
         let r = _mm512_maskz_add_epi32(0, a, b);
         assert_eq_m512i(r, _mm512_setzero_si512());
         let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            1,
-            2,
-            0,
-            i32::MIN,
-            i32::MIN + 1,
-            101,
-            -99,
-            -31,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
+            1, 2, 0, i32::MIN,
+            i32::MIN + 1, 101, -99, -31,
+            0, 0, 0, 0,
+            0, 0, 0, 0,
         );
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_add_epi32() {
+        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
+        let b = _mm256_set1_epi32(1);
+        let r = _mm256_mask_add_epi32(a, 0, a, b);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
+        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_add_epi32() {
+        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
+        let b = _mm256_set1_epi32(1);
+        let r = _mm256_maskz_add_epi32(0, a, b);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
+        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_add_epi32() {
+        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
+        let b = _mm_set1_epi32(1);
+        let r = _mm_mask_add_epi32(a, 0, a, b);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
+        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_add_epi32() {
+        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
+        let b = _mm_set1_epi32(1);
+        let r = _mm_maskz_add_epi32(0, a, b);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_add_epi32(0b00001111, a, b);
+        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_add_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(1.);
         let r = _mm512_add_ps(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            1.,
-            2.,
-            0.,
-            f32::MAX,
-            f32::MIN + 1.,
-            101.,
-            -99.,
-            -31.,
-            1.,
-            2.,
-            0.,
-            f32::MAX,
-            f32::MIN + 1.,
-            101.,
-            -99.,
-            -31.,
+            1., 2., 0., f32::MAX,
+            f32::MIN + 1., 101., -99., -31.,
+            1., 2., 0., f32::MAX,
+            f32::MIN + 1., 101., -99., -31.,
         );
         assert_eq_m512(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_add_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(1.);
         let r = _mm512_mask_add_ps(a, 0, a, b);
         assert_eq_m512(r, a);
         let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            1.,
-            2.,
-            0.,
-            f32::MAX,
-            f32::MIN + 1.,
-            101.,
-            -99.,
-            -31.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            1., 2., 0., f32::MAX,
+            f32::MIN + 1., 101., -99., -31.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         assert_eq_m512(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_add_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(1.);
         let r = _mm512_maskz_add_ps(0, a, b);
         assert_eq_m512(r, _mm512_setzero_ps());
         let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            1.,
-            2.,
-            0.,
-            f32::MAX,
-            f32::MIN + 1.,
-            101.,
-            -99.,
-            -31.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
+            1., 2., 0., f32::MAX,
+            f32::MIN + 1., 101., -99., -31.,
+            0., 0., 0., 0.,
+            0., 0., 0., 0.,
         );
         assert_eq_m512(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_add_ps() {
+        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
+        let b = _mm256_set1_ps(1.);
+        let r = _mm256_mask_add_ps(a, 0, a, b);
+        assert_eq_m256(r, a);
+        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
+        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_add_ps() {
+        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
+        let b = _mm256_set1_ps(1.);
+        let r = _mm256_maskz_add_ps(0, a, b);
+        assert_eq_m256(r, _mm256_setzero_ps());
+        let r = _mm256_maskz_add_ps(0b11111111, a, b);
+        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_add_ps() {
+        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
+        let b = _mm_set1_ps(1.);
+        let r = _mm_mask_add_ps(a, 0, a, b);
+        assert_eq_m128(r, a);
+        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
+        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
+        assert_eq_m128(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_add_ps() {
+        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
+        let b = _mm_set1_ps(1.);
+        let r = _mm_maskz_add_ps(0, a, b);
+        assert_eq_m128(r, _mm_setzero_ps());
+        let r = _mm_maskz_add_ps(0b00001111, a, b);
+        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
+        assert_eq_m128(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_sub_epi32() {
+        #[rustfmt::skip]
         let a = _mm512_setr_epi32(
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         let b = _mm512_set1_epi32(1);
         let r = _mm512_sub_epi32(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            -1,
-            0,
-            -2,
-            i32::MAX - 1,
-            i32::MAX,
-            99,
-            -101,
-            -33,
-            -1,
-            0,
-            -2,
-            i32::MAX - 1,
-            i32::MAX,
-            99,
-            -101,
-            -33,
+            -1, 0, -2, i32::MAX - 1,
+            i32::MAX, 99, -101, -33,
+            -1, 0, -2, i32::MAX - 1,
+            i32::MAX, 99, -101, -33,
         );
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_sub_epi32() {
+        #[rustfmt::skip]
         let a = _mm512_setr_epi32(
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         let b = _mm512_set1_epi32(1);
         let r = _mm512_mask_sub_epi32(a, 0, a, b);
         assert_eq_m512i(r, a);
         let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            -1,
-            0,
-            -2,
-            i32::MAX - 1,
-            i32::MAX,
-            99,
-            -101,
-            -33,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            -1, 0, -2, i32::MAX - 1,
+            i32::MAX, 99, -101, -33,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_sub_epi32() {
+        #[rustfmt::skip]
         let a = _mm512_setr_epi32(
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         let b = _mm512_set1_epi32(1);
         let r = _mm512_maskz_sub_epi32(0, a, b);
         assert_eq_m512i(r, _mm512_setzero_si512());
         let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            -1,
-            0,
-            -2,
-            i32::MAX - 1,
-            i32::MAX,
-            99,
-            -101,
-            -33,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
-            0,
+            -1, 0, -2, i32::MAX - 1,
+            i32::MAX, 99, -101, -33,
+            0, 0, 0, 0,
+            0, 0, 0, 0,
         );
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_sub_epi32() {
+        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
+        let b = _mm256_set1_epi32(1);
+        let r = _mm256_mask_sub_epi32(a, 0, a, b);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
+        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_sub_epi32() {
+        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
+        let b = _mm256_set1_epi32(1);
+        let r = _mm256_maskz_sub_epi32(0, a, b);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
+        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_sub_epi32() {
+        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
+        let b = _mm_set1_epi32(1);
+        let r = _mm_mask_sub_epi32(a, 0, a, b);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
+        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_sub_epi32() {
+        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
+        let b = _mm_set1_epi32(1);
+        let r = _mm_maskz_sub_epi32(0, a, b);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
+        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_sub_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(1.);
         let r = _mm512_sub_ps(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            -1.,
-            0.,
-            -2.,
-            f32::MAX - 1.,
-            f32::MIN,
-            99.,
-            -101.,
-            -33.,
-            -1.,
-            0.,
-            -2.,
-            f32::MAX - 1.,
-            f32::MIN,
-            99.,
-            -101.,
-            -33.,
+            -1., 0., -2., f32::MAX - 1.,
+            f32::MIN, 99., -101., -33.,
+            -1., 0., -2., f32::MAX - 1.,
+            f32::MIN, 99., -101., -33.,
         );
         assert_eq_m512(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_sub_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(1.);
         let r = _mm512_mask_sub_ps(a, 0, a, b);
         assert_eq_m512(r, a);
         let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            -1.,
-            0.,
-            -2.,
-            f32::MAX - 1.,
-            f32::MIN,
-            99.,
-            -101.,
-            -33.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            -1., 0., -2., f32::MAX - 1.,
+            f32::MIN, 99., -101., -33.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         assert_eq_m512(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_sub_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(1.);
         let r = _mm512_maskz_sub_ps(0, a, b);
         assert_eq_m512(r, _mm512_setzero_ps());
         let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            -1.,
-            0.,
-            -2.,
-            f32::MAX - 1.,
-            f32::MIN,
-            99.,
-            -101.,
-            -33.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
+            -1., 0., -2., f32::MAX - 1.,
+            f32::MIN, 99., -101., -33.,
+            0., 0., 0., 0.,
+            0., 0., 0., 0.,
         );
         assert_eq_m512(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_sub_ps() {
+        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
+        let b = _mm256_set1_ps(1.);
+        let r = _mm256_mask_sub_ps(a, 0, a, b);
+        assert_eq_m256(r, a);
+        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
+        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_sub_ps() {
+        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
+        let b = _mm256_set1_ps(1.);
+        let r = _mm256_maskz_sub_ps(0, a, b);
+        assert_eq_m256(r, _mm256_setzero_ps());
+        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
+        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_sub_ps() {
+        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
+        let b = _mm_set1_ps(1.);
+        let r = _mm_mask_sub_ps(a, 0, a, b);
+        assert_eq_m128(r, a);
+        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
+        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
+        assert_eq_m128(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_sub_ps() {
+        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
+        let b = _mm_set1_ps(1.);
+        let r = _mm_maskz_sub_ps(0, a, b);
+        assert_eq_m128(r, _mm_setzero_ps());
+        let r = _mm_maskz_sub_ps(0b00001111, a, b);
+        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
+        assert_eq_m128(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mullo_epi32() {
+        #[rustfmt::skip]
         let a = _mm512_setr_epi32(
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         let b = _mm512_set1_epi32(2);
         let r = _mm512_mullo_epi32(a, b);
@@ -25820,68 +26545,35 @@ mod tests {
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_mullo_epi32() {
+        #[rustfmt::skip]
         let a = _mm512_setr_epi32(
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         let b = _mm512_set1_epi32(2);
         let r = _mm512_mask_mullo_epi32(a, 0, a, b);
         assert_eq_m512i(r, a);
         let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_epi32(
-            0,
-            2,
-            -2,
-            -2,
-            0,
-            200,
-            -200,
-            -64,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 2, -2, -2,
+            0, 200, -200, -64,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_mullo_epi32() {
+        #[rustfmt::skip]
         let a = _mm512_setr_epi32(
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
-            0,
-            1,
-            -1,
-            i32::MAX,
-            i32::MIN,
-            100,
-            -100,
-            -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
+            0, 1, -1, i32::MAX,
+            i32::MIN, 100, -100, -32,
         );
         let b = _mm512_set1_epi32(2);
         let r = _mm512_maskz_mullo_epi32(0, a, b);
@@ -25891,44 +26583,67 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_mullo_epi32() {
+        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
+        let b = _mm256_set1_epi32(2);
+        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
+        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_mullo_epi32() {
+        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
+        let b = _mm256_set1_epi32(2);
+        let r = _mm256_maskz_mullo_epi32(0, a, b);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
+        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_mullo_epi32() {
+        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
+        let b = _mm_set1_epi32(2);
+        let r = _mm_mask_mullo_epi32(a, 0, a, b);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
+        let e = _mm_set_epi32(2, -2, -2, 0);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_mullo_epi32() {
+        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
+        let b = _mm_set1_epi32(2);
+        let r = _mm_maskz_mullo_epi32(0, a, b);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
+        let e = _mm_set_epi32(2, -2, -2, 0);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mul_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(2.);
         let r = _mm512_mul_ps(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            0.,
-            2.,
-            -2.,
-            f32::INFINITY,
-            f32::NEG_INFINITY,
-            200.,
-            -200.,
-            -64.,
-            0.,
-            2.,
-            -2.,
-            f32::INFINITY,
-            f32::NEG_INFINITY,
-            200.,
-            -200.,
+            0., 2., -2., f32::INFINITY,
+            f32::NEG_INFINITY, 200., -200., -64.,
+            0., 2., -2., f32::INFINITY,
+            f32::NEG_INFINITY, 200., -200.,
             -64.,
         );
         assert_eq_m512(r, e);
@@ -25936,94 +26651,102 @@ mod tests {
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_mul_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(2.);
         let r = _mm512_mask_mul_ps(a, 0, a, b);
         assert_eq_m512(r, a);
         let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            0.,
-            2.,
-            -2.,
-            f32::INFINITY,
-            f32::NEG_INFINITY,
-            200.,
-            -200.,
-            -64.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 2., -2., f32::INFINITY,
+            f32::NEG_INFINITY, 200., -200., -64.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         assert_eq_m512(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_mul_ps() {
+        #[rustfmt::skip]
         let a = _mm512_setr_ps(
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
-            0.,
-            1.,
-            -1.,
-            f32::MAX,
-            f32::MIN,
-            100.,
-            -100.,
-            -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
+            0., 1., -1., f32::MAX,
+            f32::MIN, 100., -100., -32.,
         );
         let b = _mm512_set1_ps(2.);
         let r = _mm512_maskz_mul_ps(0, a, b);
         assert_eq_m512(r, _mm512_setzero_ps());
         let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            0.,
-            2.,
-            -2.,
-            f32::INFINITY,
-            f32::NEG_INFINITY,
-            200.,
-            -200.,
-            -64.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
+            0., 2., -2., f32::INFINITY,
+            f32::NEG_INFINITY, 200., -200., -64.,
+            0., 0., 0., 0.,
+            0., 0., 0., 0.,
         );
         assert_eq_m512(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_mul_ps() {
+        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
+        let b = _mm256_set1_ps(2.);
+        let r = _mm256_mask_mul_ps(a, 0, a, b);
+        assert_eq_m256(r, a);
+        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm256_set_ps(
+            0., 2., -2., f32::INFINITY,
+            f32::NEG_INFINITY, 200., -200., -64.,
+        );
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_mul_ps() {
+        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
+        let b = _mm256_set1_ps(2.);
+        let r = _mm256_maskz_mul_ps(0, a, b);
+        assert_eq_m256(r, _mm256_setzero_ps());
+        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm256_set_ps(
+            0., 2., -2., f32::INFINITY,
+            f32::NEG_INFINITY, 200., -200., -64.,
+        );
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_mul_ps() {
+        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
+        let b = _mm_set1_ps(2.);
+        let r = _mm_mask_mul_ps(a, 0, a, b);
+        assert_eq_m128(r, a);
+        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
+        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
+        assert_eq_m128(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_mul_ps() {
+        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
+        let b = _mm_set1_ps(2.);
+        let r = _mm_maskz_mul_ps(0, a, b);
+        assert_eq_m128(r, _mm_setzero_ps());
+        let r = _mm_maskz_mul_ps(0b00001111, a, b);
+        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
+        assert_eq_m128(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_div_ps() {
         let a = _mm512_setr_ps(
@@ -26033,23 +26756,12 @@ mod tests {
             2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
         );
         let r = _mm512_div_ps(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            0.,
-            0.5,
-            -0.5,
-            -1.,
-            50.,
-            f32::INFINITY,
-            -50.,
-            -16.,
-            0.,
-            0.5,
-            -0.5,
-            500.,
-            f32::NEG_INFINITY,
-            50.,
-            -50.,
-            -16.,
+            0., 0.5, -0.5, -1.,
+            50., f32::INFINITY, -50., -16.,
+            0., 0.5, -0.5, 500.,
+            f32::NEG_INFINITY, 50., -50., -16.,
         );
         assert_eq_m512(r, e); // 0/0 = NAN
     }
@@ -26065,23 +26777,12 @@ mod tests {
         let r = _mm512_mask_div_ps(a, 0, a, b);
         assert_eq_m512(r, a);
         let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            0.,
-            0.5,
-            -0.5,
-            -1.,
-            50.,
-            f32::INFINITY,
-            -50.,
-            -16.,
-            0.,
-            1.,
-            -1.,
-            1000.,
-            -131.,
-            100.,
-            -100.,
-            -32.,
+            0., 0.5, -0.5, -1.,
+            50., f32::INFINITY, -50., -16.,
+            0., 1., -1., 1000.,
+            -131., 100., -100., -32.,
         );
         assert_eq_m512(r, e);
     }
@@ -26097,27 +26798,60 @@ mod tests {
         let r = _mm512_maskz_div_ps(0, a, b);
         assert_eq_m512(r, _mm512_setzero_ps());
         let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_ps(
-            0.,
-            0.5,
-            -0.5,
-            -1.,
-            50.,
-            f32::INFINITY,
-            -50.,
-            -16.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
-            0.,
+            0., 0.5, -0.5, -1.,
+            50., f32::INFINITY, -50., -16.,
+            0., 0., 0., 0.,
+            0., 0., 0., 0.,
         );
         assert_eq_m512(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_div_ps() {
+        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
+        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
+        let r = _mm256_mask_div_ps(a, 0, a, b);
+        assert_eq_m256(r, a);
+        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
+        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_div_ps() {
+        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
+        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
+        let r = _mm256_maskz_div_ps(0, a, b);
+        assert_eq_m256(r, _mm256_setzero_ps());
+        let r = _mm256_maskz_div_ps(0b11111111, a, b);
+        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
+        assert_eq_m256(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_div_ps() {
+        let a = _mm_set_ps(100., 100., -100., -32.);
+        let b = _mm_set_ps(2., 0., 2., 2.);
+        let r = _mm_mask_div_ps(a, 0, a, b);
+        assert_eq_m128(r, a);
+        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
+        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
+        assert_eq_m128(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_div_ps() {
+        let a = _mm_set_ps(100., 100., -100., -32.);
+        let b = _mm_set_ps(2., 0., 2., 2.);
+        let r = _mm_maskz_div_ps(0, a, b);
+        assert_eq_m128(r, _mm_setzero_ps());
+        let r = _mm_maskz_div_ps(0b00001111, a, b);
+        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
+        assert_eq_m128(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_max_epi32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs
index a4217e2d3492..4e63ca8845f9 100644
--- a/library/stdarch/crates/core_arch/src/x86/mod.rs
+++ b/library/stdarch/crates/core_arch/src/x86/mod.rs
@@ -495,6 +495,24 @@ impl m256Ext for __m256 {
     }
 }
 
+#[allow(non_camel_case_types)]
+#[unstable(feature = "stdsimd_internal", issue = "none")]
+pub(crate) trait m256dExt: Sized {
+    fn as_m256d(self) -> __m256d;
+
+    #[inline]
+    fn as_f64x4(self) -> crate::core_arch::simd::f64x4 {
+        unsafe { transmute(self.as_m256d()) }
+    }
+}
+
+impl m256dExt for __m256d {
+    #[inline]
+    fn as_m256d(self) -> Self {
+        self
+    }
+}
+
 #[allow(non_camel_case_types)]
 #[unstable(feature = "stdsimd_internal", issue = "none")]
 pub(crate) trait m512iExt: Sized {
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
index f216a47f498d..dab5170f6f77 100644
--- a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
@@ -15,33 +15,60 @@ mod tests {
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_abs_epi64() {
-        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let r = _mm512_abs_epi64(a);
-        let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
+        let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_abs_epi64() {
-        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let r = _mm512_mask_abs_epi64(a, 0, a);
         assert_eq_m512i(r, a);
-        let r = _mm512_mask_abs_epi64(a, 0b00001111, a);
-        let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, -100, -32);
+        let r = _mm512_mask_abs_epi64(a, 0b11111111, a);
+        let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_abs_epi64() {
-        #[rustfmt::skip]
-        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let r = _mm512_maskz_abs_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
-        let r = _mm512_maskz_abs_epi64(0b00001111, a);
-        let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, 0, 0, 0, 0);
+        let r = _mm512_maskz_abs_epi64(0b11111111, a);
+        let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32);
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_abs_epi64() {
+        let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+        let r = _mm256_abs_epi64(a);
+        let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_abs_epi64() {
+        let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+        let r = _mm256_mask_abs_epi64(a, 0, a);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_abs_epi64(a, 0b00001111, a);
+        let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_abs_epi64() {
+        let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+        let r = _mm256_maskz_abs_epi64(0, a);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_abs_epi64(0b00001111, a);
+        let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+        assert_eq_m256i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_abs_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
@@ -60,15 +87,6 @@ mod tests {
         assert_eq_m512d(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_add_epi64() {
-        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
-        let b = _mm512_set1_epi64(1);
-        let r = _mm512_add_epi64(a, b);
-        let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN + 1, 101, -99, -31);
-        assert_eq_m512i(r, e);
-    }
-
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_mov_epi64() {
         let src = _mm512_set1_epi64(1);
@@ -107,6 +125,15 @@ mod tests {
         assert_eq_m512d(r, a);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_add_epi64() {
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let b = _mm512_set1_epi64(1);
+        let r = _mm512_add_epi64(a, b);
+        let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN + 1, 101, -99, -31);
+        assert_eq_m512i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_add_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
@@ -120,11 +147,7 @@ mod tests {
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_add_epi64() {
-        #[rustfmt::skip]
-        let a = _mm512_setr_epi64(
-            0, 1, -1, i64::MAX,
-            i64::MIN, 100, -100, -32
-        );
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let b = _mm512_set1_epi64(1);
         let r = _mm512_maskz_add_epi64(0, a, b);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -133,6 +156,50 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_add_epi64() {
+        let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+        let b = _mm256_set1_epi64x(1);
+        let r = _mm256_mask_add_epi64(a, 0, a, b);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_add_epi64(a, 0b00001111, a, b);
+        let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_add_epi64() {
+        let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+        let b = _mm256_set1_epi64x(1);
+        let r = _mm256_maskz_add_epi64(0, a, b);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_add_epi64(0b00001111, a, b);
+        let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_add_epi64() {
+        let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+        let b = _mm_set1_epi64x(1);
+        let r = _mm_mask_add_epi64(a, 0, a, b);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_add_epi64(a, 0b00000011, a, b);
+        let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_add_epi64() {
+        let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+        let b = _mm_set1_epi64x(1);
+        let r = _mm_maskz_add_epi64(0, a, b);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_add_epi64(0b00000011, a, b);
+        let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_add_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
@@ -164,6 +231,50 @@ mod tests {
         assert_eq_m512d(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_add_pd() {
+        let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+        let b = _mm256_set1_pd(1.);
+        let r = _mm256_mask_add_pd(a, 0, a, b);
+        assert_eq_m256d(r, a);
+        let r = _mm256_mask_add_pd(a, 0b00001111, a, b);
+        let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_add_pd() {
+        let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+        let b = _mm256_set1_pd(1.);
+        let r = _mm256_maskz_add_pd(0, a, b);
+        assert_eq_m256d(r, _mm256_setzero_pd());
+        let r = _mm256_maskz_add_pd(0b00001111, a, b);
+        let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_add_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set1_pd(1.);
+        let r = _mm_mask_add_pd(a, 0, a, b);
+        assert_eq_m128d(r, a);
+        let r = _mm_mask_add_pd(a, 0b00000011, a, b);
+        let e = _mm_set_pd(f64::MAX, f64::MIN + 1.);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_add_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set1_pd(1.);
+        let r = _mm_maskz_add_pd(0, a, b);
+        assert_eq_m128d(r, _mm_setzero_pd());
+        let r = _mm_maskz_add_pd(0b00000011, a, b);
+        let e = _mm_set_pd(f64::MAX, f64::MIN + 1.);
+        assert_eq_m128d(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_sub_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
@@ -195,6 +306,50 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_sub_epi64() {
+        let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+        let b = _mm256_set1_epi64x(1);
+        let r = _mm256_mask_sub_epi64(a, 0, a, b);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_sub_epi64(a, 0b00001111, a, b);
+        let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_sub_epi64() {
+        let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+        let b = _mm256_set1_epi64x(1);
+        let r = _mm256_maskz_sub_epi64(0, a, b);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_sub_epi64(0b00001111, a, b);
+        let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_sub_epi64() {
+        let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+        let b = _mm_set1_epi64x(1);
+        let r = _mm_mask_sub_epi64(a, 0, a, b);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_sub_epi64(a, 0b00000011, a, b);
+        let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_sub_epi64() {
+        let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+        let b = _mm_set1_epi64x(1);
+        let r = _mm_maskz_sub_epi64(0, a, b);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_sub_epi64(0b00000011, a, b);
+        let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_sub_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
@@ -226,9 +381,53 @@ mod tests {
         assert_eq_m512d(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_sub_pd() {
+        let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+        let b = _mm256_set1_pd(1.);
+        let r = _mm256_mask_sub_pd(a, 0, a, b);
+        assert_eq_m256d(r, a);
+        let r = _mm256_mask_sub_pd(a, 0b00001111, a, b);
+        let e = _mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_sub_pd() {
+        let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+        let b = _mm256_set1_pd(1.);
+        let r = _mm256_maskz_sub_pd(0, a, b);
+        assert_eq_m256d(r, _mm256_setzero_pd());
+        let r = _mm256_maskz_sub_pd(0b00001111, a, b);
+        let e = _mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_sub_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set1_pd(1.);
+        let r = _mm_mask_sub_pd(a, 0, a, b);
+        assert_eq_m128d(r, a);
+        let r = _mm_mask_sub_pd(a, 0b00000011, a, b);
+        let e = _mm_set_pd(f64::MAX - 1., f64::MIN);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_sub_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set1_pd(1.);
+        let r = _mm_maskz_sub_pd(0, a, b);
+        assert_eq_m128d(r, _mm_setzero_pd());
+        let r = _mm_maskz_sub_pd(0b00000011, a, b);
+        let e = _mm_set_pd(f64::MAX - 1., f64::MIN);
+        assert_eq_m128d(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mul_epi32() {
-        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mul_epi32(a, b);
         let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1);
@@ -237,27 +436,22 @@ mod tests {
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_mul_epi32() {
-        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_mul_epi32(a, 0, a, b);
         assert_eq_m512i(r, a);
         let r = _mm512_mask_mul_epi32(a, 0b00001111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_set_epi64(
-            1 | 1 << 32,
-            1 | 1 << 32,
-            1 | 1 << 32,
-            1 | 1 << 32,
-            7,
-            5,
-            3,
-            1,
+            1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32,
+            7, 5, 3, 1,
         );
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_mul_epi32() {
-        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_mul_epi32(0, a, b);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -266,9 +460,53 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_mul_epi32() {
+        let a = _mm256_set1_epi32(1);
+        let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = _mm256_mask_mul_epi32(a, 0, a, b);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_mul_epi32(a, 0b00001111, a, b);
+        let e = _mm256_set_epi64x(2, 4, 6, 8);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_mul_epi32() {
+        let a = _mm256_set1_epi32(1);
+        let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = _mm256_maskz_mul_epi32(0, a, b);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_mul_epi32(0b00001111, a, b);
+        let e = _mm256_set_epi64x(2, 4, 6, 8);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_mul_epi32() {
+        let a = _mm_set1_epi32(1);
+        let b = _mm_set_epi32(1, 2, 3, 4);
+        let r = _mm_mask_mul_epi32(a, 0, a, b);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_mul_epi32(a, 0b00000011, a, b);
+        let e = _mm_set_epi64x(2, 4);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_mul_epi32() {
+        let a = _mm_set1_epi32(1);
+        let b = _mm_set_epi32(1, 2, 3, 4);
+        let r = _mm_maskz_mul_epi32(0, a, b);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_mul_epi32(0b00000011, a, b);
+        let e = _mm_set_epi64x(2, 4);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mul_epu32() {
-        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mul_epu32(a, b);
         let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1);
@@ -277,27 +515,22 @@ mod tests {
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_mul_epu32() {
-        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_mul_epu32(a, 0, a, b);
         assert_eq_m512i(r, a);
         let r = _mm512_mask_mul_epu32(a, 0b00001111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_set_epi64(
-            1 | 1 << 32,
-            1 | 1 << 32,
-            1 | 1 << 32,
-            1 | 1 << 32,
-            7,
-            5,
-            3,
-            1,
+            1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32,
+            7, 5, 3, 1,
         );
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_mul_epu32() {
-        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_mul_epu32(0, a, b);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -306,6 +539,50 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_mul_epu32() {
+        let a = _mm256_set1_epi32(1);
+        let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = _mm256_mask_mul_epu32(a, 0, a, b);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_mul_epu32(a, 0b00001111, a, b);
+        let e = _mm256_set_epi64x(2, 4, 6, 8);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_mul_epu32() {
+        let a = _mm256_set1_epi32(1);
+        let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let r = _mm256_maskz_mul_epu32(0, a, b);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_mul_epu32(0b00001111, a, b);
+        let e = _mm256_set_epi64x(2, 4, 6, 8);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_mul_epu32() {
+        let a = _mm_set1_epi32(1);
+        let b = _mm_set_epi32(1, 2, 3, 4);
+        let r = _mm_mask_mul_epu32(a, 0, a, b);
+        assert_eq_m128i(r, a);
+        let r = _mm_mask_mul_epu32(a, 0b00000011, a, b);
+        let e = _mm_set_epi64x(2, 4);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_mul_epu32() {
+        let a = _mm_set1_epi32(1);
+        let b = _mm_set_epi32(1, 2, 3, 4);
+        let r = _mm_maskz_mul_epu32(0, a, b);
+        assert_eq_m128i(r, _mm_setzero_si128());
+        let r = _mm_maskz_mul_epu32(0b00000011, a, b);
+        let e = _mm_set_epi64x(2, 4);
+        assert_eq_m128i(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mullox_epi64() {
         let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32);
@@ -331,15 +608,10 @@ mod tests {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_set1_pd(2.);
         let r = _mm512_mul_pd(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_pd(
-            0.,
-            2.,
-            f64::INFINITY,
-            f64::NEG_INFINITY,
-            f64::INFINITY,
-            f64::NEG_INFINITY,
-            -200.,
-            -64.,
+            0., 2., f64::INFINITY, f64::NEG_INFINITY,
+            f64::INFINITY, f64::NEG_INFINITY, -200., -64.,
         );
         assert_eq_m512d(r, e);
     }
@@ -351,15 +623,10 @@ mod tests {
         let r = _mm512_mask_mul_pd(a, 0, a, b);
         assert_eq_m512d(r, a);
         let r = _mm512_mask_mul_pd(a, 0b00001111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_pd(
-            0.,
-            2.,
-            f64::INFINITY,
-            f64::NEG_INFINITY,
-            f64::MAX,
-            f64::MIN,
-            -100.,
-            -32.,
+            0., 2., f64::INFINITY, f64::NEG_INFINITY,
+            f64::MAX, f64::MIN, -100., -32.,
         );
         assert_eq_m512d(r, e);
     }
@@ -375,20 +642,59 @@ mod tests {
         assert_eq_m512d(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_mul_pd() {
+        let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+        let b = _mm256_set1_pd(2.);
+        let r = _mm256_mask_mul_pd(a, 0, a, b);
+        assert_eq_m256d(r, a);
+        let r = _mm256_mask_mul_pd(a, 0b00001111, a, b);
+        let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_mul_pd() {
+        let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+        let b = _mm256_set1_pd(2.);
+        let r = _mm256_maskz_mul_pd(0, a, b);
+        assert_eq_m256d(r, _mm256_setzero_pd());
+        let r = _mm256_maskz_mul_pd(0b00001111, a, b);
+        let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_mul_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set1_pd(2.);
+        let r = _mm_mask_mul_pd(a, 0, a, b);
+        assert_eq_m128d(r, a);
+        let r = _mm_mask_mul_pd(a, 0b00000011, a, b);
+        let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_mul_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set1_pd(2.);
+        let r = _mm_maskz_mul_pd(0, a, b);
+        assert_eq_m128d(r, _mm_setzero_pd());
+        let r = _mm_maskz_mul_pd(0b00000011, a, b);
+        let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m128d(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_div_pd() {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
         let r = _mm512_div_pd(a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_pd(
-            0.,
-            0.5,
-            f64::INFINITY,
-            f64::NEG_INFINITY,
-            f64::INFINITY,
-            f64::NEG_INFINITY,
-            -50.,
-            -16.,
+            0., 0.5, f64::INFINITY, f64::NEG_INFINITY,
+            f64::INFINITY, f64::NEG_INFINITY, -50., -16.,
         );
         assert_eq_m512d(r, e);
     }
@@ -400,15 +706,10 @@ mod tests {
         let r = _mm512_mask_div_pd(a, 0, a, b);
         assert_eq_m512d(r, a);
         let r = _mm512_mask_div_pd(a, 0b00001111, a, b);
+        #[rustfmt::skip]
         let e = _mm512_setr_pd(
-            0.,
-            0.5,
-            f64::INFINITY,
-            f64::NEG_INFINITY,
-            f64::MAX,
-            f64::MIN,
-            -100.,
-            -32.,
+            0., 0.5, f64::INFINITY, f64::NEG_INFINITY,
+            f64::MAX, f64::MIN, -100., -32.,
         );
         assert_eq_m512d(r, e);
     }
@@ -424,6 +725,50 @@ mod tests {
         assert_eq_m512d(r, e);
     }
 
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_div_pd() {
+        let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+        let b = _mm256_set_pd(2., 2., 0., 0.);
+        let r = _mm256_mask_div_pd(a, 0, a, b);
+        assert_eq_m256d(r, a);
+        let r = _mm256_mask_div_pd(a, 0b00001111, a, b);
+        let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_div_pd() {
+        let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+        let b = _mm256_set_pd(2., 2., 0., 0.);
+        let r = _mm256_maskz_div_pd(0, a, b);
+        assert_eq_m256d(r, _mm256_setzero_pd());
+        let r = _mm256_maskz_div_pd(0b00001111, a, b);
+        let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_div_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set_pd(0., 0.);
+        let r = _mm_mask_div_pd(a, 0, a, b);
+        assert_eq_m128d(r, a);
+        let r = _mm_mask_div_pd(a, 0b00000011, a, b);
+        let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_maskz_div_pd() {
+        let a = _mm_set_pd(f64::MAX, f64::MIN);
+        let b = _mm_set_pd(0., 0.);
+        let r = _mm_maskz_div_pd(0, a, b);
+        assert_eq_m128d(r, _mm_setzero_pd());
+        let r = _mm_maskz_div_pd(0b00000011, a, b);
+        let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+        assert_eq_m128d(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_max_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);