diff --git a/library/stdarch/crates/core_arch/missing-x86.md b/library/stdarch/crates/core_arch/missing-x86.md
index 1c2d0a6d7b78..94ecc929ef56 100644
--- a/library/stdarch/crates/core_arch/missing-x86.md
+++ b/library/stdarch/crates/core_arch/missing-x86.md
@@ -56,217 +56,16 @@
* [ ] [`_mm256_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsh_h)
* [ ] [`_mm256_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pch)
* [ ] [`_mm512_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ph_mask)
- * [ ] [`_mm512_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
- * [ ] [`_mm512_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
- * [ ] [`_mm512_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
- * [ ] [`_mm512_cvt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
- * [ ] [`_mm512_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
- * [ ] [`_mm512_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
- * [ ] [`_mm512_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
- * [ ] [`_mm512_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
- * [ ] [`_mm512_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
- * [ ] [`_mm512_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
- * [ ] [`_mm512_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
- * [ ] [`_mm512_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
- * [ ] [`_mm512_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
- * [ ] [`_mm512_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
* [ ] [`_mm512_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsh_h)
- * [ ] [`_mm512_cvtt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
- * [ ] [`_mm512_cvtt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
- * [ ] [`_mm512_cvtt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
- * [ ] [`_mm512_cvtt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
- * [ ] [`_mm512_cvtt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
- * [ ] [`_mm512_cvtt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
- * [ ] [`_mm512_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
- * [ ] [`_mm512_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
- * [ ] [`_mm512_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
- * [ ] [`_mm512_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
- * [ ] [`_mm512_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
- * [ ] [`_mm512_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
- * [ ] [`_mm512_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
- * [ ] [`_mm512_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
* [ ] [`_mm512_mask_cmp_round_ph_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ph_mask)
- * [ ] [`_mm512_mask_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
- * [ ] [`_mm512_mask_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
- * [ ] [`_mm512_mask_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
- * [ ] [`_mm512_mask_cvt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
- * [ ] [`_mm512_mask_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
- * [ ] [`_mm512_mask_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
- * [ ] [`_mm512_mask_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
- * [ ] [`_mm512_mask_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
- * [ ] [`_mm512_mask_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
- * [ ] [`_mm512_mask_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
- * [ ] [`_mm512_mask_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
- * [ ] [`_mm512_mask_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
- * [ ] [`_mm512_mask_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
- * [ ] [`_mm512_mask_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
- * [ ] [`_mm512_mask_cvtt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
- * [ ] [`_mm512_mask_cvtt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
- * [ ] [`_mm512_mask_cvtt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
- * [ ] [`_mm512_mask_cvtt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
- * [ ] [`_mm512_mask_cvtt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
- * [ ] [`_mm512_mask_cvtt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
- * [ ] [`_mm512_mask_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
- * [ ] [`_mm512_mask_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
- * [ ] [`_mm512_mask_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
- * [ ] [`_mm512_mask_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
- * [ ] [`_mm512_mask_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
- * [ ] [`_mm512_mask_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
- * [ ] [`_mm512_mask_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
- * [ ] [`_mm512_mask_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
- * [ ] [`_mm512_maskz_cvt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
- * [ ] [`_mm512_maskz_cvt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
- * [ ] [`_mm512_maskz_cvt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
- * [ ] [`_mm512_maskz_cvt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
- * [ ] [`_mm512_maskz_cvt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
- * [ ] [`_mm512_maskz_cvt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
- * [ ] [`_mm512_maskz_cvt_roundph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
- * [ ] [`_mm512_maskz_cvtph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
- * [ ] [`_mm512_maskz_cvtph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
- * [ ] [`_mm512_maskz_cvtph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
- * [ ] [`_mm512_maskz_cvtph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
- * [ ] [`_mm512_maskz_cvtph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
- * [ ] [`_mm512_maskz_cvtph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
- * [ ] [`_mm512_maskz_cvtph_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
- * [ ] [`_mm512_maskz_cvtt_roundph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
- * [ ] [`_mm512_maskz_cvtt_roundph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
- * [ ] [`_mm512_maskz_cvtt_roundph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
- * [ ] [`_mm512_maskz_cvtt_roundph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
- * [ ] [`_mm512_maskz_cvtt_roundph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
- * [ ] [`_mm512_maskz_cvtt_roundph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
- * [ ] [`_mm512_maskz_cvttph_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
- * [ ] [`_mm512_maskz_cvttph_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
- * [ ] [`_mm512_maskz_cvttph_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
- * [ ] [`_mm512_maskz_cvttph_epu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
- * [ ] [`_mm512_maskz_cvttph_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
- * [ ] [`_mm512_maskz_cvttph_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
- * [ ] [`_mm512_maskz_cvtx_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
- * [ ] [`_mm512_maskz_cvtxph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
* [ ] [`_mm512_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pch)
- * [ ] [`_mm_cvt_roundsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
- * [ ] [`_mm_cvt_roundsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64)
- * [ ] [`_mm_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
- * [ ] [`_mm_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
- * [ ] [`_mm_cvt_roundsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
- * [ ] [`_mm_cvt_roundsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64)
* [ ] [`_mm_cvtsh_h`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_h)
- * [ ] [`_mm_cvtsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
- * [ ] [`_mm_cvtsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64)
- * [ ] [`_mm_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
- * [ ] [`_mm_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
- * [ ] [`_mm_cvtsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
- * [ ] [`_mm_cvtsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64)
* [ ] [`_mm_cvtsi128_si16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
* [ ] [`_mm_cvtsi16_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
- * [ ] [`_mm_cvtt_roundsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
- * [ ] [`_mm_cvtt_roundsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64)
- * [ ] [`_mm_cvtt_roundsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
- * [ ] [`_mm_cvtt_roundsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u64)
- * [ ] [`_mm_cvttsh_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
- * [ ] [`_mm_cvttsh_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64)
- * [ ] [`_mm_cvttsh_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
- * [ ] [`_mm_cvttsh_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
- * [ ] [`_mm_mask_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
- * [ ] [`_mm_mask_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
- * [ ] [`_mm_mask_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
- * [ ] [`_mm_mask_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
- * [ ] [`_mm_maskz_cvt_roundsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
- * [ ] [`_mm_maskz_cvt_roundsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
- * [ ] [`_mm_maskz_cvtsh_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
- * [ ] [`_mm_maskz_cvtsh_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
* [ ] [`_mm_set1_pch`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pch)
-["AVX512_VP2INTERSECT", "AVX512F"]
* [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
index be99002e51c3..86d38feaec19 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512fp16.rs
@@ -13079,6 +13079,2800 @@ pub unsafe fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
     _mm_mask_cvt_roundsd_sh::<ROUNDING>(_mm_setzero_ph(), k, a, b)
 }
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvtph2w_128(a, src.as_i16x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvtph2w_256(a, src.as_i16x16(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvtph2w_512(
+ a,
+ src.as_i16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ transmute(vcvtph2w_512(a, src.as_i16x32(), k, ROUNDING))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+    _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvtph2uw_128(a, src.as_u16x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvtph2uw_256(a, src.as_u16x16(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvtph2uw_512(
+ a,
+ src.as_u16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epu16<const ROUNDING: i32>(a: __m512h) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epu16<const ROUNDING: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
+/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epu16<const ROUNDING: i32>(
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2w_128(a, src.as_i16x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvttph2w_256(a, src.as_i16x16(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvttph2w_512(
+ a,
+ src.as_i16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_sae!(SAE);
+ transmute(vcvttph2w_512(a, src.as_i16x32(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2uw_128(a, src.as_u16x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
+ transmute(vcvttph2uw_256(a, src.as_u16x16(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
+ _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
+ transmute(vcvttph2uw_512(
+ a,
+ src.as_u16x32(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
+ _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
+ src: __m512i,
+ k: __mmask32,
+ a: __m512h,
+) -> __m512i {
+ static_assert_sae!(SAE);
+ transmute(vcvttph2uw_512(a, src.as_u16x32(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers with
+/// truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
+/// mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu16)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvtph2dq_128(a, src.as_i32x4(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
+ _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvtph2dq_256(a, src.as_i32x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
+ _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
+ _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
+ transmute(vcvtph2dq_512(
+ a,
+ src.as_i32x16(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
+ _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
+ src: __m512i,
+ k: __mmask16,
+ a: __m256h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ transmute(vcvtph2dq_512(a, src.as_i32x16(), k, ROUNDING))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers, and store the
+/// results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(
+ k: __mmask16,
+ a: __m256h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING);
+ _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2si))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtsh_i32(a: __m128h) -> i32 {
+ vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer, and store
+/// the result in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
+ static_assert_rounding!(ROUNDING);
+ vcvtsh2si32(a, ROUNDING)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store the
+/// results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
+ // All-ones mask: every lane is written, so the undefined src is never observed.
+ _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ // The intrinsic works on unsigned 32-bit lanes; transmute back to the opaque __m128i.
+ transmute(vcvtph2udq_128(a, src.as_u32x4(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
+ // All-ones mask: every lane is written, so the undefined src is never observed.
+ _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvtph2udq_256(a, src.as_u32x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
+ // All-ones 16-bit mask: every lane is written, so the undefined src is never observed.
+ _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
+ // The non-rounding variant always uses the current rounding direction.
+ transmute(vcvtph2udq_512(
+ a,
+ src.as_u32x16(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    // All-ones mask: every lane is written, so the undefined src is never observed.
+    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    // ROUNDING is a compile-time constant (legacy const-generic position 3).
+    static_assert_rounding!(ROUNDING);
+    transmute(vcvtph2udq_512(a, src.as_u32x16(), k, ROUNDING))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers, and store
+/// the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    static_assert_rounding!(ROUNDING);
+    // Zero-masking is the write-masking form with an all-zero src.
+    _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2usi))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtsh_u32(a: __m128h) -> u32 {
+ // The non-rounding variant always uses the current rounding direction.
+ vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
+/// the result in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 {
+    // ROUNDING is a compile-time constant (legacy const-generic position 1).
+    static_assert_rounding!(ROUNDING);
+    vcvtsh2usi32(a, ROUNDING)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
+ // All-ones mask: every lane is written, so the undefined src is never observed.
+ _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2dq_128(a, src.as_i32x4(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
+ // All-ones mask: every lane is written, so the undefined src is never observed.
+ _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvttph2dq_256(a, src.as_i32x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
+ // All-ones 16-bit mask: every lane is written, so the undefined src is never observed.
+ _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
+ // Truncating conversion: only the exception behavior (SAE) is configurable,
+ // and the non-round variant passes the current direction.
+ transmute(vcvttph2dq_512(
+ a,
+ src.as_i32x16(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst.
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    // All-ones mask: every lane is written, so the undefined src is never observed.
+    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    // SAE is a compile-time constant (legacy const-generic position 3).
+    static_assert_sae!(SAE);
+    transmute(vcvttph2dq_512(a, src.as_i32x16(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    // Zero-masking is the write-masking form with an all-zero src.
+    _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2si))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttsh_i32(a: __m128h) -> i32 {
+ // The non-SAE variant always uses the current exception behavior.
+ vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit integer with truncation, and store
+/// the result in dst.
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
+    // SAE is a compile-time constant (legacy const-generic position 1).
+    static_assert_sae!(SAE);
+    vcvttsh2si32(a, SAE)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
+ // All-ones mask: every lane is written, so the undefined src is never observed.
+ _mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2udq_128(a, src.as_u32x4(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
+ // All-ones mask: every lane is written, so the undefined src is never observed.
+ _mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvttph2udq_256(a, src.as_u32x8(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
+ // All-ones 16-bit mask: every lane is written, so the undefined src is never observed.
+ _mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
+ // The non-SAE variant passes the current direction constant.
+ transmute(vcvttph2udq_512(
+ a,
+ src.as_u32x16(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    // All-ones mask: every lane is written, so the undefined src is never observed.
+    _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m256h,
+) -> __m512i {
+    // SAE is a compile-time constant (legacy const-generic position 3).
+    static_assert_sae!(SAE);
+    transmute(vcvttph2udq_512(a, src.as_u32x16(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 32-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
+    static_assert_sae!(SAE);
+    // Zero-masking is the write-masking form with an all-zero src.
+    _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2usi))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttsh_u32(a: __m128h) -> u32 {
+ // The non-SAE variant always uses the current exception behavior.
+ vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer with truncation, and store
+/// the result in dst.
+///
+/// Exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the `sae` parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u32)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
+    // SAE is a compile-time constant (legacy const-generic position 1).
+    static_assert_sae!(SAE);
+    vcvttsh2usi32(a, SAE)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
+ // All-ones mask: every lane is written, so the undefined src is never observed.
+ _mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ // Only the low 2 fp16 lanes of `a` participate (2 x i64 output lanes).
+ transmute(vcvtph2qq_128(a, src.as_i64x2(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
+ _mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvtph2qq_256(a, src.as_i64x4(), k))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
+ // Zero-masking is the write-masking form with an all-zero src.
+ _mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
+ _mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
+ transmute(vcvtph2qq_512(
+ a,
+ src.as_i64x8(), // lanes copied through where the corresponding bit of k is clear
+ k,
+ _MM_FROUND_CUR_DIRECTION, // round using the current rounding mode
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
+ _mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
+ static_assert_rounding!(ROUNDING); // reject invalid rounding constants at compile time
+ _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
+ src: __m512i,
+ k: __mmask8,
+ a: __m128h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING); // reject invalid rounding constants at compile time
+ transmute(vcvtph2qq_512(a, src.as_i64x8(), k, ROUNDING))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING); // reject invalid rounding constants at compile time
+ _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) // intrinsic applies writemask k: src lanes are kept where a bit is clear
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
+ _mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) // intrinsic applies writemask k: src lanes are kept where a bit is clear
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
+ _mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
+ _mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
+ transmute(vcvtph2uqq_512(
+ a,
+ src.as_u64x8(), // lanes copied through where the corresponding bit of k is clear
+ k,
+ _MM_FROUND_CUR_DIRECTION, // round using the current rounding mode
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
+ _mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
+ static_assert_rounding!(ROUNDING); // reject invalid rounding constants at compile time
+ _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
+ src: __m512i,
+ k: __mmask8,
+ a: __m128h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING); // reject invalid rounding constants at compile time
+ transmute(vcvtph2uqq_512(a, src.as_u64x8(), k, ROUNDING))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(
+ k: __mmask8,
+ a: __m128h,
+) -> __m512i {
+ static_assert_rounding!(ROUNDING); // reject invalid rounding constants at compile time
+ _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) // intrinsic applies writemask k: src lanes are kept where a bit is clear
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
+ _mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) // intrinsic applies writemask k: src lanes are kept where a bit is clear
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
+ _mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
+ _mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
+ transmute(vcvttph2qq_512(
+ a,
+ src.as_i64x8(), // lanes copied through where the corresponding bit of k is clear
+ k,
+ _MM_FROUND_CUR_DIRECTION, // truncating conversion; only exception behavior is affected by this parameter per the SAE convention — see the cvtt_round variants
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2qq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
+ _mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
+ static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+ _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
+ src: __m512i,
+ k: __mmask8,
+ a: __m128h,
+) -> __m512i {
+ static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+ transmute(vcvttph2qq_512(a, src.as_i64x8(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epi64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
+ static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+ _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
+ transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) // intrinsic applies writemask k: src lanes are kept where a bit is clear
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
+ _mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
+ _mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
+ transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) // intrinsic applies writemask k: src lanes are kept where a bit is clear
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
+ _mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
+ _mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a) // all-ones mask: every lane is written, so the undefined src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
+ transmute(vcvttph2uqq_512(
+ a,
+ src.as_u64x8(), // lanes copied through where the corresponding bit of k is clear
+ k,
+ _MM_FROUND_CUR_DIRECTION, // truncating conversion; the SAE-capable cvtt_round variant exposes this parameter to callers
+ ))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
+ _mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a) // zeroed src turns the writemask into a zeromask
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
+ static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+ _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
+ src: __m512i,
+ k: __mmask8,
+ a: __m128h,
+) -> __m512i {
+ static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+ transmute(vcvttph2uqq_512(a, src.as_u64x8(), k, SAE))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed 64-bit unsigned integers with truncation, and
+/// store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundph_epu64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
+ static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+ _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_setzero_si512(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
+ _mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a) // all-ones mask: every lane is written, so the zeroed src is never observed
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
+ vcvtph2psx_128(a, src, k)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
+ _mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
+ _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
+ vcvtph2psx_256(a, src, k)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
+ _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
+ _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
+ vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtxph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2psx))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
+ _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtx_roundph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), 0xffff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtx_roundph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
+ src: __m512,
+ k: __mmask16,
+ a: __m256h,
+) -> __m512 {
+ static_assert_sae!(SAE);
+ vcvtph2psx_512(a, src, k, SAE)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtx_roundph_ps)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed
+/// elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_ss)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2ss))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
+ _mm_mask_cvtsh_ss(a, 0xff, a, b)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element is
+/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
+/// upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_ss)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2ss))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 {
+ vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
+/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_ss)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2ss))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
+ _mm_mask_cvtsh_ss(_mm_setzero_ps(), k, a, b)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements
+/// from a to the upper elements of dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_ss)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
+ static_assert_sae!(SAE);
+ _mm_mask_cvt_roundsh_ss::<SAE>(_mm_undefined_ps(), 0xff, a, b)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element is
+/// copied from src to dst when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
+/// upper elements of dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_ss)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
+ src: __m128,
+ k: __mmask8,
+ a: __m128,
+ b: __m128h,
+) -> __m128 {
+ static_assert_sae!(SAE);
+ vcvtsh2ss(a, b, src, k, SAE)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a single-precision (32-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
+/// zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
+/// of dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_ss)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(
+ k: __mmask8,
+ a: __m128,
+ b: __m128h,
+) -> __m128 {
+ static_assert_sae!(SAE);
+ _mm_mask_cvt_roundsh_ss::<SAE>(_mm_setzero_ps(), k, a, b)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtph_pd(a: __m128h) -> __m128d {
+ _mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
+ vcvtph2pd_128(a, src, k)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
+ _mm_mask_cvtph_pd(_mm_setzero_pd(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
+ _mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
+ vcvtph2pd_256(a, src, k)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
+ _mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
+ _mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
+ vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2pd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
+ _mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using writemask k (elements are copied from src to
+/// dst when the corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
+ src: __m512d,
+ k: __mmask8,
+ a: __m128h,
+) -> __m512d {
+ static_assert_sae!(SAE);
+ vcvtph2pd_512(a, src, k, SAE)
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed double-precision (64-bit)
+/// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the
+/// corresponding mask bit is not set).
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_pd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d {
+ static_assert_sae!(SAE);
+ _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper element
+/// from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_sd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2sd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
+ _mm_mask_cvtsh_sd(a, 0xff, a, b)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element is
+/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
+/// of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsh_sd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2sd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
+ vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
+/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsh_sd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2sd))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
+ _mm_mask_cvtsh_sd(_mm_setzero_pd(), k, a, b)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
+/// floating-point element, store the result in the lower element of dst, and copy the upper element from a
+/// to the upper element of dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_sd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
+ static_assert_sae!(SAE);
+ _mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
+/// floating-point element, store the result in the lower element of dst using writemask k (the element is
+/// copied from src to dst when mask bit 0 is not set), and copy the upper element from a to the upper element
+/// of dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsh_sd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
+#[rustc_legacy_const_generics(4)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
+ src: __m128d,
+ k: __mmask8,
+ a: __m128d,
+ b: __m128h,
+) -> __m128d {
+ static_assert_sae!(SAE);
+ vcvtsh2sd(a, b, src, k, SAE)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in b to a double-precision (64-bit)
+/// floating-point element, store the result in the lower element of dst using zeromask k (the element is
+/// zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsh_sd)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(
+ k: __mmask8,
+ a: __m128d,
+ b: __m128h,
+) -> __m128d {
+ static_assert_sae!(SAE);
+ _mm_mask_cvt_roundsh_sd::<SAE>(_mm_setzero_pd(), k, a, b)
+}
+
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
@@ -13337,6 +16131,110 @@ extern "C" {
#[link_name = "llvm.x86.avx512fp16.mask.vcvtsd2sh.round"]
fn vcvtsd2sh(a: __m128h, b: __m128d, src: __m128h, k: __mmask8, rounding: i32) -> __m128h;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.128"]
+ fn vcvtph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.256"]
+ fn vcvtph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2w.512"]
+ fn vcvtph2w_512(a: __m512h, src: i16x32, k: __mmask32, rounding: i32) -> i16x32;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.128"]
+ fn vcvtph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"]
+ fn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"]
+ fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, rounding: i32) -> u16x32;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"]
+ fn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.256"]
+ fn vcvttph2w_256(a: __m256h, src: i16x16, k: __mmask16) -> i16x16;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.512"]
+ fn vcvttph2w_512(a: __m512h, src: i16x32, k: __mmask32, sae: i32) -> i16x32;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.128"]
+ fn vcvttph2uw_128(a: __m128h, src: u16x8, k: __mmask8) -> u16x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.256"]
+ fn vcvttph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uw.512"]
+ fn vcvttph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.128"]
+ fn vcvtph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.256"]
+ fn vcvtph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2dq.512"]
+ fn vcvtph2dq_512(a: __m256h, src: i32x16, k: __mmask16, rounding: i32) -> i32x16;
+ #[link_name = "llvm.x86.avx512fp16.vcvtsh2si32"]
+ fn vcvtsh2si32(a: __m128h, rounding: i32) -> i32;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.128"]
+ fn vcvtph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.256"]
+ fn vcvtph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2udq.512"]
+ fn vcvtph2udq_512(a: __m256h, src: u32x16, k: __mmask16, rounding: i32) -> u32x16;
+ #[link_name = "llvm.x86.avx512fp16.vcvtsh2usi32"]
+ fn vcvtsh2usi32(a: __m128h, sae: i32) -> u32;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.128"]
+ fn vcvttph2dq_128(a: __m128h, src: i32x4, k: __mmask8) -> i32x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.256"]
+ fn vcvttph2dq_256(a: __m128h, src: i32x8, k: __mmask8) -> i32x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2dq.512"]
+ fn vcvttph2dq_512(a: __m256h, src: i32x16, k: __mmask16, sae: i32) -> i32x16;
+ #[link_name = "llvm.x86.avx512fp16.vcvttsh2si32"]
+ fn vcvttsh2si32(a: __m128h, sae: i32) -> i32;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.128"]
+ fn vcvttph2udq_128(a: __m128h, src: u32x4, k: __mmask8) -> u32x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.256"]
+ fn vcvttph2udq_256(a: __m128h, src: u32x8, k: __mmask8) -> u32x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2udq.512"]
+ fn vcvttph2udq_512(a: __m256h, src: u32x16, k: __mmask16, sae: i32) -> u32x16;
+ #[link_name = "llvm.x86.avx512fp16.vcvttsh2usi32"]
+ fn vcvttsh2usi32(a: __m128h, sae: i32) -> u32;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.128"]
+ fn vcvtph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.256"]
+ fn vcvtph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2qq.512"]
+ fn vcvtph2qq_512(a: __m128h, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.128"]
+ fn vcvtph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.256"]
+ fn vcvtph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uqq.512"]
+ fn vcvtph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.128"]
+ fn vcvttph2qq_128(a: __m128h, src: i64x2, k: __mmask8) -> i64x2;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.256"]
+ fn vcvttph2qq_256(a: __m128h, src: i64x4, k: __mmask8) -> i64x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2qq.512"]
+ fn vcvttph2qq_512(a: __m128h, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.128"]
+ fn vcvttph2uqq_128(a: __m128h, src: u64x2, k: __mmask8) -> u64x2;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.256"]
+ fn vcvttph2uqq_256(a: __m128h, src: u64x4, k: __mmask8) -> u64x4;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2uqq.512"]
+ fn vcvttph2uqq_512(a: __m128h, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.128"]
+ fn vcvtph2psx_128(a: __m128h, src: __m128, k: __mmask8) -> __m128;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.256"]
+ fn vcvtph2psx_256(a: __m128h, src: __m256, k: __mmask8) -> __m256;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2psx.512"]
+ fn vcvtph2psx_512(a: __m256h, src: __m512, k: __mmask16, sae: i32) -> __m512;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2ss.round"]
+ fn vcvtsh2ss(a: __m128, b: __m128h, src: __m128, k: __mmask8, sae: i32) -> __m128;
+
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.128"]
+ fn vcvtph2pd_128(a: __m128h, src: __m128d, k: __mmask8) -> __m128d;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.256"]
+ fn vcvtph2pd_256(a: __m128h, src: __m256d, k: __mmask8) -> __m256d;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2pd.512"]
+ fn vcvtph2pd_512(a: __m128h, src: __m512d, k: __mmask8, sae: i32) -> __m512d;
+ #[link_name = "llvm.x86.avx512fp16.mask.vcvtsh2sd.round"]
+ fn vcvtsh2sd(a: __m128d, b: __m128h, src: __m128d, k: __mmask8, sae: i32) -> __m128d;
+
}
#[cfg(test)]
@@ -21940,4 +24838,1966 @@ mod tests {
let e = _mm_setr_ph(1.0, 11., 12., 13., 14., 15., 16., 17.);
assert_eq_m128h(r, e);
}
+
+ // Tests for the non-truncating (current-rounding-mode) f16 -> i16
+ // conversions. All inputs are integral, so the result is exact and
+ // identical under any rounding mode. These must call the `cvtph`
+ // intrinsics, not the truncating `cvttph` ones (tested separately below).
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtph_epi16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtph_epi16(a);
+ let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtph_epi16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
+ // Mask bit i selects the converted lane i; cleared bits keep `src`.
+ let r = _mm_mask_cvtph_epi16(src, 0b01010101, a);
+ let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtph_epi16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ // Cleared mask bits zero the corresponding lane.
+ let r = _mm_maskz_cvtph_epi16(0b01010101, a);
+ let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtph_epi16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_cvtph_epi16(a);
+ let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtph_epi16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm256_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm256_mask_cvtph_epi16(src, 0b0101010101010101, a);
+ let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtph_epi16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_maskz_cvtph_epi16(0b0101010101010101, a);
+ let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ // 512-bit non-truncating f16 -> i16 tests; these must call the `cvtph`
+ // intrinsics rather than the truncating `cvttph` ones (tested separately).
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvtph_epi16(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ // Mask bit i selects the converted lane i; cleared bits keep `src`.
+ let r = _mm512_mask_cvtph_epi16(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvtph_epi16(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // Explicit-rounding f16 -> i16 tests. These must call the `cvt_roundph`
+ // intrinsics (which take a rounding mode plus SAE), not the truncating
+ // `cvtt_roundph` ones (which take SAE only) — mirroring the epu16 round
+ // tests below.
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ let r = _mm512_mask_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvt_roundph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvt_roundph_epi16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // Non-truncating f16 -> u16 tests; must call the `cvtph_epu16`
+ // intrinsics, not the truncating `cvttph_epu16` ones (tested separately).
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtph_epu16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtph_epu16(a);
+ let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtph_epu16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
+ // Mask bit i selects the converted lane i; cleared bits keep `src`.
+ let r = _mm_mask_cvtph_epu16(src, 0b01010101, a);
+ let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtph_epu16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_maskz_cvtph_epu16(0b01010101, a);
+ let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtph_epu16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_cvtph_epu16(a);
+ let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtph_epu16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm256_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm256_mask_cvtph_epu16(src, 0b0101010101010101, a);
+ let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtph_epu16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_maskz_cvtph_epu16(0b0101010101010101, a);
+ let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ // 512-bit non-truncating f16 -> u16 tests; must call the `cvtph_epu16`
+ // intrinsics, not the truncating `cvttph_epu16` ones (tested separately).
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvtph_epu16(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ // Mask bit i selects the converted lane i; cleared bits keep `src`.
+ let r = _mm512_mask_cvtph_epu16(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvtph_epu16(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // Explicit-rounding f16 -> u16 tests; inputs are integral so the result
+ // is exact under round-to-nearest. Mask tests: bit i selects converted
+ // lane i, cleared bits keep `src` (mask form) or zero the lane (maskz).
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ let r = _mm512_mask_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvt_roundph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvt_roundph_epu16::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // Truncating f16 -> i16 tests (`cvtt*`). Inputs are integral so
+ // truncation and rounding give the same exact results. Mask tests: bit i
+ // selects converted lane i; cleared bits keep `src` (mask) or zero the
+ // lane (maskz).
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvttph_epi16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvttph_epi16(a);
+ let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvttph_epi16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
+ let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
+ let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
+ let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvttph_epi16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_maskz_cvttph_epi16(0b01010101, a);
+ let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvttph_epi16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_cvttph_epi16(a);
+ let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvttph_epi16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm256_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm256_mask_cvttph_epi16(src, 0b0101010101010101, a);
+ let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttph_epi16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_maskz_cvttph_epi16(0b0101010101010101, a);
+ let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvttph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvttph_epi16(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvttph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ let r = _mm512_mask_cvttph_epi16(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvttph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvttph_epi16(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // `cvtt_round*` takes only a SAE immediate (no rounding mode: truncation
+ // is the rounding behavior).
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtt_roundph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtt_roundph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ let r = _mm512_mask_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtt_roundph_epi16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvtt_roundph_epi16::<_MM_FROUND_NO_EXC>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // Truncating f16 -> u16 tests (`cvtt*`), mirroring the signed tests
+ // above. Mask tests: bit i selects converted lane i; cleared bits keep
+ // `src` (mask) or zero the lane (maskz).
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvttph_epu16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvttph_epu16(a);
+ let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvttph_epu16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
+ let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
+ let e = _mm_set_epi16(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvttph_epu16() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_maskz_cvttph_epu16(0b01010101, a);
+ let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvttph_epu16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_cvttph_epu16(a);
+ let e = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvttph_epu16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm256_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm256_mask_cvttph_epu16(src, 0b0101010101010101, a);
+ let e = _mm256_set_epi16(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttph_epu16() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm256_maskz_cvttph_epu16(0b0101010101010101, a);
+ let e = _mm256_set_epi16(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvttph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvttph_epu16(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvttph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ let r = _mm512_mask_cvttph_epu16(src, 0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvttph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvttph_epu16(0b01010101010101010101010101010101, a);
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // `cvtt_round*` takes only a SAE immediate (truncation is implied).
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtt_roundph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set_epi16(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtt_roundph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let src = _mm512_set_epi16(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+ );
+ let r = _mm512_mask_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
+ src,
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16, 26, 18, 28, 20, 30, 22, 32,
+ 24, 34, 26, 36, 28, 38, 30, 40, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtt_roundph_epu16() {
+ let a = _mm512_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+ 31.0, 32.0,
+ );
+ let r = _mm512_maskz_cvtt_roundph_epu16::<_MM_FROUND_NO_EXC>(
+ 0b01010101010101010101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi16(
+ 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16, 0, 18, 0, 20, 0, 22, 0, 24, 0, 26,
+ 0, 28, 0, 30, 0, 32,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ // f16 -> i32 widening conversions: only the low half of the f16 source
+ // lanes is consumed (128-bit result <- low 4 halves, 256-bit <- 8,
+ // 512-bit <- 16), hence the zero padding in the 128-bit inputs.
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtph_epi32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_cvtph_epi32(a);
+ let e = _mm_set_epi32(1, 2, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtph_epi32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let src = _mm_set_epi32(10, 11, 12, 13);
+ // Mask bit i selects the converted lane i; cleared bits keep `src`.
+ let r = _mm_mask_cvtph_epi32(src, 0b0101, a);
+ let e = _mm_set_epi32(10, 2, 12, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtph_epi32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_maskz_cvtph_epi32(0b0101, a);
+ let e = _mm_set_epi32(0, 2, 0, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtph_epi32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_cvtph_epi32(a);
+ let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtph_epi32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
+ let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a);
+ let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtph_epi32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_maskz_cvtph_epi32(0b01010101, a);
+ let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvtph_epi32(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvtph_epi32(src, 0b0101010101010101, a);
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvtph_epi32(0b0101010101010101, a);
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvt_roundph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvt_roundph_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ // Scalar conversions read element 0 of the vector (1.0 via `setr`).
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtsh_i32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtsh_i32(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundsh_i32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ assert_eq!(r, 1);
+ }
+
+ // f16 -> u32 widening conversions, mirroring the signed epi32 tests
+ // above: only the low half of the f16 source lanes is consumed.
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtph_epu32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_cvtph_epu32(a);
+ let e = _mm_set_epi32(1, 2, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtph_epu32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let src = _mm_set_epi32(10, 11, 12, 13);
+ // Mask bit i selects the converted lane i; cleared bits keep `src`.
+ let r = _mm_mask_cvtph_epu32(src, 0b0101, a);
+ let e = _mm_set_epi32(10, 2, 12, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtph_epu32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_maskz_cvtph_epu32(0b0101, a);
+ let e = _mm_set_epi32(0, 2, 0, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtph_epu32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_cvtph_epu32(a);
+ let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtph_epu32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
+ let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a);
+ let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtph_epu32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_maskz_cvtph_epu32(0b01010101, a);
+ let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvtph_epu32(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvtph_epu32(src, 0b0101010101010101, a);
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvtph_epu32(0b0101010101010101, a);
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src,
+ 0b0101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvt_roundph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvt_roundph_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b0101010101010101,
+ a,
+ );
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ // Scalar conversions read element 0 of the vector (1.0 via `setr`).
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtsh_u32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtsh_u32(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundsh_u32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvttph_epi32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_cvttph_epi32(a);
+ let e = _mm_set_epi32(1, 2, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvttph_epi32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let src = _mm_set_epi32(10, 11, 12, 13);
+ let r = _mm_mask_cvttph_epi32(src, 0b0101, a);
+ let e = _mm_set_epi32(10, 2, 12, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvttph_epi32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_maskz_cvttph_epi32(0b0101, a);
+ let e = _mm_set_epi32(0, 2, 0, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvttph_epi32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_cvttph_epi32(a);
+ let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvttph_epi32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
+ let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a);
+ let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttph_epi32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_maskz_cvttph_epi32(0b01010101, a);
+ let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvttph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvttph_epi32(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvttph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvttph_epi32(src, 0b0101010101010101, a);
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvttph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvttph_epi32(0b0101010101010101, a);
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtt_roundph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtt_roundph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtt_roundph_epi32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvtt_roundph_epi32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvttsh_i32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvttsh_i32(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtt_roundsh_i32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvttph_epu32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_cvttph_epu32(a);
+ let e = _mm_set_epi32(1, 2, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvttph_epu32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let src = _mm_set_epi32(10, 11, 12, 13);
+ let r = _mm_mask_cvttph_epu32(src, 0b0101, a);
+ let e = _mm_set_epi32(10, 2, 12, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvttph_epu32() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_maskz_cvttph_epu32(0b0101, a);
+ let e = _mm_set_epi32(0, 2, 0, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvttph_epu32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_cvttph_epu32(a);
+ let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvttph_epu32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
+ let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a);
+ let e = _mm256_set_epi32(10, 2, 12, 4, 14, 6, 16, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttph_epu32() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_maskz_cvttph_epu32(0b01010101, a);
+ let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvttph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvttph_epu32(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvttph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvttph_epu32(src, 0b0101010101010101, a);
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvttph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvttph_epu32(0b0101010101010101, a);
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtt_roundph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtt_roundph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let src = _mm512_set_epi32(
+ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ );
+ let r = _mm512_mask_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
+ let e = _mm512_set_epi32(10, 2, 12, 4, 14, 6, 16, 8, 18, 10, 20, 12, 22, 14, 24, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtt_roundph_epu32() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvtt_roundph_epu32::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
+ let e = _mm512_set_epi32(0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0, 16);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvttsh_u32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvttsh_u32(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtt_roundsh_u32() {
+ let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a);
+ assert_eq!(r, 1);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_cvtph_epi64(a);
+ let e = _mm_set_epi64x(1, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtph_epi64() {
+ let src = _mm_set_epi64x(3, 4);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_mask_cvtph_epi64(src, 0b01, a);
+ let e = _mm_set_epi64x(3, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_maskz_cvtph_epi64(0b01, a);
+ let e = _mm_set_epi64x(0, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_cvtph_epi64(a);
+ let e = _mm256_set_epi64x(1, 2, 3, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtph_epi64() {
+ let src = _mm256_set_epi64x(5, 6, 7, 8);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_mask_cvtph_epi64(src, 0b0101, a);
+ let e = _mm256_set_epi64x(5, 2, 7, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_maskz_cvtph_epi64(0b0101, a);
+ let e = _mm256_set_epi64x(0, 2, 0, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvtph_epi64(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtph_epi64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a);
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvtph_epi64(0b01010101, a);
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundph_epi64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0b01010101, a,
+ );
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvt_roundph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101, a,
+ );
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_cvtph_epu64(a);
+ let e = _mm_set_epi64x(1, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtph_epu64() {
+ let src = _mm_set_epi64x(3, 4);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_mask_cvtph_epu64(src, 0b01, a);
+ let e = _mm_set_epi64x(3, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_maskz_cvtph_epu64(0b01, a);
+ let e = _mm_set_epi64x(0, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_cvtph_epu64(a);
+ let e = _mm256_set_epi64x(1, 2, 3, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtph_epu64() {
+ let src = _mm256_set_epi64x(5, 6, 7, 8);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_mask_cvtph_epu64(src, 0b0101, a);
+ let e = _mm256_set_epi64x(5, 2, 7, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_maskz_cvtph_epu64(0b0101, a);
+ let e = _mm256_set_epi64x(0, 2, 0, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvtph_epu64(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtph_epu64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a);
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvtph_epu64(0b01010101, a);
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvt_roundph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvt_roundph_epu64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ src, 0b01010101, a,
+ );
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvt_roundph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b01010101, a,
+ );
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvttph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_cvttph_epi64(a);
+ let e = _mm_set_epi64x(1, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvttph_epi64() {
+ let src = _mm_set_epi64x(3, 4);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_mask_cvttph_epi64(src, 0b01, a);
+ let e = _mm_set_epi64x(3, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvttph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_maskz_cvttph_epi64(0b01, a);
+ let e = _mm_set_epi64x(0, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvttph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_cvttph_epi64(a);
+ let e = _mm256_set_epi64x(1, 2, 3, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvttph_epi64() {
+ let src = _mm256_set_epi64x(5, 6, 7, 8);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_mask_cvttph_epi64(src, 0b0101, a);
+ let e = _mm256_set_epi64x(5, 2, 7, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttph_epi64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_maskz_cvttph_epi64(0b0101, a);
+ let e = _mm256_set_epi64x(0, 2, 0, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvttph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvttph_epi64(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvttph_epi64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a);
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvttph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvttph_epi64(0b01010101, a);
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtt_roundph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtt_roundph_epi64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtt_roundph_epi64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a);
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvttph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_cvttph_epu64(a);
+ let e = _mm_set_epi64x(1, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvttph_epu64() {
+ let src = _mm_set_epi64x(3, 4);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_mask_cvttph_epu64(src, 0b01, a);
+ let e = _mm_set_epi64x(3, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvttph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+ let r = _mm_maskz_cvttph_epu64(0b01, a);
+ let e = _mm_set_epi64x(0, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvttph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_cvttph_epu64(a);
+ let e = _mm256_set_epi64x(1, 2, 3, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvttph_epu64() {
+ let src = _mm256_set_epi64x(5, 6, 7, 8);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_mask_cvttph_epu64(src, 0b0101, a);
+ let e = _mm256_set_epi64x(5, 2, 7, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttph_epu64() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm256_maskz_cvttph_epu64(0b0101, a);
+ let e = _mm256_set_epi64x(0, 2, 0, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvttph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvttph_epu64(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvttph_epu64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a);
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvttph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvttph_epu64(0b01010101, a);
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtt_roundph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtt_roundph_epu64() {
+ let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
+ let e = _mm512_set_epi64(9, 2, 11, 4, 13, 6, 15, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtt_roundph_epu64() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a);
+ let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_cvtxph_ps() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_cvtxph_ps(a);
+ let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_mask_cvtxph_ps() {
+ let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0);
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
+ let e = _mm_set_ps(10.0, 2.0, 12.0, 4.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm_maskz_cvtxph_ps() {
+ let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+ let r = _mm_maskz_cvtxph_ps(0b0101, a);
+ let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_cvtxph_ps() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_cvtxph_ps(a);
+ let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_mask_cvtxph_ps() {
+ let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a);
+ let e = _mm256_set_ps(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtxph_ps() {
+ let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+ let r = _mm256_maskz_cvtxph_ps(0b01010101, a);
+ let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtxph_ps() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvtxph_ps(a);
+ let e = _mm512_set_ps(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtxph_ps() {
+ let src = _mm512_set_ps(
+ 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
+ 24.0, 25.0,
+ );
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_mask_cvtxph_ps(src, 0b0101010101010101, a);
+ let e = _mm512_set_ps(
+ 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
+ 16.0,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtxph_ps() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvtxph_ps(0b0101010101010101, a);
+ let e = _mm512_set_ps(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_cvtx_roundph_ps() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm512_set_ps(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_mask_cvtx_roundph_ps() {
+ let src = _mm512_set_ps(
+ 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
+ 24.0, 25.0,
+ );
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_mask_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b0101010101010101, a);
+ let e = _mm512_set_ps(
+ 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0, 18.0, 10.0, 20.0, 12.0, 22.0, 14.0, 24.0,
+ 16.0,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm512_maskz_cvtx_roundph_ps() {
+ let a = _mm256_set_ph(
+ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ );
+ let r = _mm512_maskz_cvtx_roundph_ps::<_MM_FROUND_NO_EXC>(0b0101010101010101, a);
+ let e = _mm512_set_ps(
+ 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0, 0.0, 10.0, 0.0, 12.0, 0.0, 14.0, 0.0, 16.0,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvtsh_ss() {
+ let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
+ let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_cvtsh_ss(a, b);
+ let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cvtsh_ss() {
+ let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
+ let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
+ let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_cvtsh_ss(src, 0, a, b);
+ let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ let r = _mm_mask_cvtsh_ss(src, 1, a, b);
+ let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_cvtsh_ss() {
+ let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
+ let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_cvtsh_ss(0, a, b);
+ let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ let r = _mm_maskz_cvtsh_ss(1, a, b);
+ let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_cvt_roundsh_ss() {
+ let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
+ let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b);
+ let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_mask_cvt_roundsh_ss() {
+ let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
+ let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
+ let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 0, a, b);
+ let e = _mm_setr_ps(3.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ let r = _mm_mask_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(src, 1, a, b);
+ let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512fp16")]
+ unsafe fn test_mm_maskz_cvt_roundsh_ss() {
+ let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
+ let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+ let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b);
+ let e = _mm_setr_ps(0.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(1, a, b);
+ let e = _mm_setr_ps(1.0, 20.0, 21.0, 22.0);
+ assert_eq_m128(r, e);
+ }
+
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    unsafe fn test_mm_cvtph_pd() {
+        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0); // only the low 2 f16 lanes are used
+        let r = _mm_cvtph_pd(a);
+        let e = _mm_set_pd(1.0, 2.0); // each low f16 lane widened to f64
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    unsafe fn test_mm_mask_cvtph_pd() {
+        let src = _mm_set_pd(10.0, 11.0); // fallback values for masked-off lanes
+        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+        let r = _mm_mask_cvtph_pd(src, 0b01, a); // bit 0 set: low lane converted; bit 1 clear: high lane from src
+        let e = _mm_set_pd(10.0, 2.0);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    unsafe fn test_mm_maskz_cvtph_pd() {
+        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
+        let r = _mm_maskz_cvtph_pd(0b01, a); // bit 1 clear: high lane zeroed
+        let e = _mm_set_pd(0.0, 2.0);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    unsafe fn test_mm256_cvtph_pd() {
+        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0); // only the low 4 f16 lanes are used
+        let r = _mm256_cvtph_pd(a);
+        let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); // each low f16 lane widened to f64
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    unsafe fn test_mm256_mask_cvtph_pd() {
+        let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0); // fallback values for masked-off lanes
+        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+        let r = _mm256_mask_cvtph_pd(src, 0b0101, a); // alternating mask: even lanes converted, odd from src
+        let e = _mm256_set_pd(10.0, 2.0, 12.0, 4.0);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    unsafe fn test_mm256_maskz_cvtph_pd() {
+        let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
+        let r = _mm256_maskz_cvtph_pd(0b0101, a); // alternating mask: odd lanes zeroed
+        let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0);
+        assert_eq_m256d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm512_cvtph_pd() {
+        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // all 8 f16 lanes consumed
+        let r = _mm512_cvtph_pd(a);
+        let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // each f16 lane widened to f64
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm512_mask_cvtph_pd() {
+        let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); // fallback for masked-off lanes
+        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+        let r = _mm512_mask_cvtph_pd(src, 0b01010101, a); // alternating mask: even lanes converted, odd from src
+        let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm512_maskz_cvtph_pd() {
+        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+        let r = _mm512_maskz_cvtph_pd(0b01010101, a); // alternating mask: odd lanes zeroed
+        let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm512_cvt_roundph_pd() {
+        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+        let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a); // f16->f64 widening is exact; SAE only suppresses exceptions
+        let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm512_mask_cvt_roundph_pd() {
+        let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0); // fallback for masked-off lanes
+        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+        let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a); // even lanes converted, odd from src
+        let e = _mm512_set_pd(10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0, 8.0);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm512_maskz_cvt_roundph_pd() {
+        let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
+        let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a); // odd lanes zeroed
+        let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvtsh_sd() {
+        let a = _mm_setr_pd(2.0, 20.0);
+        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+        let r = _mm_cvtsh_sd(a, b); // lane 0 = b[0] widened to f64
+        let e = _mm_setr_pd(1.0, 20.0); // lane 1 passes through from `a`
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_mask_cvtsh_sd() {
+        let src = _mm_setr_pd(3.0, 11.0); // fallback source for the masked lane
+        let a = _mm_setr_pd(2.0, 20.0);
+        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+        let r = _mm_mask_cvtsh_sd(src, 0, a, b); // mask clear: lane 0 = src[0]
+        let e = _mm_setr_pd(3.0, 20.0);
+        assert_eq_m128d(r, e);
+        let r = _mm_mask_cvtsh_sd(src, 1, a, b); // mask set: lane 0 = b[0] converted
+        let e = _mm_setr_pd(1.0, 20.0);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_maskz_cvtsh_sd() {
+        let a = _mm_setr_pd(2.0, 20.0);
+        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+        let r = _mm_maskz_cvtsh_sd(0, a, b); // mask clear: lane 0 zeroed
+        let e = _mm_setr_pd(0.0, 20.0);
+        assert_eq_m128d(r, e);
+        let r = _mm_maskz_cvtsh_sd(1, a, b); // mask set: lane 0 = b[0] converted
+        let e = _mm_setr_pd(1.0, 20.0);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvt_roundsh_sd() {
+        let a = _mm_setr_pd(2.0, 20.0);
+        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+        let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b); // lane 0 = b[0] widened; exceptions suppressed
+        let e = _mm_setr_pd(1.0, 20.0);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_mask_cvt_roundsh_sd() {
+        let src = _mm_setr_pd(3.0, 11.0); // fallback source for the masked lane
+        let a = _mm_setr_pd(2.0, 20.0);
+        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+        let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 0, a, b); // mask clear: lane 0 = src[0]
+        let e = _mm_setr_pd(3.0, 20.0);
+        assert_eq_m128d(r, e);
+        let r = _mm_mask_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(src, 1, a, b); // mask set: lane 0 = b[0] converted
+        let e = _mm_setr_pd(1.0, 20.0);
+        assert_eq_m128d(r, e);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_maskz_cvt_roundsh_sd() {
+        let a = _mm_setr_pd(2.0, 20.0);
+        let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
+        let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b); // mask clear: lane 0 zeroed
+        let e = _mm_setr_pd(0.0, 20.0);
+        assert_eq_m128d(r, e);
+        let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(1, a, b); // mask set: lane 0 = b[0] converted
+        let e = _mm_setr_pd(1.0, 20.0);
+        assert_eq_m128d(r, e);
+    }
}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512fp16.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512fp16.rs
index ebd85ed4ad8a..dc216627a6e1 100644
--- a/library/stdarch/crates/core_arch/src/x86_64/avx512fp16.rs
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx512fp16.rs
@@ -74,6 +74,130 @@ pub unsafe fn _mm_cvt_roundu64_sh(a: __m128h, b: u64) -> __
vcvtusi642sh(a, b, ROUNDING)
}
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_i64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2si))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 {
+    vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) // rounds according to the current rounding direction
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
+/// the result in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_i64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
+    static_assert_rounding!(ROUNDING); // ROUNDING must be a valid _MM_FROUND combination
+    vcvtsh2si64(a, ROUNDING)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
+/// the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsh_u64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2usi))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 {
+    vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) // rounds according to the current rounding direction
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
+/// the result in dst.
+///
+/// Rounding is done according to the rounding parameter, which can be one of:
+///
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// _MM_FROUND_CUR_DIRECTION
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
+    static_assert_rounding!(ROUNDING); // ROUNDING must be a valid _MM_FROUND combination
+    vcvtsh2usi64(a, ROUNDING)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
+/// and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_i64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2si))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 {
+    vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) // truncating variant (vcvttsh2si)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
+/// and store the result in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_i64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
+    static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+    vcvttsh2si64(a, SAE)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
+/// and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsh_u64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2usi))]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 {
+    vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) // truncating variant (vcvttsh2usi)
+}
+
+/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
+/// and store the result in dst.
+///
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsh_u64)
+#[inline]
+#[target_feature(enable = "avx512fp16")]
+#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+pub unsafe fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
+    static_assert_sae!(SAE); // SAE must be _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC
+    vcvttsh2usi64(a, SAE)
+}
+
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512fp16.vcvtsi642sh"]
@@ -126,4 +250,60 @@ mod tests {
let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m128h(r, e);
}
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvtsh_i64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvtsh_i64(a);
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvt_roundsh_i64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvtsh_u64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvtsh_u64(a);
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvt_roundsh_u64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvt_roundsh_u64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvttsh_i64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvttsh_i64(a); // truncating convert; 1.0 is exact either way
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvtt_roundsh_i64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvtt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); // _MM_FROUND_TO_NEAREST_INT is 0, so this equals _MM_FROUND_NO_EXC
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvttsh_u64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvttsh_u64(a); // truncating convert; 1.0 is exact either way
+        assert_eq!(r, 1);
+    }
+
+    #[simd_test(enable = "avx512fp16")]
+    unsafe fn test_mm_cvtt_roundsh_u64() {
+        let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); // only lane 0 (1.0) is converted
+        let r = _mm_cvtt_roundsh_u64::<_MM_FROUND_NO_EXC>(a); // SAE-only parameter: suppresses exceptions
+        assert_eq!(r, 1);
+    }
}