mark AVX512 & AVXNECONVERT intrinsics as safe

Mark all AVX512 & AVXNECONVERT SIMD-computing intrinsics as safe, except for those involving memory operations.
This commit is contained in:
usamoi 2025-01-28 00:00:50 +08:00 committed by Amanieu d'Antras
parent 2348f153ae
commit f53c07b3ff
16 changed files with 21793 additions and 18447 deletions

View file

@ -37,8 +37,8 @@ unsafe extern "C" {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4()))
pub fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
    // SAFETY: only the by-value vector arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Hand both inputs to the LLVM intrinsic as f32 lane vectors.
        let packed = cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4());
        // Reinterpret the intrinsic's result as the public BF16 vector type.
        transmute(packed)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
@ -50,9 +50,11 @@ pub unsafe fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh {
let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
pub fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm_cvtne2ps_pbh(a, b).as_u16x8();
        let fallback = src.as_u16x8();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `fallback`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
@ -64,9 +66,11 @@ pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh {
let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
pub fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm_cvtne2ps_pbh(a, b).as_u16x8();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, u16x8::ZERO);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two 256-bit vectors
@ -77,8 +81,8 @@ pub unsafe fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m12
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8()))
pub fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
    // SAFETY: only the by-value vector arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Hand both inputs to the LLVM intrinsic as f32 lane vectors.
        let packed = cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8());
        // Reinterpret the intrinsic's result as the public BF16 vector type.
        transmute(packed)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b
@ -89,14 +93,11 @@ pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm256_mask_cvtne2ps_pbh(
src: __m256bh,
k: __mmask16,
a: __m256,
b: __m256,
) -> __m256bh {
let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
pub fn _mm256_mask_cvtne2ps_pbh(src: __m256bh, k: __mmask16, a: __m256, b: __m256) -> __m256bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
        let fallback = src.as_u16x16();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `fallback`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b
@ -107,9 +108,11 @@ pub unsafe fn _mm256_mask_cvtne2ps_pbh(
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh {
let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
pub fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, u16x16::ZERO);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two 512-bit vectors
@ -120,8 +123,8 @@ pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> _
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16()))
pub fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
    // SAFETY: only the by-value vector arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Hand both inputs to the LLVM intrinsic as f32 lane vectors.
        let packed = cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16());
        // Reinterpret the intrinsic's result as the public BF16 vector type.
        transmute(packed)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
@ -133,14 +136,11 @@ pub unsafe fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm512_mask_cvtne2ps_pbh(
src: __m512bh,
k: __mmask32,
a: __m512,
b: __m512,
) -> __m512bh {
let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
transmute(simd_select_bitmask(k, cvt, src.as_u16x32()))
pub fn _mm512_mask_cvtne2ps_pbh(src: __m512bh, k: __mmask32, a: __m512, b: __m512) -> __m512bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
        let fallback = src.as_u16x32();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `fallback`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
@ -152,9 +152,11 @@ pub unsafe fn _mm512_mask_cvtne2ps_pbh(
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh {
let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
transmute(simd_select_bitmask(k, cvt, u16x32::ZERO))
pub fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, u16x32::ZERO);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -164,8 +166,8 @@ pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> _
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
pub unsafe fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
transmute(cvtneps2bf16_256(a.as_f32x8()))
pub fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Hand the input to the LLVM intrinsic as an f32 lane vector.
        let packed = cvtneps2bf16_256(a.as_f32x8());
        // Reinterpret the intrinsic's result as the public BF16 vector type.
        transmute(packed)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -176,9 +178,11 @@ pub unsafe fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh {
let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
pub fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm256_cvtneps_pbh(a).as_u16x8();
        let fallback = src.as_u16x8();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `fallback`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -189,9 +193,11 @@ pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) ->
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
pub fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm256_cvtneps_pbh(a).as_u16x8();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, u16x8::ZERO);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -201,8 +207,8 @@ pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
pub unsafe fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
transmute(cvtneps2bf16_512(a.as_f32x16()))
pub fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Hand the input to the LLVM intrinsic as an f32 lane vector.
        let packed = cvtneps2bf16_512(a.as_f32x16());
        // Reinterpret the intrinsic's result as the public BF16 vector type.
        transmute(packed)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -213,9 +219,11 @@ pub unsafe fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
pub unsafe fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh {
let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
pub fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm512_cvtneps_pbh(a).as_u16x16();
        let fallback = src.as_u16x16();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `fallback`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -226,9 +234,11 @@ pub unsafe fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) ->
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm512_cvtneps_pbh(a).as_u16x16();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, u16x16::ZERO);
        transmute(blended)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -239,8 +249,8 @@ pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
    // SAFETY: only the by-value vector arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // The LLVM intrinsic takes the accumulator as f32 lanes and the two
        // BF16 operands viewed as i32 lanes.
        let accumulated = dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4());
        transmute(accumulated)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -252,9 +262,11 @@ pub unsafe fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 {
let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
transmute(simd_select_bitmask(k, rst, src.as_f32x4()))
pub fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let accumulated = _mm_dpbf16_ps(src, a, b).as_f32x4();
        let fallback = src.as_f32x4();
        // Bit i of `k` set -> lane i of `accumulated`; clear -> lane i of `src`.
        let blended = simd_select_bitmask(k, accumulated, fallback);
        transmute(blended)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -266,10 +278,12 @@ pub unsafe fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m12
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
let zero = _mm_set1_ps(0.0_f32).as_f32x4();
transmute(simd_select_bitmask(k, rst, zero))
pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
unsafe {
let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
let zero = _mm_set1_ps(0.0_f32).as_f32x4();
transmute(simd_select_bitmask(k, rst, zero))
}
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -280,8 +294,8 @@ pub unsafe fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m1
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
    // SAFETY: only the by-value vector arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // The LLVM intrinsic takes the accumulator as f32 lanes and the two
        // BF16 operands viewed as i32 lanes.
        let accumulated = dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8());
        transmute(accumulated)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -293,9 +307,11 @@ pub unsafe fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 {
let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
transmute(simd_select_bitmask(k, rst, src.as_f32x8()))
pub fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let accumulated = _mm256_dpbf16_ps(src, a, b).as_f32x8();
        let fallback = src.as_f32x8();
        // Bit i of `k` set -> lane i of `accumulated`; clear -> lane i of `src`.
        let blended = simd_select_bitmask(k, accumulated, fallback);
        transmute(blended)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -307,9 +323,11 @@ pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
transmute(simd_select_bitmask(k, rst, f32x8::ZERO))
pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let accumulated = _mm256_dpbf16_ps(src, a, b).as_f32x8();
        // Bit i of `k` set -> lane i of `accumulated`; clear -> zero.
        let blended = simd_select_bitmask(k, accumulated, f32x8::ZERO);
        transmute(blended)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -322,8 +340,8 @@ pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: _
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16()))
pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
    // SAFETY: only the by-value vector arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // The LLVM intrinsic takes the accumulator as f32 lanes and the two
        // BF16 operands viewed as i32 lanes.
        let accumulated = dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16());
        transmute(accumulated)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -335,9 +353,11 @@ pub unsafe fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 {
let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
transmute(simd_select_bitmask(k, rst, src.as_f32x16()))
pub fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let accumulated = _mm512_dpbf16_ps(src, a, b).as_f32x16();
        let fallback = src.as_f32x16();
        // Bit i of `k` set -> lane i of `accumulated`; clear -> lane i of `src`.
        let blended = simd_select_bitmask(k, accumulated, fallback);
        transmute(blended)
    }
}
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@ -349,14 +369,11 @@ pub unsafe fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: _
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
pub unsafe fn _mm512_maskz_dpbf16_ps(
k: __mmask16,
src: __m512,
a: __m512bh,
b: __m512bh,
) -> __m512 {
let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
transmute(simd_select_bitmask(k, rst, f32x16::ZERO))
pub fn _mm512_maskz_dpbf16_ps(k: __mmask16, src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let accumulated = _mm512_dpbf16_ps(src, a, b).as_f32x16();
        // Bit i of `k` set -> lane i of `accumulated`; clear -> zero.
        let blended = simd_select_bitmask(k, accumulated, f32x16::ZERO);
        transmute(blended)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
@ -366,8 +383,8 @@ pub unsafe fn _mm512_maskz_dpbf16_ps(
#[inline]
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
_mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a))))
pub fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Widen every 16-bit lane to 32 bits ...
        let widened = _mm512_cvtepi16_epi32(transmute(a));
        // ... move those 16 bits into the upper half of each 32-bit lane ...
        let shifted = _mm512_slli_epi32::<16>(widened);
        // ... and reinterpret the integer lanes as f32 lanes.
        _mm512_castsi512_ps(shifted)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
@ -378,9 +395,11 @@ pub unsafe fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
#[inline]
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 {
let cvt = _mm512_cvtpbh_ps(a);
transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16()))
pub fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm512_cvtpbh_ps(a).as_f32x16();
        let fallback = src.as_f32x16();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `src`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
@ -391,9 +410,11 @@ pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> _
#[inline]
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
let cvt = _mm512_cvtpbh_ps(a);
transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO))
pub fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm512_cvtpbh_ps(a).as_f32x16();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, f32x16::ZERO);
        transmute(blended)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
@ -403,8 +424,8 @@ pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
_mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a))))
pub fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Widen every 16-bit lane to 32 bits ...
        let widened = _mm256_cvtepi16_epi32(transmute(a));
        // ... move those 16 bits into the upper half of each 32-bit lane ...
        let shifted = _mm256_slli_epi32::<16>(widened);
        // ... and reinterpret the integer lanes as f32 lanes.
        _mm256_castsi256_ps(shifted)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
@ -415,9 +436,11 @@ pub unsafe fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 {
let cvt = _mm256_cvtpbh_ps(a);
transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8()))
pub fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm256_cvtpbh_ps(a).as_f32x8();
        let fallback = src.as_f32x8();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `src`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
@ -428,9 +451,11 @@ pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
let cvt = _mm256_cvtpbh_ps(a);
transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO))
pub fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm256_cvtpbh_ps(a).as_f32x8();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, f32x8::ZERO);
        transmute(blended)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
@ -440,8 +465,8 @@ pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
_mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a))))
pub fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Widen 16-bit lanes to 32 bits ...
        let widened = _mm_cvtepi16_epi32(transmute(a));
        // ... move those 16 bits into the upper half of each 32-bit lane ...
        let shifted = _mm_slli_epi32::<16>(widened);
        // ... and reinterpret the integer lanes as f32 lanes.
        _mm_castsi128_ps(shifted)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
@ -452,9 +477,11 @@ pub unsafe fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 {
let cvt = _mm_cvtpbh_ps(a);
transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4()))
pub fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm_cvtpbh_ps(a).as_f32x4();
        let fallback = src.as_f32x4();
        // Bit i of `k` set -> lane i of `converted`; clear -> lane i of `src`.
        let blended = simd_select_bitmask(k, converted, fallback);
        transmute(blended)
    }
}
/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
@ -465,9 +492,11 @@ pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m12
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
let cvt = _mm_cvtpbh_ps(a);
transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO))
pub fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let converted = _mm_cvtpbh_ps(a).as_f32x4();
        // Bit i of `k` set -> lane i of `converted`; clear -> zero.
        let blended = simd_select_bitmask(k, converted, f32x4::ZERO);
        transmute(blended)
    }
}
/// Converts a single BF16 (16-bit) floating-point element in a to a single-precision (32-bit) floating-point
@ -477,7 +506,7 @@ pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
#[inline]
#[target_feature(enable = "avx512bf16,avx512f")]
#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
pub unsafe fn _mm_cvtsbh_ss(a: bf16) -> f32 {
pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
    // Place the BF16 bit pattern in the upper 16 bits of a 32-bit word (low
    // 16 bits zero) and reinterpret that word as an f32. No unsafe needed.
    let widened = a.to_bits() as u32;
    f32::from_bits(widened << 16)
}
@ -489,15 +518,17 @@ pub unsafe fn _mm_cvtsbh_ss(a: bf16) -> f32 {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
let mut dst: __m128bh;
asm!(
"vcvtneps2bf16 {dst}, {src}",
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
options(pure, nomem, nostack, preserves_flags)
);
dst
pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
// Emits `vcvtneps2bf16` through inline assembly: `a` is read from one xmm
// register and the value the instruction writes to `dst` is returned.
// SAFETY: the asm block declares that it touches neither memory nor the stack
// and works only on its register operands; the instruction's CPU features are
// required by this function's `#[target_feature]` attribute.
unsafe {
// Left uninitialized on purpose: the `lateout` operand below assigns it.
let mut dst: __m128bh;
asm!(
"vcvtneps2bf16 {dst}, {src}",
// `lateout`: write-only result; may share a register with an input.
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
// Declared as a pure computation: no memory access, no stack use, flags preserved.
options(pure, nomem, nostack, preserves_flags)
);
dst
}
}
/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -509,16 +540,18 @@ pub unsafe fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
#[target_feature(enable = "avx512bf16,avx512vl")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
let mut dst = src;
asm!(
"vcvtneps2bf16 {dst}{{{k}}},{src}",
dst = inlateout(xmm_reg) dst,
src = in(xmm_reg) a,
k = in(kreg) k,
options(pure, nomem, nostack, preserves_flags)
);
dst
pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
// Emits `vcvtneps2bf16` with a write mask through inline assembly.
// SAFETY: the asm block declares that it touches neither memory nor the stack
// and works only on its register operands; the instruction's CPU features are
// required by this function's `#[target_feature]` attribute.
unsafe {
// Seed the destination with `src`: `dst` is an in/out operand, so whatever
// the masked instruction leaves unwritten keeps the `src` contents.
// NOTE(review): presumably merge-masking (lanes with a clear bit in `k`
// stay untouched) — confirm against the instruction reference.
let mut dst = src;
asm!(
// `{{`/`}}` are escaped braces, so the emitted text is `<dst>{<k>},<src>`.
"vcvtneps2bf16 {dst}{{{k}}},{src}",
dst = inlateout(xmm_reg) dst,
src = in(xmm_reg) a,
// The mask travels in an AVX-512 mask register.
k = in(kreg) k,
// Declared as a pure computation: no memory access, no stack use, flags preserved.
options(pure, nomem, nostack, preserves_flags)
);
dst
}
}
/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@ -530,16 +563,18 @@ pub unsafe fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m
#[target_feature(enable = "avx512bf16,avx512vl")]
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
let mut dst: __m128bh;
asm!(
"vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
k = in(kreg) k,
options(pure, nomem, nostack, preserves_flags)
);
dst
pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
// Emits `vcvtneps2bf16` with a mask and the `{z}` modifier through inline
// assembly.
// SAFETY: the asm block declares that it touches neither memory nor the stack
// and works only on its register operands; the instruction's CPU features are
// required by this function's `#[target_feature]` attribute.
unsafe {
// Left uninitialized on purpose: the `lateout` operand below assigns it.
let mut dst: __m128bh;
asm!(
// `{{`/`}}` are escaped braces, so the emitted text is `<dst>{<k>}{z},<src>`.
// NOTE(review): `{z}` is presumably zero-masking (lanes with a clear bit
// in `k` become zero) — confirm against the instruction reference.
"vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
// `lateout`: write-only result; may share a register with an input.
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
// The mask travels in an AVX-512 mask register.
k = in(kreg) k,
// Declared as a pure computation: no memory access, no stack use, flags preserved.
options(pure, nomem, nostack, preserves_flags)
);
dst
}
}
/// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point
@ -549,9 +584,11 @@ pub unsafe fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
#[inline]
#[target_feature(enable = "avx512bf16,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
pub unsafe fn _mm_cvtness_sbh(a: f32) -> bf16 {
let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0);
bf16::from_bits(value)
pub fn _mm_cvtness_sbh(a: f32) -> bf16 {
unsafe {
let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0);
bf16::from_bits(value)
}
}
#[cfg(test)]

View file

@ -43,8 +43,8 @@ unsafe extern "C" {
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
transmute(simd_ctpop(a.as_i16x32()))
pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Per-lane population count over the 16-bit lanes.
        let counts = simd_ctpop(a.as_i16x32());
        transmute(counts)
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -57,12 +57,14 @@ pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i16x32()),
i16x32::ZERO,
))
pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let counts = simd_ctpop(a.as_i16x32());
        // Bit i of `k` set -> lane i of `counts`; clear -> zero.
        transmute(simd_select_bitmask(k, counts, i16x32::ZERO))
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -75,12 +77,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i16x32()),
src.as_i16x32(),
))
pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let counts = simd_ctpop(a.as_i16x32());
        let fallback = src.as_i16x32();
        // Bit i of `k` set -> lane i of `counts`; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, counts, fallback))
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -90,8 +94,8 @@ pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
transmute(simd_ctpop(a.as_i16x16()))
pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Per-lane population count over the 16-bit lanes.
        let counts = simd_ctpop(a.as_i16x16());
        transmute(counts)
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -104,12 +108,14 @@ pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i16x16()),
i16x16::ZERO,
))
pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let counts = simd_ctpop(a.as_i16x16());
        // Bit i of `k` set -> lane i of `counts`; clear -> zero.
        transmute(simd_select_bitmask(k, counts, i16x16::ZERO))
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -122,12 +128,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i16x16()),
src.as_i16x16(),
))
pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let counts = simd_ctpop(a.as_i16x16());
        let fallback = src.as_i16x16();
        // Bit i of `k` set -> lane i of `counts`; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, counts, fallback))
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -137,8 +145,8 @@ pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
transmute(simd_ctpop(a.as_i16x8()))
pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Per-lane population count over the 16-bit lanes.
        let counts = simd_ctpop(a.as_i16x8());
        transmute(counts)
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -151,12 +159,14 @@ pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i16x8()),
i16x8::ZERO,
))
pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let counts = simd_ctpop(a.as_i16x8());
        // Bit i of `k` set -> lane i of `counts`; clear -> zero.
        transmute(simd_select_bitmask(k, counts, i16x8::ZERO))
    }
}
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@ -169,12 +179,14 @@ pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntw))]
pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i16x8()),
src.as_i16x8(),
))
pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    // SAFETY: only by-value vector/mask arguments are involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        let counts = simd_ctpop(a.as_i16x8());
        let fallback = src.as_i16x8();
        // Bit i of `k` set -> lane i of `counts`; clear -> lane i of `src`.
        transmute(simd_select_bitmask(k, counts, fallback))
    }
}
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
@ -184,8 +196,8 @@ pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
transmute(simd_ctpop(a.as_i8x64()))
pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
    // SAFETY: only the by-value vector argument is involved; the CPU features
    // are required by this function's `#[target_feature]` attribute.
    unsafe {
        // Per-lane population count over the 8-bit lanes.
        let counts = simd_ctpop(a.as_i8x64());
        transmute(counts)
    }
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -198,12 +210,14 @@ pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x64()),
i8x64::ZERO,
))
pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x64()),
i8x64::ZERO,
))
}
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -216,12 +230,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x64()),
src.as_i8x64(),
))
pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x64()),
src.as_i8x64(),
))
}
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -231,8 +247,8 @@ pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) ->
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
transmute(simd_ctpop(a.as_i8x32()))
pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
unsafe { transmute(simd_ctpop(a.as_i8x32())) }
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -245,12 +261,14 @@ pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x32()),
i8x32::ZERO,
))
pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x32()),
i8x32::ZERO,
))
}
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -263,12 +281,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x32()),
src.as_i8x32(),
))
pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x32()),
src.as_i8x32(),
))
}
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -278,8 +298,8 @@ pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) ->
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
transmute(simd_ctpop(a.as_i8x16()))
pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
unsafe { transmute(simd_ctpop(a.as_i8x16())) }
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -292,12 +312,14 @@ pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x16()),
i8x16::ZERO,
))
pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x16()),
i8x16::ZERO,
))
}
}
/// Maps each packed 8-bit integer to the number of logical 1 bits it contains.
@ -310,12 +332,14 @@ pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntb))]
pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x16()),
src.as_i8x16(),
))
pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i8x16()),
src.as_i8x16(),
))
}
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -327,8 +351,8 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0)
pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -343,8 +367,8 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64
#[target_feature(enable = "avx512bitalg")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k)
pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -356,8 +380,8 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0)
pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -372,8 +396,8 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k)
pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -385,8 +409,8 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0)
pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@ -401,8 +425,8 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
#[target_feature(enable = "avx512bitalg,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k)
pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
}
#[cfg(test)]

File diff suppressed because it is too large Load diff

View file

@ -11,7 +11,7 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
pub fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
_mm512_set1_epi32(k as i32)
}
@ -22,7 +22,7 @@ pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
pub fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
_mm256_set1_epi32(k as i32)
}
@ -33,7 +33,7 @@ pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
pub fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
_mm_set1_epi32(k as i32)
}
@ -44,7 +44,7 @@ pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
pub fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
_mm512_set1_epi64(k as i64)
}
@ -55,7 +55,7 @@ pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
pub fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
_mm256_set1_epi64x(k as i64)
}
@ -66,7 +66,7 @@ pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
pub fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
_mm_set1_epi64x(k as i64)
}
@ -77,8 +77,8 @@ pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
transmute(vpconflictd(a.as_i32x16()))
pub fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
unsafe { transmute(vpconflictd(a.as_i32x16())) }
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -88,9 +88,11 @@ pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
let conflict = _mm512_conflict_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
pub fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
unsafe {
let conflict = _mm512_conflict_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
}
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -100,9 +102,11 @@ pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i)
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
let conflict = _mm512_conflict_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, conflict, i32x16::ZERO))
pub fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
unsafe {
let conflict = _mm512_conflict_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, conflict, i32x16::ZERO))
}
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
@ -112,8 +116,8 @@ pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
transmute(vpconflictd256(a.as_i32x8()))
pub fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
unsafe { transmute(vpconflictd256(a.as_i32x8())) }
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -123,9 +127,11 @@ pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let conflict = _mm256_conflict_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
pub fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let conflict = _mm256_conflict_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
}
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -135,9 +141,11 @@ pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i)
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
let conflict = _mm256_conflict_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, conflict, i32x8::ZERO))
pub fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let conflict = _mm256_conflict_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, conflict, i32x8::ZERO))
}
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
@ -147,8 +155,8 @@ pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
transmute(vpconflictd128(a.as_i32x4()))
pub fn _mm_conflict_epi32(a: __m128i) -> __m128i {
unsafe { transmute(vpconflictd128(a.as_i32x4())) }
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -158,9 +166,11 @@ pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let conflict = _mm_conflict_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
pub fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let conflict = _mm_conflict_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
}
}
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -170,9 +180,11 @@ pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) ->
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
let conflict = _mm_conflict_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, conflict, i32x4::ZERO))
pub fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let conflict = _mm_conflict_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, conflict, i32x4::ZERO))
}
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
@ -182,8 +194,8 @@ pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
transmute(vpconflictq(a.as_i64x8()))
pub fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
unsafe { transmute(vpconflictq(a.as_i64x8())) }
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -193,9 +205,11 @@ pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
let conflict = _mm512_conflict_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
pub fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
unsafe {
let conflict = _mm512_conflict_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
}
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -205,9 +219,11 @@ pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i)
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
let conflict = _mm512_conflict_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, conflict, i64x8::ZERO))
pub fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
unsafe {
let conflict = _mm512_conflict_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, conflict, i64x8::ZERO))
}
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
@ -217,8 +233,8 @@ pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
transmute(vpconflictq256(a.as_i64x4()))
pub fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
unsafe { transmute(vpconflictq256(a.as_i64x4())) }
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -228,9 +244,11 @@ pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let conflict = _mm256_conflict_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
pub fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let conflict = _mm256_conflict_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
}
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -240,9 +258,11 @@ pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i)
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
let conflict = _mm256_conflict_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, conflict, i64x4::ZERO))
pub fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let conflict = _mm256_conflict_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, conflict, i64x4::ZERO))
}
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
@ -252,8 +272,8 @@ pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
transmute(vpconflictq128(a.as_i64x2()))
pub fn _mm_conflict_epi64(a: __m128i) -> __m128i {
unsafe { transmute(vpconflictq128(a.as_i64x2())) }
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -263,9 +283,11 @@ pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let conflict = _mm_conflict_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
pub fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let conflict = _mm_conflict_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
}
}
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
@ -275,9 +297,11 @@ pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) ->
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
let conflict = _mm_conflict_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, conflict, i64x2::ZERO))
pub fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let conflict = _mm_conflict_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, conflict, i64x2::ZERO))
}
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
@ -287,8 +311,8 @@ pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
transmute(simd_ctlz(a.as_i32x16()))
pub fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
unsafe { transmute(simd_ctlz(a.as_i32x16())) }
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -298,9 +322,11 @@ pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
pub fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
unsafe {
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
}
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -310,9 +336,11 @@ pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) ->
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO))
pub fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
unsafe {
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO))
}
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
@ -322,8 +350,8 @@ pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
transmute(simd_ctlz(a.as_i32x8()))
pub fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
unsafe { transmute(simd_ctlz(a.as_i32x8())) }
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -333,9 +361,11 @@ pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
pub fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
}
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -345,9 +375,11 @@ pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) ->
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO))
pub fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO))
}
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
@ -357,8 +389,8 @@ pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
transmute(simd_ctlz(a.as_i32x4()))
pub fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
unsafe { transmute(simd_ctlz(a.as_i32x4())) }
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -368,9 +400,11 @@ pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
pub fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
}
}
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -380,9 +414,11 @@ pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO))
pub fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO))
}
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
@ -392,8 +428,8 @@ pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
transmute(simd_ctlz(a.as_i64x8()))
pub fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
unsafe { transmute(simd_ctlz(a.as_i64x8())) }
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -403,9 +439,11 @@ pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
pub fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
unsafe {
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
}
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -415,9 +453,11 @@ pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) ->
#[target_feature(enable = "avx512cd")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO))
pub fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
unsafe {
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO))
}
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
@ -427,8 +467,8 @@ pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
transmute(simd_ctlz(a.as_i64x4()))
pub fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
unsafe { transmute(simd_ctlz(a.as_i64x4())) }
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -438,9 +478,11 @@ pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
pub fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
}
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -450,9 +492,11 @@ pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) ->
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO))
pub fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
unsafe {
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO))
}
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
@ -462,8 +506,8 @@ pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
transmute(simd_ctlz(a.as_i64x2()))
pub fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
unsafe { transmute(simd_ctlz(a.as_i64x2())) }
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -473,9 +517,11 @@ pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
pub fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
}
}
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -485,9 +531,11 @@ pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m
#[target_feature(enable = "avx512cd,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO))
pub fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
unsafe {
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO))
}
}
#[allow(improper_ctypes)]

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -15,8 +15,8 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
vpmadd52huq_512(a, b, c)
pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
unsafe { vpmadd52huq_512(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -31,13 +31,8 @@ pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm512_mask_madd52hi_epu64(
a: __m512i,
k: __mmask8,
b: __m512i,
c: __m512i,
) -> __m512i {
simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a)
pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -52,13 +47,8 @@ pub unsafe fn _mm512_mask_madd52hi_epu64(
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm512_maskz_madd52hi_epu64(
k: __mmask8,
a: __m512i,
b: __m512i,
c: __m512i,
) -> __m512i {
simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512())
pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -72,8 +62,8 @@ pub unsafe fn _mm512_maskz_madd52hi_epu64(
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
vpmadd52luq_512(a, b, c)
pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
unsafe { vpmadd52luq_512(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -88,13 +78,8 @@ pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm512_mask_madd52lo_epu64(
a: __m512i,
k: __mmask8,
b: __m512i,
c: __m512i,
) -> __m512i {
simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a)
pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -109,13 +94,8 @@ pub unsafe fn _mm512_mask_madd52lo_epu64(
#[target_feature(enable = "avx512ifma")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm512_maskz_madd52lo_epu64(
k: __mmask8,
a: __m512i,
b: __m512i,
c: __m512i,
) -> __m512i {
simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512())
pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -132,8 +112,8 @@ pub unsafe fn _mm512_maskz_madd52lo_epu64(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpmadd52huq)
)]
pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
vpmadd52huq_256(a, b, c)
pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
unsafe { vpmadd52huq_256(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -147,8 +127,8 @@ pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> _
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
vpmadd52huq_256(a, b, c)
pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
unsafe { vpmadd52huq_256(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -163,13 +143,8 @@ pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m25
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm256_mask_madd52hi_epu64(
a: __m256i,
k: __mmask8,
b: __m256i,
c: __m256i,
) -> __m256i {
simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a)
pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -184,13 +159,8 @@ pub unsafe fn _mm256_mask_madd52hi_epu64(
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm256_maskz_madd52hi_epu64(
k: __mmask8,
a: __m256i,
b: __m256i,
c: __m256i,
) -> __m256i {
simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256())
pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -207,8 +177,8 @@ pub unsafe fn _mm256_maskz_madd52hi_epu64(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpmadd52luq)
)]
pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
vpmadd52luq_256(a, b, c)
pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
unsafe { vpmadd52luq_256(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -222,8 +192,8 @@ pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> _
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
vpmadd52luq_256(a, b, c)
pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
unsafe { vpmadd52luq_256(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -238,13 +208,8 @@ pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m25
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm256_mask_madd52lo_epu64(
a: __m256i,
k: __mmask8,
b: __m256i,
c: __m256i,
) -> __m256i {
simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a)
pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -259,13 +224,8 @@ pub unsafe fn _mm256_mask_madd52lo_epu64(
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm256_maskz_madd52lo_epu64(
k: __mmask8,
a: __m256i,
b: __m256i,
c: __m256i,
) -> __m256i {
simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256())
pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -282,8 +242,8 @@ pub unsafe fn _mm256_maskz_madd52lo_epu64(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpmadd52huq)
)]
pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
vpmadd52huq_128(a, b, c)
pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
unsafe { vpmadd52huq_128(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -297,8 +257,8 @@ pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m1
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
vpmadd52huq_128(a, b, c)
pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
unsafe { vpmadd52huq_128(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -313,8 +273,8 @@ pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a)
pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -329,8 +289,8 @@ pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52huq))]
pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128())
pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -347,8 +307,8 @@ pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: _
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpmadd52luq)
)]
pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
vpmadd52luq_128(a, b, c)
pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
unsafe { vpmadd52luq_128(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -362,8 +322,8 @@ pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m1
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
vpmadd52luq_128(a, b, c)
pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
unsafe { vpmadd52luq_128(a, b, c) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -378,8 +338,8 @@ pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a)
pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
}
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
@ -394,8 +354,8 @@ pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __
#[target_feature(enable = "avx512ifma,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmadd52luq))]
pub unsafe fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128())
pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
}
#[allow(improper_ctypes)]

View file

@ -11,8 +11,8 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64()))
pub fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
unsafe { transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64())) }
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -22,14 +22,16 @@ pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) ->
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm512_mask_permutex2var_epi8(
pub fn _mm512_mask_permutex2var_epi8(
a: __m512i,
k: __mmask64,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
unsafe {
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -39,14 +41,16 @@ pub unsafe fn _mm512_mask_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm512_maskz_permutex2var_epi8(
pub fn _mm512_maskz_permutex2var_epi8(
k: __mmask64,
a: __m512i,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
unsafe {
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -56,14 +60,16 @@ pub unsafe fn _mm512_maskz_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm512_mask2_permutex2var_epi8(
pub fn _mm512_mask2_permutex2var_epi8(
a: __m512i,
idx: __m512i,
k: __mmask64,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
unsafe {
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
@ -73,8 +79,8 @@ pub unsafe fn _mm512_mask2_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32()))
pub fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32())) }
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -84,14 +90,16 @@ pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) ->
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm256_mask_permutex2var_epi8(
pub fn _mm256_mask_permutex2var_epi8(
a: __m256i,
k: __mmask32,
idx: __m256i,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
unsafe {
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -101,14 +109,16 @@ pub unsafe fn _mm256_mask_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm256_maskz_permutex2var_epi8(
pub fn _mm256_maskz_permutex2var_epi8(
k: __mmask32,
a: __m256i,
idx: __m256i,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
unsafe {
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -118,14 +128,16 @@ pub unsafe fn _mm256_maskz_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm256_mask2_permutex2var_epi8(
pub fn _mm256_mask2_permutex2var_epi8(
a: __m256i,
idx: __m256i,
k: __mmask32,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
unsafe {
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
@ -135,8 +147,8 @@ pub unsafe fn _mm256_mask2_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16()))
pub fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16())) }
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -146,14 +158,11 @@ pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermt2b))]
pub unsafe fn _mm_mask_permutex2var_epi8(
a: __m128i,
k: __mmask16,
idx: __m128i,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
pub fn _mm_mask_permutex2var_epi8(a: __m128i, k: __mmask16, idx: __m128i, b: __m128i) -> __m128i {
unsafe {
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -163,14 +172,11 @@ pub unsafe fn _mm_mask_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
pub unsafe fn _mm_maskz_permutex2var_epi8(
k: __mmask16,
a: __m128i,
idx: __m128i,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
pub fn _mm_maskz_permutex2var_epi8(k: __mmask16, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
unsafe {
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
}
}
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
@ -180,14 +186,11 @@ pub unsafe fn _mm_maskz_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermi2b))]
pub unsafe fn _mm_mask2_permutex2var_epi8(
a: __m128i,
idx: __m128i,
k: __mmask16,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
pub fn _mm_mask2_permutex2var_epi8(a: __m128i, idx: __m128i, k: __mmask16, b: __m128i) -> __m128i {
unsafe {
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
}
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
@ -197,8 +200,8 @@ pub unsafe fn _mm_mask2_permutex2var_epi8(
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermb(a.as_i8x64(), idx.as_i8x64()))
pub fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
unsafe { transmute(vpermb(a.as_i8x64(), idx.as_i8x64())) }
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -208,14 +211,16 @@ pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_mask_permutexvar_epi8(
pub fn _mm512_mask_permutexvar_epi8(
src: __m512i,
k: __mmask64,
idx: __m512i,
a: __m512i,
) -> __m512i {
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
unsafe {
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
}
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -225,9 +230,11 @@ pub unsafe fn _mm512_mask_permutexvar_epi8(
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
pub fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
unsafe {
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
}
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
@ -237,8 +244,8 @@ pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m51
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
transmute(vpermb256(a.as_i8x32(), idx.as_i8x32()))
pub fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
unsafe { transmute(vpermb256(a.as_i8x32(), idx.as_i8x32())) }
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -248,14 +255,16 @@ pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_mask_permutexvar_epi8(
pub fn _mm256_mask_permutexvar_epi8(
src: __m256i,
k: __mmask32,
idx: __m256i,
a: __m256i,
) -> __m256i {
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
unsafe {
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
}
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -265,9 +274,11 @@ pub unsafe fn _mm256_mask_permutexvar_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
pub fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
unsafe {
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
}
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
@ -277,8 +288,8 @@ pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m25
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
transmute(vpermb128(a.as_i8x16(), idx.as_i8x16()))
pub fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
unsafe { transmute(vpermb128(a.as_i8x16(), idx.as_i8x16())) }
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -288,14 +299,11 @@ pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_mask_permutexvar_epi8(
src: __m128i,
k: __mmask16,
idx: __m128i,
a: __m128i,
) -> __m128i {
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
pub fn _mm_mask_permutexvar_epi8(src: __m128i, k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
unsafe {
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
}
}
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -305,9 +313,11 @@ pub unsafe fn _mm_mask_permutexvar_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpermb))]
pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
pub fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
unsafe {
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
}
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
@ -317,8 +327,8 @@ pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i)
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64()))
pub fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
unsafe { transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64())) }
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -328,14 +338,16 @@ pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_mask_multishift_epi64_epi8(
pub fn _mm512_mask_multishift_epi64_epi8(
src: __m512i,
k: __mmask64,
a: __m512i,
b: __m512i,
) -> __m512i {
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
unsafe {
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
}
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -345,9 +357,11 @@ pub unsafe fn _mm512_mask_multishift_epi64_epi8(
#[target_feature(enable = "avx512vbmi")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
transmute(simd_select_bitmask(k, multishift, i8x64::ZERO))
pub fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
transmute(simd_select_bitmask(k, multishift, i8x64::ZERO))
}
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
@ -357,8 +371,8 @@ pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32()))
pub fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32())) }
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -368,14 +382,16 @@ pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_mask_multishift_epi64_epi8(
pub fn _mm256_mask_multishift_epi64_epi8(
src: __m256i,
k: __mmask32,
a: __m256i,
b: __m256i,
) -> __m256i {
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
unsafe {
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
}
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -385,9 +401,11 @@ pub unsafe fn _mm256_mask_multishift_epi64_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
transmute(simd_select_bitmask(k, multishift, i8x32::ZERO))
pub fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
transmute(simd_select_bitmask(k, multishift, i8x32::ZERO))
}
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
@ -397,8 +415,8 @@ pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16()))
pub fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16())) }
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -408,14 +426,16 @@ pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_mask_multishift_epi64_epi8(
pub fn _mm_mask_multishift_epi64_epi8(
src: __m128i,
k: __mmask16,
a: __m128i,
b: __m128i,
) -> __m128i {
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
unsafe {
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
}
}
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -425,9 +445,11 @@ pub unsafe fn _mm_mask_multishift_epi64_epi8(
#[target_feature(enable = "avx512vbmi,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
transmute(simd_select_bitmask(k, multishift, i8x16::ZERO))
pub fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
transmute(simd_select_bitmask(k, multishift, i8x16::ZERO))
}
}
#[allow(improper_ctypes)]

File diff suppressed because it is too large Load diff

View file

@ -11,8 +11,8 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -22,14 +22,11 @@ pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm512_mask_dpwssd_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -39,14 +36,11 @@ pub unsafe fn _mm512_mask_dpwssd_epi32(
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm512_maskz_dpwssd_epi32(
k: __mmask16,
src: __m512i,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -59,8 +53,8 @@ pub unsafe fn _mm512_maskz_dpwssd_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpwssd)
)]
pub unsafe fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -70,8 +64,8 @@ pub unsafe fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> _
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -81,14 +75,11 @@ pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm256_mask_dpwssd_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -98,14 +89,11 @@ pub unsafe fn _mm256_mask_dpwssd_epi32(
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm256_maskz_dpwssd_epi32(
k: __mmask8,
src: __m256i,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -118,8 +106,8 @@ pub unsafe fn _mm256_maskz_dpwssd_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpwssd)
)]
pub unsafe fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -129,8 +117,8 @@ pub unsafe fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m1
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -140,9 +128,11 @@ pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -152,9 +142,11 @@ pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssd))]
pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -164,8 +156,8 @@ pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -175,14 +167,11 @@ pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm512_mask_dpwssds_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -192,14 +181,11 @@ pub unsafe fn _mm512_mask_dpwssds_epi32(
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm512_maskz_dpwssds_epi32(
k: __mmask16,
src: __m512i,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -212,8 +198,8 @@ pub unsafe fn _mm512_maskz_dpwssds_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpwssds)
)]
pub unsafe fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -223,8 +209,8 @@ pub unsafe fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) ->
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -234,14 +220,11 @@ pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm256_mask_dpwssds_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -251,14 +234,11 @@ pub unsafe fn _mm256_mask_dpwssds_epi32(
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm256_maskz_dpwssds_epi32(
k: __mmask8,
src: __m256i,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -271,8 +251,8 @@ pub unsafe fn _mm256_maskz_dpwssds_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpwssds)
)]
pub unsafe fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -282,8 +262,8 @@ pub unsafe fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -293,9 +273,11 @@ pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -305,14 +287,11 @@ pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpwssds))]
pub unsafe fn _mm_maskz_dpwssds_epi32(
k: __mmask8,
src: __m128i,
a: __m128i,
b: __m128i,
) -> __m128i {
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -322,8 +301,8 @@ pub unsafe fn _mm_maskz_dpwssds_epi32(
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -333,14 +312,11 @@ pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm512_mask_dpbusd_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -350,14 +326,11 @@ pub unsafe fn _mm512_mask_dpbusd_epi32(
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm512_maskz_dpbusd_epi32(
k: __mmask16,
src: __m512i,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -370,8 +343,8 @@ pub unsafe fn _mm512_maskz_dpbusd_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpbusd)
)]
pub unsafe fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -381,8 +354,8 @@ pub unsafe fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> _
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -392,14 +365,11 @@ pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm256_mask_dpbusd_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -409,14 +379,11 @@ pub unsafe fn _mm256_mask_dpbusd_epi32(
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm256_maskz_dpbusd_epi32(
k: __mmask8,
src: __m256i,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -429,8 +396,8 @@ pub unsafe fn _mm256_maskz_dpbusd_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpbusd)
)]
pub unsafe fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@ -440,8 +407,8 @@ pub unsafe fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m1
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -451,9 +418,11 @@ pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -463,9 +432,11 @@ pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusd))]
pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -475,8 +446,8 @@ pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -486,14 +457,11 @@ pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm512_mask_dpbusds_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -503,14 +471,11 @@ pub unsafe fn _mm512_mask_dpbusds_epi32(
#[target_feature(enable = "avx512vnni")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm512_maskz_dpbusds_epi32(
k: __mmask16,
src: __m512i,
a: __m512i,
b: __m512i,
) -> __m512i {
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
unsafe {
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -523,8 +488,8 @@ pub unsafe fn _mm512_maskz_dpbusds_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpbusds)
)]
pub unsafe fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -534,8 +499,8 @@ pub unsafe fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) ->
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -545,14 +510,11 @@ pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm256_mask_dpbusds_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -562,14 +524,11 @@ pub unsafe fn _mm256_mask_dpbusds_epi32(
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm256_maskz_dpbusds_epi32(
k: __mmask8,
src: __m256i,
a: __m256i,
b: __m256i,
) -> __m256i {
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe {
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -582,8 +541,8 @@ pub unsafe fn _mm256_maskz_dpbusds_epi32(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(vpdpbusds)
)]
pub unsafe fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@ -593,8 +552,8 @@ pub unsafe fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -604,9 +563,11 @@ pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@ -616,14 +577,11 @@ pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _
#[target_feature(enable = "avx512vnni,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpdpbusds))]
pub unsafe fn _mm_maskz_dpbusds_epi32(
k: __mmask8,
src: __m128i,
a: __m128i,
b: __m128i,
) -> __m128i {
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe {
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
}
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@ -638,8 +596,8 @@ pub unsafe fn _mm_maskz_dpbusds_epi32(
assert_instr(vpdpbssd)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@ -654,8 +612,8 @@ pub unsafe fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpbssd)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@ -670,8 +628,8 @@ pub unsafe fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
assert_instr(vpdpbssds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@ -686,8 +644,8 @@ pub unsafe fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpbssds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@ -702,8 +660,8 @@ pub unsafe fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
assert_instr(vpdpbsud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@ -718,8 +676,8 @@ pub unsafe fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpbsud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@ -734,8 +692,8 @@ pub unsafe fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
assert_instr(vpdpbsuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@ -750,8 +708,8 @@ pub unsafe fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpbsuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@ -766,8 +724,8 @@ pub unsafe fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
assert_instr(vpdpbuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@ -782,8 +740,8 @@ pub unsafe fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpbuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@ -798,8 +756,8 @@ pub unsafe fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
assert_instr(vpdpbuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@ -814,8 +772,8 @@ pub unsafe fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpbuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@ -830,8 +788,8 @@ pub unsafe fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
assert_instr(vpdpwsud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@ -846,8 +804,8 @@ pub unsafe fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpwsud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@ -862,8 +820,8 @@ pub unsafe fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
assert_instr(vpdpwsuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@ -878,8 +836,8 @@ pub unsafe fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpwsuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@ -894,8 +852,8 @@ pub unsafe fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
assert_instr(vpdpwusd)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@ -910,8 +868,8 @@ pub unsafe fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpwusd)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@ -926,8 +884,8 @@ pub unsafe fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
assert_instr(vpdpwusds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@ -942,8 +900,8 @@ pub unsafe fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpwusds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@ -958,8 +916,8 @@ pub unsafe fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
assert_instr(vpdpwuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@ -974,8 +932,8 @@ pub unsafe fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpwuud)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@ -990,8 +948,8 @@ pub unsafe fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
assert_instr(vpdpwuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
}
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@ -1006,8 +964,8 @@ pub unsafe fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
assert_instr(vpdpwuuds)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
}
#[allow(improper_ctypes)]

View file

@ -26,8 +26,8 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "avx512vpopcntdq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
transmute(simd_ctpop(a.as_i32x16()))
pub fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
unsafe { transmute(simd_ctpop(a.as_i32x16())) }
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -40,12 +40,14 @@ pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512vpopcntdq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x16()),
i32x16::ZERO,
))
pub fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x16()),
i32x16::ZERO,
))
}
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -58,12 +60,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512vpopcntdq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x16()),
src.as_i32x16(),
))
pub fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x16()),
src.as_i32x16(),
))
}
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -73,8 +77,8 @@ pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
transmute(simd_ctpop(a.as_i32x8()))
pub fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
unsafe { transmute(simd_ctpop(a.as_i32x8())) }
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -87,12 +91,14 @@ pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x8()),
i32x8::ZERO,
))
pub fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x8()),
i32x8::ZERO,
))
}
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -105,12 +111,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x8()),
src.as_i32x8(),
))
pub fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x8()),
src.as_i32x8(),
))
}
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -120,8 +128,8 @@ pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) ->
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
transmute(simd_ctpop(a.as_i32x4()))
pub fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
unsafe { transmute(simd_ctpop(a.as_i32x4())) }
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -134,12 +142,14 @@ pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x4()),
i32x4::ZERO,
))
pub fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x4()),
i32x4::ZERO,
))
}
}
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
@ -152,12 +162,14 @@ pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntd))]
pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x4()),
src.as_i32x4(),
))
pub fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i32x4()),
src.as_i32x4(),
))
}
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -167,8 +179,8 @@ pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __
#[target_feature(enable = "avx512vpopcntdq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
transmute(simd_ctpop(a.as_i64x8()))
pub fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
unsafe { transmute(simd_ctpop(a.as_i64x8())) }
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -181,12 +193,14 @@ pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
#[target_feature(enable = "avx512vpopcntdq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x8()),
i64x8::ZERO,
))
pub fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x8()),
i64x8::ZERO,
))
}
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -199,12 +213,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
#[target_feature(enable = "avx512vpopcntdq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x8()),
src.as_i64x8(),
))
pub fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x8()),
src.as_i64x8(),
))
}
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -214,8 +230,8 @@ pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) ->
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
transmute(simd_ctpop(a.as_i64x4()))
pub fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
unsafe { transmute(simd_ctpop(a.as_i64x4())) }
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -228,12 +244,14 @@ pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x4()),
i64x4::ZERO,
))
pub fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x4()),
i64x4::ZERO,
))
}
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -246,12 +264,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x4()),
src.as_i64x4(),
))
pub fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x4()),
src.as_i64x4(),
))
}
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -261,8 +281,8 @@ pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) ->
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
transmute(simd_ctpop(a.as_i64x2()))
pub fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
unsafe { transmute(simd_ctpop(a.as_i64x2())) }
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -275,12 +295,14 @@ pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x2()),
i64x2::ZERO,
))
pub fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x2()),
i64x2::ZERO,
))
}
}
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
@ -293,12 +315,14 @@ pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpopcntq))]
pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x2()),
src.as_i64x2(),
))
pub fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
unsafe {
transmute(simd_select_bitmask(
k,
simd_ctpop(a.as_i64x2()),
src.as_i64x2(),
))
}
}
#[cfg(test)]

View file

@ -199,15 +199,17 @@ pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
assert_instr(vcvtneps2bf16)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
let mut dst: __m128bh;
asm!(
"{{vex}}vcvtneps2bf16 {dst},{src}",
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
options(pure, nomem, nostack, preserves_flags)
);
dst
pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
unsafe {
let mut dst: __m128bh;
asm!(
"{{vex}}vcvtneps2bf16 {dst},{src}",
dst = lateout(xmm_reg) dst,
src = in(xmm_reg) a,
options(pure, nomem, nostack, preserves_flags)
);
dst
}
}
/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
@ -221,15 +223,17 @@ pub unsafe fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
assert_instr(vcvtneps2bf16)
)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
let mut dst: __m128bh;
asm!(
"{{vex}}vcvtneps2bf16 {dst},{src}",
dst = lateout(xmm_reg) dst,
src = in(ymm_reg) a,
options(pure, nomem, nostack, preserves_flags)
);
dst
pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
unsafe {
let mut dst: __m128bh;
asm!(
"{{vex}}vcvtneps2bf16 {dst},{src}",
dst = lateout(xmm_reg) dst,
src = in(ymm_reg) a,
options(pure, nomem, nostack, preserves_flags)
);
dst
}
}
#[allow(improper_ctypes)]

View file

@ -6,7 +6,7 @@ use crate::core_arch::x86::*;
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _cvtmask64_u64(a: __mmask64) -> u64 {
pub fn _cvtmask64_u64(a: __mmask64) -> u64 {
a
}
@ -16,7 +16,7 @@ pub unsafe fn _cvtmask64_u64(a: __mmask64) -> u64 {
#[inline]
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _cvtu64_mask64(a: u64) -> __mmask64 {
pub fn _cvtu64_mask64(a: u64) -> __mmask64 {
a
}

View file

@ -13,7 +13,7 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
pub fn _mm_cvtsd_i64(a: __m128d) -> i64 {
_mm_cvtsd_si64(a)
}
@ -24,7 +24,7 @@ pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
pub fn _mm_cvtss_i64(a: __m128) -> i64 {
_mm_cvtss_si64(a)
}
@ -35,8 +35,8 @@ pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
pub fn _mm_cvtss_u64(a: __m128) -> u64 {
unsafe { vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
@ -46,8 +46,8 @@ pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
pub fn _mm_cvtsd_u64(a: __m128d) -> u64 {
unsafe { vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -57,9 +57,11 @@ pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
let b = b as f32;
simd_insert!(a, 0, b)
pub fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
unsafe {
let b = b as f32;
simd_insert!(a, 0, b)
}
}
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -69,9 +71,11 @@ pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
let b = b as f64;
simd_insert!(a, 0, b)
pub fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
unsafe {
let b = b as f64;
simd_insert!(a, 0, b)
}
}
/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -81,9 +85,11 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
let b = b as f32;
simd_insert!(a, 0, b)
pub fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
unsafe {
let b = b as f32;
simd_insert!(a, 0, b)
}
}
/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -93,9 +99,11 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
let b = b as f64;
simd_insert!(a, 0, b)
pub fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
unsafe {
let b = b as f64;
simd_insert!(a, 0, b)
}
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
@ -105,8 +113,8 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttsd2si))]
pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
pub fn _mm_cvttsd_i64(a: __m128d) -> i64 {
unsafe { vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
@ -116,8 +124,8 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttsd2usi))]
pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
pub fn _mm_cvttsd_u64(a: __m128d) -> u64 {
unsafe { vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
@ -127,8 +135,8 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttss2si))]
pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
pub fn _mm_cvttss_i64(a: __m128) -> i64 {
unsafe { vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
@ -138,8 +146,8 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttss2usi))]
pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
pub fn _mm_cvttss_u64(a: __m128) -> u64 {
unsafe { vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -156,11 +164,13 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsi2sd64(a, b, ROUNDING);
transmute(r)
pub fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsi2sd64(a, b, ROUNDING);
transmute(r)
}
}
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
@ -177,11 +187,13 @@ pub unsafe fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsi2sd64(a, b, ROUNDING);
transmute(r)
pub fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtsi2sd64(a, b, ROUNDING);
transmute(r)
}
}
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -198,11 +210,13 @@ pub unsafe fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> _
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtsi2ss64(a, b, ROUNDING);
transmute(r)
pub fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtsi2ss64(a, b, ROUNDING);
transmute(r)
}
}
/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
@ -219,11 +233,13 @@ pub unsafe fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __m128d {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtusi2sd64(a, b, ROUNDING);
transmute(r)
pub fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __m128d {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
let r = vcvtusi2sd64(a, b, ROUNDING);
transmute(r)
}
}
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
@ -240,11 +256,13 @@ pub unsafe fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtsi2ss64(a, b, ROUNDING);
transmute(r)
pub fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtsi2ss64(a, b, ROUNDING);
transmute(r)
}
}
/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
@ -261,11 +279,13 @@ pub unsafe fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m128 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtusi2ss64(a, b, ROUNDING);
transmute(r)
pub fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m128 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
let r = vcvtusi2ss64(a, b, ROUNDING);
transmute(r)
}
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -282,10 +302,12 @@ pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
vcvtsd2si64(a, ROUNDING)
pub fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
vcvtsd2si64(a, ROUNDING)
}
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -302,10 +324,12 @@ pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
vcvtsd2si64(a, ROUNDING)
pub fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
vcvtsd2si64(a, ROUNDING)
}
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
@ -322,10 +346,12 @@ pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
vcvtsd2usi64(a, ROUNDING)
pub fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f64x2();
vcvtsd2usi64(a, ROUNDING)
}
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -342,10 +368,12 @@ pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
vcvtss2si64(a, ROUNDING)
pub fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
vcvtss2si64(a, ROUNDING)
}
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
@ -362,10 +390,12 @@ pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
vcvtss2si64(a, ROUNDING)
pub fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
vcvtss2si64(a, ROUNDING)
}
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
@ -382,10 +412,12 @@ pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
vcvtss2usi64(a, ROUNDING)
// Post-change form: now a safe `fn`. The three original statements are kept
// as-is inside an explicit `unsafe` block: the `static_assert_rounding!` check
// on ROUNDING, the `as_f32x4()` conversion of `a`, and the `vcvtss2usi64` call
// whose result is returned.
pub fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
unsafe {
static_assert_rounding!(ROUNDING);
let a = a.as_f32x4();
vcvtss2usi64(a, ROUNDING)
}
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -397,10 +429,12 @@ pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
vcvttsd2si64(a, SAE)
// Post-change form: now a safe `fn`. The three original statements are kept
// as-is inside an explicit `unsafe` block: the `static_assert_sae!` check on
// SAE, the `as_f64x2()` conversion of `a`, and the `vcvttsd2si64` call whose
// result is returned.
pub fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
unsafe {
static_assert_sae!(SAE);
let a = a.as_f64x2();
vcvttsd2si64(a, SAE)
}
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -412,10 +446,12 @@ pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
vcvttsd2si64(a, SAE)
// Post-change form: now a safe `fn`. The three original statements are kept
// as-is inside an explicit `unsafe` block: the `static_assert_sae!` check on
// SAE, the `as_f64x2()` conversion of `a`, and the `vcvttsd2si64` call whose
// result is returned.
pub fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
unsafe {
static_assert_sae!(SAE);
let a = a.as_f64x2();
vcvttsd2si64(a, SAE)
}
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
@ -427,10 +463,12 @@ pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
static_assert_sae!(SAE);
let a = a.as_f64x2();
vcvttsd2usi64(a, SAE)
// Post-change form: now a safe `fn`. The three original statements are kept
// as-is inside an explicit `unsafe` block: the `static_assert_sae!` check on
// SAE, the `as_f64x2()` conversion of `a`, and the `vcvttsd2usi64` call whose
// result is returned.
pub fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
unsafe {
static_assert_sae!(SAE);
let a = a.as_f64x2();
vcvttsd2usi64(a, SAE)
}
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -442,10 +480,12 @@ pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
vcvttss2si64(a, SAE)
// Post-change form: now a safe `fn`. The three original statements are kept
// as-is inside an explicit `unsafe` block: the `static_assert_sae!` check on
// SAE, the `as_f32x4()` conversion of `a`, and the `vcvttss2si64` call whose
// result is returned.
pub fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
unsafe {
static_assert_sae!(SAE);
let a = a.as_f32x4();
vcvttss2si64(a, SAE)
}
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
@ -457,10 +497,12 @@ pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
vcvttss2si64(a, SAE)
// Post-change form: now a safe `fn`. The three original statements are kept
// as-is inside an explicit `unsafe` block: the `static_assert_sae!` check on
// SAE, the `as_f32x4()` conversion of `a`, and the `vcvttss2si64` call whose
// result is returned.
pub fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
unsafe {
static_assert_sae!(SAE);
let a = a.as_f32x4();
vcvttss2si64(a, SAE)
}
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
@ -472,10 +514,12 @@ pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
static_assert_sae!(SAE);
let a = a.as_f32x4();
vcvttss2usi64(a, SAE)
// Post-change form: now a safe `fn`. The three original statements are kept
// as-is inside an explicit `unsafe` block: the `static_assert_sae!` check on
// SAE, the `as_f32x4()` conversion of `a`, and the `vcvttss2usi64` call whose
// result is returned.
pub fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
unsafe {
static_assert_sae!(SAE);
let a = a.as_f32x4();
vcvttss2usi64(a, SAE)
}
}
#[allow(improper_ctypes)]

View file

@ -11,8 +11,8 @@ use stdarch_test::assert_instr;
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsi2sh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
// Post-change form: now a safe `fn`. The same `vcvtsi642sh` call, still passing
// `_MM_FROUND_CUR_DIRECTION` as the rounding argument, sits in an explicit
// `unsafe` block.
pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
unsafe { vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
@ -33,9 +33,11 @@ pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
static_assert_rounding!(ROUNDING);
vcvtsi642sh(a, b, ROUNDING)
// Post-change form: now a safe `fn`. The `static_assert_rounding!` check on
// ROUNDING and the `vcvtsi642sh` call are kept as-is inside an explicit
// `unsafe` block.
pub fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
unsafe {
static_assert_rounding!(ROUNDING);
vcvtsi642sh(a, b, ROUNDING)
}
}
/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
@ -47,8 +49,8 @@ pub unsafe fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtusi2sh))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
// Post-change form: now a safe `fn`. The same `vcvtusi642sh` call, still
// passing `_MM_FROUND_CUR_DIRECTION` as the rounding argument, sits in an
// explicit `unsafe` block.
pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
unsafe { vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
@ -69,9 +71,11 @@ pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
static_assert_rounding!(ROUNDING);
vcvtusi642sh(a, b, ROUNDING)
// Post-change form: now a safe `fn`. The `static_assert_rounding!` check on
// ROUNDING and the `vcvtusi642sh` call are kept as-is inside an explicit
// `unsafe` block.
pub fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
unsafe {
static_assert_rounding!(ROUNDING);
vcvtusi642sh(a, b, ROUNDING)
}
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
@ -82,8 +86,8 @@ pub unsafe fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2si))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 {
vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION)
// Post-change form: now a safe `fn`. The same `vcvtsh2si64` call, still passing
// `_MM_FROUND_CUR_DIRECTION` as the rounding argument, sits in an explicit
// `unsafe` block.
pub fn _mm_cvtsh_i64(a: __m128h) -> i64 {
unsafe { vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
@ -103,9 +107,11 @@ pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 {
#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
static_assert_rounding!(ROUNDING);
vcvtsh2si64(a, ROUNDING)
// Post-change form: now a safe `fn`. The `static_assert_rounding!` check on
// ROUNDING and the `vcvtsh2si64` call are kept as-is inside an explicit
// `unsafe` block.
pub fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
unsafe {
static_assert_rounding!(ROUNDING);
vcvtsh2si64(a, ROUNDING)
}
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
@ -116,8 +122,8 @@ pub unsafe fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvtsh2usi))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 {
vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION)
// Post-change form: now a safe `fn`. The same `vcvtsh2usi64` call, still
// passing `_MM_FROUND_CUR_DIRECTION` as the rounding argument, sits in an
// explicit `unsafe` block.
pub fn _mm_cvtsh_u64(a: __m128h) -> u64 {
unsafe { vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
@ -137,9 +143,11 @@ pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 {
#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
static_assert_rounding!(ROUNDING);
vcvtsh2usi64(a, ROUNDING)
// Post-change form: now a safe `fn`. The `static_assert_rounding!` check on
// ROUNDING and the `vcvtsh2usi64` call are kept as-is inside an explicit
// `unsafe` block.
pub fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
unsafe {
static_assert_rounding!(ROUNDING);
vcvtsh2usi64(a, ROUNDING)
}
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
@ -150,8 +158,8 @@ pub unsafe fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2si))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 {
vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION)
// Post-change form: now a safe `fn`. The same `vcvttsh2si64` call, still
// passing `_MM_FROUND_CUR_DIRECTION` as its second argument, sits in an
// explicit `unsafe` block.
pub fn _mm_cvttsh_i64(a: __m128h) -> i64 {
unsafe { vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
@ -165,9 +173,11 @@ pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 {
#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
static_assert_sae!(SAE);
vcvttsh2si64(a, SAE)
// Post-change form: now a safe `fn`. The `static_assert_sae!` check on SAE and
// the `vcvttsh2si64` call are kept as-is inside an explicit `unsafe` block.
pub fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
unsafe {
static_assert_sae!(SAE);
vcvttsh2si64(a, SAE)
}
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
@ -178,8 +188,8 @@ pub unsafe fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
#[target_feature(enable = "avx512fp16")]
#[cfg_attr(test, assert_instr(vcvttsh2usi))]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 {
vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION)
// Post-change form: now a safe `fn`. The same `vcvttsh2usi64` call, still
// passing `_MM_FROUND_CUR_DIRECTION` as its second argument, sits in an
// explicit `unsafe` block.
pub fn _mm_cvttsh_u64(a: __m128h) -> u64 {
unsafe { vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
}
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
@ -193,9 +203,11 @@ pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 {
#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
// Pre-change form, removed by this commit: the intrinsic was an `unsafe fn`.
pub unsafe fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
static_assert_sae!(SAE);
vcvttsh2usi64(a, SAE)
// Post-change form: now a safe `fn`. The `static_assert_sae!` check on SAE and
// the `vcvttsh2usi64` call are kept as-is inside an explicit `unsafe` block.
pub fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
unsafe {
static_assert_sae!(SAE);
vcvttsh2usi64(a, SAE)
}
}
#[allow(improper_ctypes)]