mark AVX512 & AVXNECONVERT intrinsics as safe
Mark all AVX512 and AVXNECONVERT intrinsics that only compute on SIMD values as safe; intrinsics that involve memory operations are excluded and remain `unsafe`.
This commit is contained in:
parent
2348f153ae
commit
f53c07b3ff
16 changed files with 21793 additions and 18447 deletions
|
|
@ -37,8 +37,8 @@ unsafe extern "C" {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
|
||||
transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4()))
|
||||
pub fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
|
||||
unsafe { transmute(cvtne2ps2bf16(a.as_f32x4(), b.as_f32x4())) }
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
|
||||
|
|
@ -50,9 +50,11 @@ pub unsafe fn _mm_cvtne2ps_pbh(a: __m128, b: __m128) -> __m128bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh {
|
||||
let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
|
||||
pub fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __m128) -> __m128bh {
|
||||
unsafe {
|
||||
let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
|
||||
|
|
@ -64,9 +66,11 @@ pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh {
|
||||
let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
|
||||
pub fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh {
|
||||
unsafe {
|
||||
let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two 256-bit vectors
|
||||
|
|
@ -77,8 +81,8 @@ pub unsafe fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m12
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
|
||||
transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8()))
|
||||
pub fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
|
||||
unsafe { transmute(cvtne2ps2bf16_256(a.as_f32x8(), b.as_f32x8())) }
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b
|
||||
|
|
@ -89,14 +93,11 @@ pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm256_mask_cvtne2ps_pbh(
|
||||
src: __m256bh,
|
||||
k: __mmask16,
|
||||
a: __m256,
|
||||
b: __m256,
|
||||
) -> __m256bh {
|
||||
let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
|
||||
pub fn _mm256_mask_cvtne2ps_pbh(src: __m256bh, k: __mmask16, a: __m256, b: __m256) -> __m256bh {
|
||||
unsafe {
|
||||
let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b
|
||||
|
|
@ -107,9 +108,11 @@ pub unsafe fn _mm256_mask_cvtne2ps_pbh(
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh {
|
||||
let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
|
||||
pub fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh {
|
||||
unsafe {
|
||||
let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two 512-bit vectors
|
||||
|
|
@ -120,8 +123,8 @@ pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> _
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
|
||||
transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16()))
|
||||
pub fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
|
||||
unsafe { transmute(cvtne2ps2bf16_512(a.as_f32x16(), b.as_f32x16())) }
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
|
||||
|
|
@ -133,14 +136,11 @@ pub unsafe fn _mm512_cvtne2ps_pbh(a: __m512, b: __m512) -> __m512bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm512_mask_cvtne2ps_pbh(
|
||||
src: __m512bh,
|
||||
k: __mmask32,
|
||||
a: __m512,
|
||||
b: __m512,
|
||||
) -> __m512bh {
|
||||
let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x32()))
|
||||
pub fn _mm512_mask_cvtne2ps_pbh(src: __m512bh, k: __mmask32, a: __m512, b: __m512) -> __m512bh {
|
||||
unsafe {
|
||||
let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x32()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two vectors
|
||||
|
|
@ -152,9 +152,11 @@ pub unsafe fn _mm512_mask_cvtne2ps_pbh(
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))]
|
||||
pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh {
|
||||
let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x32::ZERO))
|
||||
pub fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh {
|
||||
unsafe {
|
||||
let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x32::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -164,8 +166,8 @@ pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> _
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
pub unsafe fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
|
||||
transmute(cvtneps2bf16_256(a.as_f32x8()))
|
||||
pub fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
|
||||
unsafe { transmute(cvtneps2bf16_256(a.as_f32x8())) }
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -176,9 +178,11 @@ pub unsafe fn _mm256_cvtneps_pbh(a: __m256) -> __m128bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh {
|
||||
let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
|
||||
pub fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> __m128bh {
|
||||
unsafe {
|
||||
let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -189,9 +193,11 @@ pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) ->
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
|
||||
let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
|
||||
pub fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
|
||||
unsafe {
|
||||
let cvt = _mm256_cvtneps_pbh(a).as_u16x8();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -201,8 +207,8 @@ pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
pub unsafe fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
|
||||
transmute(cvtneps2bf16_512(a.as_f32x16()))
|
||||
pub fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
|
||||
unsafe { transmute(cvtneps2bf16_512(a.as_f32x16())) }
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -213,9 +219,11 @@ pub unsafe fn _mm512_cvtneps_pbh(a: __m512) -> __m256bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
pub unsafe fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh {
|
||||
let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
|
||||
pub fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> __m256bh {
|
||||
unsafe {
|
||||
let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, src.as_u16x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -226,9 +234,11 @@ pub unsafe fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) ->
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
|
||||
let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
|
||||
pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
|
||||
unsafe {
|
||||
let cvt = _mm512_cvtneps_pbh(a).as_u16x16();
|
||||
transmute(simd_select_bitmask(k, cvt, u16x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -239,8 +249,8 @@ pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
|
||||
transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
|
||||
unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -252,9 +262,11 @@ pub unsafe fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 {
|
||||
let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
|
||||
transmute(simd_select_bitmask(k, rst, src.as_f32x4()))
|
||||
pub fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m128bh) -> __m128 {
|
||||
unsafe {
|
||||
let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
|
||||
transmute(simd_select_bitmask(k, rst, src.as_f32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -266,10 +278,12 @@ pub unsafe fn _mm_mask_dpbf16_ps(src: __m128, k: __mmask8, a: __m128bh, b: __m12
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
|
||||
let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
|
||||
let zero = _mm_set1_ps(0.0_f32).as_f32x4();
|
||||
transmute(simd_select_bitmask(k, rst, zero))
|
||||
pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
|
||||
unsafe {
|
||||
let rst = _mm_dpbf16_ps(src, a, b).as_f32x4();
|
||||
let zero = _mm_set1_ps(0.0_f32).as_f32x4();
|
||||
transmute(simd_select_bitmask(k, rst, zero))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -280,8 +294,8 @@ pub unsafe fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m1
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
|
||||
transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
|
||||
unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -293,9 +307,11 @@ pub unsafe fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 {
|
||||
let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
|
||||
transmute(simd_select_bitmask(k, rst, src.as_f32x8()))
|
||||
pub fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __m256bh) -> __m256 {
|
||||
unsafe {
|
||||
let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
|
||||
transmute(simd_select_bitmask(k, rst, src.as_f32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -307,9 +323,11 @@ pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
|
||||
let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
|
||||
transmute(simd_select_bitmask(k, rst, f32x8::ZERO))
|
||||
pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
|
||||
unsafe {
|
||||
let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8();
|
||||
transmute(simd_select_bitmask(k, rst, f32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -322,8 +340,8 @@ pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: _
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
|
||||
transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16()))
|
||||
pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
|
||||
unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -335,9 +353,11 @@ pub unsafe fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 {
|
||||
let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
|
||||
transmute(simd_select_bitmask(k, rst, src.as_f32x16()))
|
||||
pub fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: __m512bh) -> __m512 {
|
||||
unsafe {
|
||||
let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
|
||||
transmute(simd_select_bitmask(k, rst, src.as_f32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -349,14 +369,11 @@ pub unsafe fn _mm512_mask_dpbf16_ps(src: __m512, k: __mmask16, a: __m512bh, b: _
|
|||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub unsafe fn _mm512_maskz_dpbf16_ps(
|
||||
k: __mmask16,
|
||||
src: __m512,
|
||||
a: __m512bh,
|
||||
b: __m512bh,
|
||||
) -> __m512 {
|
||||
let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
|
||||
transmute(simd_select_bitmask(k, rst, f32x16::ZERO))
|
||||
pub fn _mm512_maskz_dpbf16_ps(k: __mmask16, src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
|
||||
unsafe {
|
||||
let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16();
|
||||
transmute(simd_select_bitmask(k, rst, f32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
|
||||
|
|
@ -366,8 +383,8 @@ pub unsafe fn _mm512_maskz_dpbf16_ps(
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
|
||||
_mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a))))
|
||||
pub fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
|
||||
unsafe { _mm512_castsi512_ps(_mm512_slli_epi32::<16>(_mm512_cvtepi16_epi32(transmute(a)))) }
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
|
||||
|
|
@ -378,9 +395,11 @@ pub unsafe fn _mm512_cvtpbh_ps(a: __m256bh) -> __m512 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 {
|
||||
let cvt = _mm512_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16()))
|
||||
pub fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> __m512 {
|
||||
unsafe {
|
||||
let cvt = _mm512_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x16(), src.as_f32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
|
||||
|
|
@ -391,9 +410,11 @@ pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> _
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
|
||||
let cvt = _mm512_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO))
|
||||
pub fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
|
||||
unsafe {
|
||||
let cvt = _mm512_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
|
||||
|
|
@ -403,8 +424,8 @@ pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
|
||||
_mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a))))
|
||||
pub fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
|
||||
unsafe { _mm256_castsi256_ps(_mm256_slli_epi32::<16>(_mm256_cvtepi16_epi32(transmute(a)))) }
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
|
||||
|
|
@ -415,9 +436,11 @@ pub unsafe fn _mm256_cvtpbh_ps(a: __m128bh) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 {
|
||||
let cvt = _mm256_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8()))
|
||||
pub fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __m256 {
|
||||
unsafe {
|
||||
let cvt = _mm256_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x8(), src.as_f32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
|
||||
|
|
@ -428,9 +451,11 @@ pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
|
||||
let cvt = _mm256_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO))
|
||||
pub fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
|
||||
unsafe {
|
||||
let cvt = _mm256_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
|
||||
|
|
@ -440,8 +465,8 @@ pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
|
||||
_mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a))))
|
||||
pub fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
|
||||
unsafe { _mm_castsi128_ps(_mm_slli_epi32::<16>(_mm_cvtepi16_epi32(transmute(a)))) }
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
|
||||
|
|
@ -452,9 +477,11 @@ pub unsafe fn _mm_cvtpbh_ps(a: __m128bh) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 {
|
||||
let cvt = _mm_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4()))
|
||||
pub fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m128 {
|
||||
unsafe {
|
||||
let cvt = _mm_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x4(), src.as_f32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
|
||||
|
|
@ -465,9 +492,11 @@ pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m12
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
|
||||
let cvt = _mm_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO))
|
||||
pub fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
|
||||
unsafe {
|
||||
let cvt = _mm_cvtpbh_ps(a);
|
||||
transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a single BF16 (16-bit) floating-point element in a to a single-precision (32-bit) floating-point
|
||||
|
|
@ -477,7 +506,7 @@ pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
|
||||
pub unsafe fn _mm_cvtsbh_ss(a: bf16) -> f32 {
|
||||
pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
|
||||
f32::from_bits((a.to_bits() as u32) << 16)
|
||||
}
|
||||
|
||||
|
|
@ -489,15 +518,17 @@ pub unsafe fn _mm_cvtsbh_ss(a: bf16) -> f32 {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"vcvtneps2bf16 {dst}, {src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
|
||||
unsafe {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"vcvtneps2bf16 {dst}, {src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -509,16 +540,18 @@ pub unsafe fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
|
||||
let mut dst = src;
|
||||
asm!(
|
||||
"vcvtneps2bf16 {dst}{{{k}}},{src}",
|
||||
dst = inlateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
k = in(kreg) k,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
|
||||
unsafe {
|
||||
let mut dst = src;
|
||||
asm!(
|
||||
"vcvtneps2bf16 {dst}{{{k}}},{src}",
|
||||
dst = inlateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
k = in(kreg) k,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
|
||||
|
|
@ -530,16 +563,18 @@ pub unsafe fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m
|
|||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
k = in(kreg) k,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
|
||||
unsafe {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
k = in(kreg) k,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point
|
||||
|
|
@ -549,9 +584,11 @@ pub unsafe fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bf16,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")]
|
||||
pub unsafe fn _mm_cvtness_sbh(a: f32) -> bf16 {
|
||||
let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0);
|
||||
bf16::from_bits(value)
|
||||
pub fn _mm_cvtness_sbh(a: f32) -> bf16 {
|
||||
unsafe {
|
||||
let value: u16 = simd_extract!(_mm_cvtneps_pbh(_mm_set_ss(a)), 0);
|
||||
bf16::from_bits(value)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -43,8 +43,8 @@ unsafe extern "C" {
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
|
||||
transmute(simd_ctpop(a.as_i16x32()))
|
||||
pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i16x32())) }
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -57,12 +57,14 @@ pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x32()),
|
||||
i16x32::ZERO,
|
||||
))
|
||||
pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x32()),
|
||||
i16x32::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -75,12 +77,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x32()),
|
||||
src.as_i16x32(),
|
||||
))
|
||||
pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x32()),
|
||||
src.as_i16x32(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -90,8 +94,8 @@ pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
|
||||
transmute(simd_ctpop(a.as_i16x16()))
|
||||
pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i16x16())) }
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -104,12 +108,14 @@ pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x16()),
|
||||
i16x16::ZERO,
|
||||
))
|
||||
pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x16()),
|
||||
i16x16::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -122,12 +128,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x16()),
|
||||
src.as_i16x16(),
|
||||
))
|
||||
pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x16()),
|
||||
src.as_i16x16(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -137,8 +145,8 @@ pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
|
||||
transmute(simd_ctpop(a.as_i16x8()))
|
||||
pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i16x8())) }
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -151,12 +159,14 @@ pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x8()),
|
||||
i16x8::ZERO,
|
||||
))
|
||||
pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x8()),
|
||||
i16x8::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -169,12 +179,14 @@ pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntw))]
|
||||
pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x8()),
|
||||
src.as_i16x8(),
|
||||
))
|
||||
pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i16x8()),
|
||||
src.as_i16x8(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -184,8 +196,8 @@ pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
|
||||
transmute(simd_ctpop(a.as_i8x64()))
|
||||
pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i8x64())) }
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -198,12 +210,14 @@ pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x64()),
|
||||
i8x64::ZERO,
|
||||
))
|
||||
pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x64()),
|
||||
i8x64::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -216,12 +230,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x64()),
|
||||
src.as_i8x64(),
|
||||
))
|
||||
pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x64()),
|
||||
src.as_i8x64(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -231,8 +247,8 @@ pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) ->
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
|
||||
transmute(simd_ctpop(a.as_i8x32()))
|
||||
pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i8x32())) }
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -245,12 +261,14 @@ pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x32()),
|
||||
i8x32::ZERO,
|
||||
))
|
||||
pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x32()),
|
||||
i8x32::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -263,12 +281,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x32()),
|
||||
src.as_i8x32(),
|
||||
))
|
||||
pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x32()),
|
||||
src.as_i8x32(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -278,8 +298,8 @@ pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) ->
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
|
||||
transmute(simd_ctpop(a.as_i8x16()))
|
||||
pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i8x16())) }
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -292,12 +312,14 @@ pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x16()),
|
||||
i8x16::ZERO,
|
||||
))
|
||||
pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x16()),
|
||||
i8x16::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -310,12 +332,14 @@ pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntb))]
|
||||
pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x16()),
|
||||
src.as_i8x16(),
|
||||
))
|
||||
pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i8x16()),
|
||||
src.as_i8x16(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
|
||||
|
|
@ -327,8 +351,8 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
|
||||
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0)
|
||||
pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
|
||||
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
|
||||
}
|
||||
|
||||
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
|
||||
|
|
@ -343,8 +367,8 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64
|
|||
#[target_feature(enable = "avx512bitalg")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
|
||||
bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k)
|
||||
pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
|
||||
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
|
||||
}
|
||||
|
||||
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
|
||||
|
|
@ -356,8 +380,8 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
|
||||
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0)
|
||||
pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
|
||||
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
|
||||
}
|
||||
|
||||
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
|
||||
|
|
@ -372,8 +396,8 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
|
||||
bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k)
|
||||
pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
|
||||
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
|
||||
}
|
||||
|
||||
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
|
||||
|
|
@ -385,8 +409,8 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
|
||||
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0)
|
||||
pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
|
||||
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
|
||||
}
|
||||
|
||||
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
|
||||
|
|
@ -401,8 +425,8 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
|
|||
#[target_feature(enable = "avx512bitalg,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
|
||||
pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
|
||||
bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k)
|
||||
pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
|
||||
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -11,7 +11,7 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
|
||||
pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
|
||||
pub fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
|
||||
_mm512_set1_epi32(k as i32)
|
||||
}
|
||||
|
||||
|
|
@ -22,7 +22,7 @@ pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
|
||||
pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
|
||||
pub fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
|
||||
_mm256_set1_epi32(k as i32)
|
||||
}
|
||||
|
||||
|
|
@ -33,7 +33,7 @@ pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
|
||||
pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
|
||||
pub fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
|
||||
_mm_set1_epi32(k as i32)
|
||||
}
|
||||
|
||||
|
|
@ -44,7 +44,7 @@ pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
|
||||
pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
|
||||
pub fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
|
||||
_mm512_set1_epi64(k as i64)
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
|
||||
pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
|
||||
pub fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
|
||||
_mm256_set1_epi64x(k as i64)
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
|
||||
pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
|
||||
pub fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
|
||||
_mm_set1_epi64x(k as i64)
|
||||
}
|
||||
|
||||
|
|
@ -77,8 +77,8 @@ pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
|
||||
transmute(vpconflictd(a.as_i32x16()))
|
||||
pub fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpconflictd(a.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -88,9 +88,11 @@ pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
let conflict = _mm512_conflict_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
|
||||
pub fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let conflict = _mm512_conflict_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -100,9 +102,11 @@ pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i)
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
let conflict = _mm512_conflict_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, conflict, i32x16::ZERO))
|
||||
pub fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let conflict = _mm512_conflict_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, conflict, i32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -112,8 +116,8 @@ pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
|
||||
transmute(vpconflictd256(a.as_i32x8()))
|
||||
pub fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpconflictd256(a.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -123,9 +127,11 @@ pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
let conflict = _mm256_conflict_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
|
||||
pub fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let conflict = _mm256_conflict_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -135,9 +141,11 @@ pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i)
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
||||
let conflict = _mm256_conflict_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, conflict, i32x8::ZERO))
|
||||
pub fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let conflict = _mm256_conflict_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, conflict, i32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -147,8 +155,8 @@ pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
|
||||
transmute(vpconflictd128(a.as_i32x4()))
|
||||
pub fn _mm_conflict_epi32(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpconflictd128(a.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -158,9 +166,11 @@ pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
let conflict = _mm_conflict_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
|
||||
pub fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let conflict = _mm_conflict_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -170,9 +180,11 @@ pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) ->
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictd))]
|
||||
pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
||||
let conflict = _mm_conflict_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, conflict, i32x4::ZERO))
|
||||
pub fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let conflict = _mm_conflict_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, conflict, i32x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -182,8 +194,8 @@ pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
|
||||
transmute(vpconflictq(a.as_i64x8()))
|
||||
pub fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpconflictq(a.as_i64x8())) }
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -193,9 +205,11 @@ pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
|
||||
let conflict = _mm512_conflict_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
|
||||
pub fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let conflict = _mm512_conflict_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -205,9 +219,11 @@ pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i)
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
||||
let conflict = _mm512_conflict_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, conflict, i64x8::ZERO))
|
||||
pub fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let conflict = _mm512_conflict_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, conflict, i64x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -217,8 +233,8 @@ pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
|
||||
transmute(vpconflictq256(a.as_i64x4()))
|
||||
pub fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpconflictq256(a.as_i64x4())) }
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -228,9 +244,11 @@ pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
let conflict = _mm256_conflict_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
|
||||
pub fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let conflict = _mm256_conflict_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
|
||||
|
|
@ -240,9 +258,11 @@ pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i)
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
||||
let conflict = _mm256_conflict_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, conflict, i64x4::ZERO))
|
||||
pub fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let conflict = _mm256_conflict_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, conflict, i64x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a that are closer to the least significant bit. Each element's comparison forms a zero-extended bit vector in dst.
|
||||
|
|
@ -252,8 +272,8 @@ pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
|
||||
transmute(vpconflictq128(a.as_i64x2()))
|
||||
pub fn _mm_conflict_epi64(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpconflictq128(a.as_i64x2())) }
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a that are closer to the least significant bit, using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in dst.
|
||||
|
|
@ -263,9 +283,11 @@ pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
let conflict = _mm_conflict_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
|
||||
pub fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let conflict = _mm_conflict_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Test each 64-bit element of a for equality with all other elements in a that are closer to the least significant bit, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero-extended bit vector in dst.
|
||||
|
|
@ -275,9 +297,11 @@ pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) ->
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpconflictq))]
|
||||
pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
let conflict = _mm_conflict_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, conflict, i64x2::ZERO))
|
||||
pub fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let conflict = _mm_conflict_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, conflict, i64x2::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
|
||||
|
|
@ -287,8 +311,8 @@ pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
|
||||
transmute(simd_ctlz(a.as_i32x16()))
|
||||
pub fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(simd_ctlz(a.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -298,9 +322,11 @@ pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
|
||||
pub fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -310,9 +336,11 @@ pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) ->
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO))
|
||||
pub fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
|
||||
|
|
@ -322,8 +350,8 @@ pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
|
||||
transmute(simd_ctlz(a.as_i32x8()))
|
||||
pub fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(simd_ctlz(a.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -333,9 +361,11 @@ pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
|
||||
pub fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -345,9 +375,11 @@ pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
||||
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO))
|
||||
pub fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst.
|
||||
|
|
@ -357,8 +389,8 @@ pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
|
||||
transmute(simd_ctlz(a.as_i32x4()))
|
||||
pub fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(simd_ctlz(a.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -368,9 +400,11 @@ pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
|
||||
pub fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -380,9 +414,11 @@ pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntd))]
|
||||
pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
||||
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO))
|
||||
pub fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
|
||||
|
|
@ -392,8 +428,8 @@ pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
|
||||
transmute(simd_ctlz(a.as_i64x8()))
|
||||
pub fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(simd_ctlz(a.as_i64x8())) }
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -403,9 +439,11 @@ pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
|
||||
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
|
||||
pub fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -415,9 +453,11 @@ pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) ->
|
|||
#[target_feature(enable = "avx512cd")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
||||
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO))
|
||||
pub fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
|
||||
transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
|
||||
|
|
@ -427,8 +467,8 @@ pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
|
||||
transmute(simd_ctlz(a.as_i64x4()))
|
||||
pub fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(simd_ctlz(a.as_i64x4())) }
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -438,9 +478,11 @@ pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
|
||||
pub fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -450,9 +492,11 @@ pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
||||
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO))
|
||||
pub fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
|
||||
transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst.
|
||||
|
|
@ -462,8 +506,8 @@ pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
|
||||
transmute(simd_ctlz(a.as_i64x2()))
|
||||
pub fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(simd_ctlz(a.as_i64x2())) }
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -473,9 +517,11 @@ pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
|
||||
pub fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and stores the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -485,9 +531,11 @@ pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m
|
|||
#[target_feature(enable = "avx512cd,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vplzcntq))]
|
||||
pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO))
|
||||
pub fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
|
||||
transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -15,8 +15,8 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "avx512ifma")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
|
||||
vpmadd52huq_512(a, b, c)
|
||||
pub fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
|
||||
unsafe { vpmadd52huq_512(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -31,13 +31,8 @@ pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51
|
|||
#[target_feature(enable = "avx512ifma")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm512_mask_madd52hi_epu64(
|
||||
a: __m512i,
|
||||
k: __mmask8,
|
||||
b: __m512i,
|
||||
c: __m512i,
|
||||
) -> __m512i {
|
||||
simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a)
|
||||
pub fn _mm512_mask_madd52hi_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), a) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -52,13 +47,8 @@ pub unsafe fn _mm512_mask_madd52hi_epu64(
|
|||
#[target_feature(enable = "avx512ifma")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm512_maskz_madd52hi_epu64(
|
||||
k: __mmask8,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
c: __m512i,
|
||||
) -> __m512i {
|
||||
simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512())
|
||||
pub fn _mm512_maskz_madd52hi_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52huq_512(a, b, c), _mm512_setzero_si512()) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -72,8 +62,8 @@ pub unsafe fn _mm512_maskz_madd52hi_epu64(
|
|||
#[target_feature(enable = "avx512ifma")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
|
||||
vpmadd52luq_512(a, b, c)
|
||||
pub fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
|
||||
unsafe { vpmadd52luq_512(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -88,13 +78,8 @@ pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m51
|
|||
#[target_feature(enable = "avx512ifma")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm512_mask_madd52lo_epu64(
|
||||
a: __m512i,
|
||||
k: __mmask8,
|
||||
b: __m512i,
|
||||
c: __m512i,
|
||||
) -> __m512i {
|
||||
simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a)
|
||||
pub fn _mm512_mask_madd52lo_epu64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), a) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -109,13 +94,8 @@ pub unsafe fn _mm512_mask_madd52lo_epu64(
|
|||
#[target_feature(enable = "avx512ifma")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm512_maskz_madd52lo_epu64(
|
||||
k: __mmask8,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
c: __m512i,
|
||||
) -> __m512i {
|
||||
simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512())
|
||||
pub fn _mm512_maskz_madd52lo_epu64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52luq_512(a, b, c), _mm512_setzero_si512()) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -132,8 +112,8 @@ pub unsafe fn _mm512_maskz_madd52lo_epu64(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpmadd52huq)
|
||||
)]
|
||||
pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
vpmadd52huq_256(a, b, c)
|
||||
pub fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { vpmadd52huq_256(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -147,8 +127,8 @@ pub unsafe fn _mm256_madd52hi_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> _
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
vpmadd52huq_256(a, b, c)
|
||||
pub fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { vpmadd52huq_256(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -163,13 +143,8 @@ pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m25
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm256_mask_madd52hi_epu64(
|
||||
a: __m256i,
|
||||
k: __mmask8,
|
||||
b: __m256i,
|
||||
c: __m256i,
|
||||
) -> __m256i {
|
||||
simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a)
|
||||
pub fn _mm256_mask_madd52hi_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), a) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -184,13 +159,8 @@ pub unsafe fn _mm256_mask_madd52hi_epu64(
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm256_maskz_madd52hi_epu64(
|
||||
k: __mmask8,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
c: __m256i,
|
||||
) -> __m256i {
|
||||
simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256())
|
||||
pub fn _mm256_maskz_madd52hi_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52huq_256(a, b, c), _mm256_setzero_si256()) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -207,8 +177,8 @@ pub unsafe fn _mm256_maskz_madd52hi_epu64(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpmadd52luq)
|
||||
)]
|
||||
pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
vpmadd52luq_256(a, b, c)
|
||||
pub fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { vpmadd52luq_256(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -222,8 +192,8 @@ pub unsafe fn _mm256_madd52lo_avx_epu64(a: __m256i, b: __m256i, c: __m256i) -> _
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
vpmadd52luq_256(a, b, c)
|
||||
pub fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { vpmadd52luq_256(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -238,13 +208,8 @@ pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m25
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm256_mask_madd52lo_epu64(
|
||||
a: __m256i,
|
||||
k: __mmask8,
|
||||
b: __m256i,
|
||||
c: __m256i,
|
||||
) -> __m256i {
|
||||
simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a)
|
||||
pub fn _mm256_mask_madd52lo_epu64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), a) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -259,13 +224,8 @@ pub unsafe fn _mm256_mask_madd52lo_epu64(
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm256_maskz_madd52lo_epu64(
|
||||
k: __mmask8,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
c: __m256i,
|
||||
) -> __m256i {
|
||||
simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256())
|
||||
pub fn _mm256_maskz_madd52lo_epu64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52luq_256(a, b, c), _mm256_setzero_si256()) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -282,8 +242,8 @@ pub unsafe fn _mm256_maskz_madd52lo_epu64(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpmadd52huq)
|
||||
)]
|
||||
pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
vpmadd52huq_128(a, b, c)
|
||||
pub fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { vpmadd52huq_128(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -297,8 +257,8 @@ pub unsafe fn _mm_madd52hi_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m1
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
vpmadd52huq_128(a, b, c)
|
||||
pub fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { vpmadd52huq_128(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -313,8 +273,8 @@ pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
|
||||
simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a)
|
||||
pub fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), a) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -329,8 +289,8 @@ pub unsafe fn _mm_mask_madd52hi_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52huq))]
|
||||
pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128())
|
||||
pub fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52huq_128(a, b, c), _mm_setzero_si128()) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -347,8 +307,8 @@ pub unsafe fn _mm_maskz_madd52hi_epu64(k: __mmask8, a: __m128i, b: __m128i, c: _
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpmadd52luq)
|
||||
)]
|
||||
pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
vpmadd52luq_128(a, b, c)
|
||||
pub fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { vpmadd52luq_128(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -362,8 +322,8 @@ pub unsafe fn _mm_madd52lo_avx_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m1
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
vpmadd52luq_128(a, b, c)
|
||||
pub fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { vpmadd52luq_128(a, b, c) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -378,8 +338,8 @@ pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
|
||||
simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a)
|
||||
pub fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), a) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 52-bit integers in each 64-bit element of
|
||||
|
|
@ -394,8 +354,8 @@ pub unsafe fn _mm_mask_madd52lo_epu64(a: __m128i, k: __mmask8, b: __m128i, c: __
|
|||
#[target_feature(enable = "avx512ifma,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmadd52luq))]
|
||||
pub unsafe fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128())
|
||||
pub fn _mm_maskz_madd52lo_epu64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
|
||||
unsafe { simd_select_bitmask(k, vpmadd52luq_128(a, b, c), _mm_setzero_si128()) }
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64()))
|
||||
pub fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpermi2b(a.as_i8x64(), idx.as_i8x64(), b.as_i8x64())) }
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
|
|
@ -22,14 +22,16 @@ pub unsafe fn _mm512_permutex2var_epi8(a: __m512i, idx: __m512i, b: __m512i) ->
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermt2b))]
|
||||
pub unsafe fn _mm512_mask_permutex2var_epi8(
|
||||
pub fn _mm512_mask_permutex2var_epi8(
|
||||
a: __m512i,
|
||||
k: __mmask64,
|
||||
idx: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
|
||||
unsafe {
|
||||
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, a.as_i8x64()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -39,14 +41,16 @@ pub unsafe fn _mm512_mask_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
pub unsafe fn _mm512_maskz_permutex2var_epi8(
|
||||
pub fn _mm512_maskz_permutex2var_epi8(
|
||||
k: __mmask64,
|
||||
a: __m512i,
|
||||
idx: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
|
||||
unsafe {
|
||||
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
|
||||
|
|
@ -56,14 +60,16 @@ pub unsafe fn _mm512_maskz_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermi2b))]
|
||||
pub unsafe fn _mm512_mask2_permutex2var_epi8(
|
||||
pub fn _mm512_mask2_permutex2var_epi8(
|
||||
a: __m512i,
|
||||
idx: __m512i,
|
||||
k: __mmask64,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
|
||||
unsafe {
|
||||
let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, idx.as_i8x64()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
|
||||
|
|
@ -73,8 +79,8 @@ pub unsafe fn _mm512_mask2_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32()))
|
||||
pub fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpermi2b256(a.as_i8x32(), idx.as_i8x32(), b.as_i8x32())) }
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
|
|
@ -84,14 +90,16 @@ pub unsafe fn _mm256_permutex2var_epi8(a: __m256i, idx: __m256i, b: __m256i) ->
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermt2b))]
|
||||
pub unsafe fn _mm256_mask_permutex2var_epi8(
|
||||
pub fn _mm256_mask_permutex2var_epi8(
|
||||
a: __m256i,
|
||||
k: __mmask32,
|
||||
idx: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
|
||||
unsafe {
|
||||
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, a.as_i8x32()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -101,14 +109,16 @@ pub unsafe fn _mm256_mask_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
pub unsafe fn _mm256_maskz_permutex2var_epi8(
|
||||
pub fn _mm256_maskz_permutex2var_epi8(
|
||||
k: __mmask32,
|
||||
a: __m256i,
|
||||
idx: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
|
||||
unsafe {
|
||||
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
|
||||
|
|
@ -118,14 +128,16 @@ pub unsafe fn _mm256_maskz_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermi2b))]
|
||||
pub unsafe fn _mm256_mask2_permutex2var_epi8(
|
||||
pub fn _mm256_mask2_permutex2var_epi8(
|
||||
a: __m256i,
|
||||
idx: __m256i,
|
||||
k: __mmask32,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
|
||||
unsafe {
|
||||
let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, idx.as_i8x32()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
|
||||
|
|
@ -135,8 +147,8 @@ pub unsafe fn _mm256_mask2_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16()))
|
||||
pub fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpermi2b128(a.as_i8x16(), idx.as_i8x16(), b.as_i8x16())) }
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
|
||||
|
|
@ -146,14 +158,11 @@ pub unsafe fn _mm_permutex2var_epi8(a: __m128i, idx: __m128i, b: __m128i) -> __m
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermt2b))]
|
||||
pub unsafe fn _mm_mask_permutex2var_epi8(
|
||||
a: __m128i,
|
||||
k: __mmask16,
|
||||
idx: __m128i,
|
||||
b: __m128i,
|
||||
) -> __m128i {
|
||||
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
|
||||
pub fn _mm_mask_permutex2var_epi8(a: __m128i, k: __mmask16, idx: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, a.as_i8x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -163,14 +172,11 @@ pub unsafe fn _mm_mask_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2b
|
||||
pub unsafe fn _mm_maskz_permutex2var_epi8(
|
||||
k: __mmask16,
|
||||
a: __m128i,
|
||||
idx: __m128i,
|
||||
b: __m128i,
|
||||
) -> __m128i {
|
||||
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
|
||||
pub fn _mm_maskz_permutex2var_epi8(k: __mmask16, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
|
||||
|
|
@ -180,14 +186,11 @@ pub unsafe fn _mm_maskz_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermi2b))]
|
||||
pub unsafe fn _mm_mask2_permutex2var_epi8(
|
||||
a: __m128i,
|
||||
idx: __m128i,
|
||||
k: __mmask16,
|
||||
b: __m128i,
|
||||
) -> __m128i {
|
||||
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
|
||||
pub fn _mm_mask2_permutex2var_epi8(a: __m128i, idx: __m128i, k: __mmask16, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, idx.as_i8x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
|
||||
|
|
@ -197,8 +200,8 @@ pub unsafe fn _mm_mask2_permutex2var_epi8(
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
|
||||
transmute(vpermb(a.as_i8x64(), idx.as_i8x64()))
|
||||
pub fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpermb(a.as_i8x64(), idx.as_i8x64())) }
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -208,14 +211,16 @@ pub unsafe fn _mm512_permutexvar_epi8(idx: __m512i, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm512_mask_permutexvar_epi8(
|
||||
pub fn _mm512_mask_permutexvar_epi8(
|
||||
src: __m512i,
|
||||
k: __mmask64,
|
||||
idx: __m512i,
|
||||
a: __m512i,
|
||||
) -> __m512i {
|
||||
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
|
||||
unsafe {
|
||||
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, src.as_i8x64()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -225,9 +230,11 @@ pub unsafe fn _mm512_mask_permutexvar_epi8(
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
|
||||
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
|
||||
pub fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, permute, i8x64::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
|
||||
|
|
@ -237,8 +244,8 @@ pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m51
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
|
||||
transmute(vpermb256(a.as_i8x32(), idx.as_i8x32()))
|
||||
pub fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpermb256(a.as_i8x32(), idx.as_i8x32())) }
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -248,14 +255,16 @@ pub unsafe fn _mm256_permutexvar_epi8(idx: __m256i, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm256_mask_permutexvar_epi8(
|
||||
pub fn _mm256_mask_permutexvar_epi8(
|
||||
src: __m256i,
|
||||
k: __mmask32,
|
||||
idx: __m256i,
|
||||
a: __m256i,
|
||||
) -> __m256i {
|
||||
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
|
||||
unsafe {
|
||||
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, src.as_i8x32()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -265,9 +274,11 @@ pub unsafe fn _mm256_mask_permutexvar_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
|
||||
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
|
||||
pub fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, permute, i8x32::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
|
||||
|
|
@ -277,8 +288,8 @@ pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m25
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
|
||||
transmute(vpermb128(a.as_i8x16(), idx.as_i8x16()))
|
||||
pub fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpermb128(a.as_i8x16(), idx.as_i8x16())) }
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -288,14 +299,11 @@ pub unsafe fn _mm_permutexvar_epi8(idx: __m128i, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm_mask_permutexvar_epi8(
|
||||
src: __m128i,
|
||||
k: __mmask16,
|
||||
idx: __m128i,
|
||||
a: __m128i,
|
||||
) -> __m128i {
|
||||
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
|
||||
pub fn _mm_mask_permutexvar_epi8(src: __m128i, k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, src.as_i8x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -305,9 +313,11 @@ pub unsafe fn _mm_mask_permutexvar_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpermb))]
|
||||
pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
|
||||
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
|
||||
pub fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let permute = _mm_permutexvar_epi8(idx, a).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, permute, i8x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
|
||||
|
|
@ -317,8 +327,8 @@ pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i)
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64()))
|
||||
pub fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpmultishiftqb(a.as_i8x64(), b.as_i8x64())) }
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -328,14 +338,16 @@ pub unsafe fn _mm512_multishift_epi64_epi8(a: __m512i, b: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm512_mask_multishift_epi64_epi8(
|
||||
pub fn _mm512_mask_multishift_epi64_epi8(
|
||||
src: __m512i,
|
||||
k: __mmask64,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
|
||||
unsafe {
|
||||
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, multishift, src.as_i8x64()))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -345,9 +357,11 @@ pub unsafe fn _mm512_mask_multishift_epi64_epi8(
|
|||
#[target_feature(enable = "avx512vbmi")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
|
||||
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, multishift, i8x64::ZERO))
|
||||
pub fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64();
|
||||
transmute(simd_select_bitmask(k, multishift, i8x64::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
|
||||
|
|
@ -357,8 +371,8 @@ pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32()))
|
||||
pub fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpmultishiftqb256(a.as_i8x32(), b.as_i8x32())) }
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -368,14 +382,16 @@ pub unsafe fn _mm256_multishift_epi64_epi8(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm256_mask_multishift_epi64_epi8(
|
||||
pub fn _mm256_mask_multishift_epi64_epi8(
|
||||
src: __m256i,
|
||||
k: __mmask32,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
|
||||
unsafe {
|
||||
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, multishift, src.as_i8x32()))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -385,9 +401,11 @@ pub unsafe fn _mm256_mask_multishift_epi64_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
|
||||
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, multishift, i8x32::ZERO))
|
||||
pub fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32();
|
||||
transmute(simd_select_bitmask(k, multishift, i8x32::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst.
|
||||
|
|
@ -397,8 +415,8 @@ pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16()))
|
||||
pub fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpmultishiftqb128(a.as_i8x16(), b.as_i8x16())) }
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -408,14 +426,16 @@ pub unsafe fn _mm_multishift_epi64_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm_mask_multishift_epi64_epi8(
|
||||
pub fn _mm_mask_multishift_epi64_epi8(
|
||||
src: __m128i,
|
||||
k: __mmask16,
|
||||
a: __m128i,
|
||||
b: __m128i,
|
||||
) -> __m128i {
|
||||
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
|
||||
unsafe {
|
||||
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, multishift, src.as_i8x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -425,9 +445,11 @@ pub unsafe fn _mm_mask_multishift_epi64_epi8(
|
|||
#[target_feature(enable = "avx512vbmi,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpmultishiftqb))]
|
||||
pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
|
||||
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, multishift, i8x16::ZERO))
|
||||
pub fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16();
|
||||
transmute(simd_select_bitmask(k, multishift, i8x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -11,8 +11,8 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
|
||||
pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -22,14 +22,11 @@ pub unsafe fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm512_mask_dpwssd_epi32(
|
||||
src: __m512i,
|
||||
k: __mmask16,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
pub fn _mm512_mask_dpwssd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -39,14 +36,11 @@ pub unsafe fn _mm512_mask_dpwssd_epi32(
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm512_maskz_dpwssd_epi32(
|
||||
k: __mmask16,
|
||||
src: __m512i,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -59,8 +53,8 @@ pub unsafe fn _mm512_maskz_dpwssd_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpwssd)
|
||||
)]
|
||||
pub unsafe fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -70,8 +64,8 @@ pub unsafe fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> _
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -81,14 +75,11 @@ pub unsafe fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm256_mask_dpwssd_epi32(
|
||||
src: __m256i,
|
||||
k: __mmask8,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
pub fn _mm256_mask_dpwssd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -98,14 +89,11 @@ pub unsafe fn _mm256_mask_dpwssd_epi32(
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm256_maskz_dpwssd_epi32(
|
||||
k: __mmask8,
|
||||
src: __m256i,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -118,8 +106,8 @@ pub unsafe fn _mm256_maskz_dpwssd_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpwssd)
|
||||
)]
|
||||
pub unsafe fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -129,8 +117,8 @@ pub unsafe fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m1
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -140,9 +128,11 @@ pub unsafe fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
pub fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -152,9 +142,11 @@ pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssd))]
|
||||
pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpwssd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -164,8 +156,8 @@ pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
|
||||
pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -175,14 +167,11 @@ pub unsafe fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm512_mask_dpwssds_epi32(
|
||||
src: __m512i,
|
||||
k: __mmask16,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
pub fn _mm512_mask_dpwssds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -192,14 +181,11 @@ pub unsafe fn _mm512_mask_dpwssds_epi32(
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm512_maskz_dpwssds_epi32(
|
||||
k: __mmask16,
|
||||
src: __m512i,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -212,8 +198,8 @@ pub unsafe fn _mm512_maskz_dpwssds_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpwssds)
|
||||
)]
|
||||
pub unsafe fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -223,8 +209,8 @@ pub unsafe fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) ->
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -234,14 +220,11 @@ pub unsafe fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm256_mask_dpwssds_epi32(
|
||||
src: __m256i,
|
||||
k: __mmask8,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
pub fn _mm256_mask_dpwssds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -251,14 +234,11 @@ pub unsafe fn _mm256_mask_dpwssds_epi32(
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm256_maskz_dpwssds_epi32(
|
||||
k: __mmask8,
|
||||
src: __m256i,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -271,8 +251,8 @@ pub unsafe fn _mm256_maskz_dpwssds_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpwssds)
|
||||
)]
|
||||
pub unsafe fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -282,8 +262,8 @@ pub unsafe fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -293,9 +273,11 @@ pub unsafe fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
pub fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -305,14 +287,11 @@ pub unsafe fn _mm_mask_dpwssds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpwssds))]
|
||||
pub unsafe fn _mm_maskz_dpwssds_epi32(
|
||||
k: __mmask8,
|
||||
src: __m128i,
|
||||
a: __m128i,
|
||||
b: __m128i,
|
||||
) -> __m128i {
|
||||
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpwssds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -322,8 +301,8 @@ pub unsafe fn _mm_maskz_dpwssds_epi32(
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
|
||||
pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -333,14 +312,11 @@ pub unsafe fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m51
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm512_mask_dpbusd_epi32(
|
||||
src: __m512i,
|
||||
k: __mmask16,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
pub fn _mm512_mask_dpbusd_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -350,14 +326,11 @@ pub unsafe fn _mm512_mask_dpbusd_epi32(
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm512_maskz_dpbusd_epi32(
|
||||
k: __mmask16,
|
||||
src: __m512i,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -370,8 +343,8 @@ pub unsafe fn _mm512_maskz_dpbusd_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpbusd)
|
||||
)]
|
||||
pub unsafe fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -381,8 +354,8 @@ pub unsafe fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> _
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -392,14 +365,11 @@ pub unsafe fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm256_mask_dpbusd_epi32(
|
||||
src: __m256i,
|
||||
k: __mmask8,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
pub fn _mm256_mask_dpbusd_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -409,14 +379,11 @@ pub unsafe fn _mm256_mask_dpbusd_epi32(
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm256_maskz_dpbusd_epi32(
|
||||
k: __mmask8,
|
||||
src: __m256i,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -429,8 +396,8 @@ pub unsafe fn _mm256_maskz_dpbusd_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpbusd)
|
||||
)]
|
||||
pub unsafe fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
|
||||
|
|
@ -440,8 +407,8 @@ pub unsafe fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m1
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -451,9 +418,11 @@ pub unsafe fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
pub fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -463,9 +432,11 @@ pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusd))]
|
||||
pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpbusd_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -475,8 +446,8 @@ pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: _
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16()))
|
||||
pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -486,14 +457,11 @@ pub unsafe fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m5
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm512_mask_dpbusds_epi32(
|
||||
src: __m512i,
|
||||
k: __mmask16,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
pub fn _mm512_mask_dpbusds_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -503,14 +471,11 @@ pub unsafe fn _mm512_mask_dpbusds_epi32(
|
|||
#[target_feature(enable = "avx512vnni")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm512_maskz_dpbusds_epi32(
|
||||
k: __mmask16,
|
||||
src: __m512i,
|
||||
a: __m512i,
|
||||
b: __m512i,
|
||||
) -> __m512i {
|
||||
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16();
|
||||
transmute(simd_select_bitmask(k, r, i32x16::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -523,8 +488,8 @@ pub unsafe fn _mm512_maskz_dpbusds_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpbusds)
|
||||
)]
|
||||
pub unsafe fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -534,8 +499,8 @@ pub unsafe fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) ->
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -545,14 +510,11 @@ pub unsafe fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm256_mask_dpbusds_epi32(
|
||||
src: __m256i,
|
||||
k: __mmask8,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
pub fn _mm256_mask_dpbusds_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -562,14 +524,11 @@ pub unsafe fn _mm256_mask_dpbusds_epi32(
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm256_maskz_dpbusds_epi32(
|
||||
k: __mmask8,
|
||||
src: __m256i,
|
||||
a: __m256i,
|
||||
b: __m256i,
|
||||
) -> __m256i {
|
||||
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8();
|
||||
transmute(simd_select_bitmask(k, r, i32x8::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -582,8 +541,8 @@ pub unsafe fn _mm256_maskz_dpbusds_epi32(
|
|||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(vpdpbusds)
|
||||
)]
|
||||
pub unsafe fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
|
||||
|
|
@ -593,8 +552,8 @@ pub unsafe fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -604,9 +563,11 @@ pub unsafe fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
pub fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
|
|
@ -616,14 +577,11 @@ pub unsafe fn _mm_mask_dpbusds_epi32(src: __m128i, k: __mmask8, a: __m128i, b: _
|
|||
#[target_feature(enable = "avx512vnni,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpdpbusds))]
|
||||
pub unsafe fn _mm_maskz_dpbusds_epi32(
|
||||
k: __mmask8,
|
||||
src: __m128i,
|
||||
a: __m128i,
|
||||
b: __m128i,
|
||||
) -> __m128i {
|
||||
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
let r = _mm_dpbusds_epi32(src, a, b).as_i32x4();
|
||||
transmute(simd_select_bitmask(k, r, i32x4::ZERO))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
|
||||
|
|
@ -638,8 +596,8 @@ pub unsafe fn _mm_maskz_dpbusds_epi32(
|
|||
assert_instr(vpdpbssd)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
|
||||
|
|
@ -654,8 +612,8 @@ pub unsafe fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpbssd)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
|
||||
|
|
@ -670,8 +628,8 @@ pub unsafe fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
assert_instr(vpdpbssds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
|
||||
|
|
@ -686,8 +644,8 @@ pub unsafe fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpbssds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -702,8 +660,8 @@ pub unsafe fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
assert_instr(vpdpbsud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -718,8 +676,8 @@ pub unsafe fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpbsud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -734,8 +692,8 @@ pub unsafe fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
assert_instr(vpdpbsuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -750,8 +708,8 @@ pub unsafe fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpbsuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -766,8 +724,8 @@ pub unsafe fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
assert_instr(vpdpbuud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -782,8 +740,8 @@ pub unsafe fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpbuud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -798,8 +756,8 @@ pub unsafe fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
assert_instr(vpdpbuuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
|
||||
|
|
@ -814,8 +772,8 @@ pub unsafe fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpbuuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -830,8 +788,8 @@ pub unsafe fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
assert_instr(vpdpwsud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -846,8 +804,8 @@ pub unsafe fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpwsud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -862,8 +820,8 @@ pub unsafe fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
assert_instr(vpdpwsuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -878,8 +836,8 @@ pub unsafe fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpwsuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
|
||||
|
|
@ -894,8 +852,8 @@ pub unsafe fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
assert_instr(vpdpwusd)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
|
||||
|
|
@ -910,8 +868,8 @@ pub unsafe fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpwusd)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
|
||||
|
|
@ -926,8 +884,8 @@ pub unsafe fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
assert_instr(vpdpwusds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
|
||||
|
|
@ -942,8 +900,8 @@ pub unsafe fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpwusds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -958,8 +916,8 @@ pub unsafe fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m2
|
|||
assert_instr(vpdpwuud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -974,8 +932,8 @@ pub unsafe fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpwuud)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -990,8 +948,8 @@ pub unsafe fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m25
|
|||
assert_instr(vpdpwuuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4()))
|
||||
pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
|
||||
|
|
@ -1006,8 +964,8 @@ pub unsafe fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i
|
|||
assert_instr(vpdpwuuds)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8()))
|
||||
pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
|
||||
transmute(simd_ctpop(a.as_i32x16()))
|
||||
pub fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i32x16())) }
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -40,12 +40,14 @@ pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x16()),
|
||||
i32x16::ZERO,
|
||||
))
|
||||
pub fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x16()),
|
||||
i32x16::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -58,12 +60,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x16()),
|
||||
src.as_i32x16(),
|
||||
))
|
||||
pub fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x16()),
|
||||
src.as_i32x16(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -73,8 +77,8 @@ pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
|
||||
transmute(simd_ctpop(a.as_i32x8()))
|
||||
pub fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i32x8())) }
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -87,12 +91,14 @@ pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x8()),
|
||||
i32x8::ZERO,
|
||||
))
|
||||
pub fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x8()),
|
||||
i32x8::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -105,12 +111,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x8()),
|
||||
src.as_i32x8(),
|
||||
))
|
||||
pub fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x8()),
|
||||
src.as_i32x8(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -120,8 +128,8 @@ pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
|
||||
transmute(simd_ctpop(a.as_i32x4()))
|
||||
pub fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i32x4())) }
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -134,12 +142,14 @@ pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x4()),
|
||||
i32x4::ZERO,
|
||||
))
|
||||
pub fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x4()),
|
||||
i32x4::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -152,12 +162,14 @@ pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x4()),
|
||||
src.as_i32x4(),
|
||||
))
|
||||
pub fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i32x4()),
|
||||
src.as_i32x4(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -167,8 +179,8 @@ pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __
|
|||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
|
||||
transmute(simd_ctpop(a.as_i64x8()))
|
||||
pub fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i64x8())) }
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -181,12 +193,14 @@ pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x8()),
|
||||
i64x8::ZERO,
|
||||
))
|
||||
pub fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x8()),
|
||||
i64x8::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -199,12 +213,14 @@ pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
|||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x8()),
|
||||
src.as_i64x8(),
|
||||
))
|
||||
pub fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x8()),
|
||||
src.as_i64x8(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -214,8 +230,8 @@ pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) ->
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
|
||||
transmute(simd_ctpop(a.as_i64x4()))
|
||||
pub fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i64x4())) }
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -228,12 +244,14 @@ pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x4()),
|
||||
i64x4::ZERO,
|
||||
))
|
||||
pub fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x4()),
|
||||
i64x4::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -246,12 +264,14 @@ pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x4()),
|
||||
src.as_i64x4(),
|
||||
))
|
||||
pub fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x4()),
|
||||
src.as_i64x4(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -261,8 +281,8 @@ pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) ->
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
|
||||
transmute(simd_ctpop(a.as_i64x2()))
|
||||
pub fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
|
||||
unsafe { transmute(simd_ctpop(a.as_i64x2())) }
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -275,12 +295,14 @@ pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x2()),
|
||||
i64x2::ZERO,
|
||||
))
|
||||
pub fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x2()),
|
||||
i64x2::ZERO,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
|
|
@ -293,12 +315,14 @@ pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
|||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x2()),
|
||||
src.as_i64x2(),
|
||||
))
|
||||
pub fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
unsafe {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
simd_ctpop(a.as_i64x2()),
|
||||
src.as_i64x2(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -199,15 +199,17 @@ pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
|
|||
assert_instr(vcvtneps2bf16)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"{{vex}}vcvtneps2bf16 {dst},{src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
|
||||
unsafe {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"{{vex}}vcvtneps2bf16 {dst},{src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(xmm_reg) a,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
|
||||
|
|
@ -221,15 +223,17 @@ pub unsafe fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
|
|||
assert_instr(vcvtneps2bf16)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"{{vex}}vcvtneps2bf16 {dst},{src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(ymm_reg) a,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
|
||||
unsafe {
|
||||
let mut dst: __m128bh;
|
||||
asm!(
|
||||
"{{vex}}vcvtneps2bf16 {dst},{src}",
|
||||
dst = lateout(xmm_reg) dst,
|
||||
src = in(ymm_reg) a,
|
||||
options(pure, nomem, nostack, preserves_flags)
|
||||
);
|
||||
dst
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ use crate::core_arch::x86::*;
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bw")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _cvtmask64_u64(a: __mmask64) -> u64 {
|
||||
pub fn _cvtmask64_u64(a: __mmask64) -> u64 {
|
||||
a
|
||||
}
|
||||
|
||||
|
|
@ -16,7 +16,7 @@ pub unsafe fn _cvtmask64_u64(a: __mmask64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "avx512bw")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
pub unsafe fn _cvtu64_mask64(a: u64) -> __mmask64 {
|
||||
pub fn _cvtu64_mask64(a: u64) -> __mmask64 {
|
||||
a
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si))]
|
||||
pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
|
||||
pub fn _mm_cvtsd_i64(a: __m128d) -> i64 {
|
||||
_mm_cvtsd_si64(a)
|
||||
}
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si))]
|
||||
pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
|
||||
pub fn _mm_cvtss_i64(a: __m128) -> i64 {
|
||||
_mm_cvtss_si64(a)
|
||||
}
|
||||
|
||||
|
|
@ -35,8 +35,8 @@ pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2usi))]
|
||||
pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
|
||||
vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvtss_u64(a: __m128) -> u64 {
|
||||
unsafe { vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
|
||||
|
|
@ -46,8 +46,8 @@ pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
|
||||
pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
|
||||
vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvtsd_u64(a: __m128d) -> u64 {
|
||||
unsafe { vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
|
||||
|
|
@ -57,9 +57,11 @@ pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
|
||||
pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
|
||||
let b = b as f32;
|
||||
simd_insert!(a, 0, b)
|
||||
pub fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
|
||||
unsafe {
|
||||
let b = b as f32;
|
||||
simd_insert!(a, 0, b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
|
||||
|
|
@ -69,9 +71,11 @@ pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
|
||||
pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
|
||||
let b = b as f64;
|
||||
simd_insert!(a, 0, b)
|
||||
pub fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
|
||||
unsafe {
|
||||
let b = b as f64;
|
||||
simd_insert!(a, 0, b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
|
||||
|
|
@ -81,9 +85,11 @@ pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
|
||||
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
|
||||
let b = b as f32;
|
||||
simd_insert!(a, 0, b)
|
||||
pub fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
|
||||
unsafe {
|
||||
let b = b as f32;
|
||||
simd_insert!(a, 0, b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
|
||||
|
|
@ -93,9 +99,11 @@ pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
|
||||
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
|
||||
let b = b as f64;
|
||||
simd_insert!(a, 0, b)
|
||||
pub fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
|
||||
unsafe {
|
||||
let b = b as f64;
|
||||
simd_insert!(a, 0, b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
|
||||
|
|
@ -105,8 +113,8 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttsd2si))]
|
||||
pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
|
||||
vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvttsd_i64(a: __m128d) -> i64 {
|
||||
unsafe { vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
|
||||
|
|
@ -116,8 +124,8 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttsd2usi))]
|
||||
pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
|
||||
vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvttsd_u64(a: __m128d) -> u64 {
|
||||
unsafe { vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
|
||||
|
|
@ -127,8 +135,8 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttss2si))]
|
||||
pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
|
||||
vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvttss_i64(a: __m128) -> i64 {
|
||||
unsafe { vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
|
||||
|
|
@ -138,8 +146,8 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
|
|||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttss2usi))]
|
||||
pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
|
||||
vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvttss_u64(a: __m128) -> u64 {
|
||||
unsafe { vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
|
||||
|
|
@ -156,11 +164,13 @@ pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
let r = vcvtsi2sd64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
pub fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
let r = vcvtsi2sd64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
|
||||
|
|
@ -177,11 +187,13 @@ pub unsafe fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
let r = vcvtsi2sd64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
pub fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
let r = vcvtsi2sd64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
|
||||
|
|
@ -198,11 +210,13 @@ pub unsafe fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> _
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
let r = vcvtsi2ss64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
pub fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
let r = vcvtsi2ss64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
|
||||
|
|
@ -219,11 +233,13 @@ pub unsafe fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __m128d {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
let r = vcvtusi2sd64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
pub fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __m128d {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
let r = vcvtusi2sd64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
|
||||
|
|
@ -240,11 +256,13 @@ pub unsafe fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
let r = vcvtsi2ss64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
pub fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
let r = vcvtsi2ss64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
|
||||
|
|
@ -261,11 +279,13 @@ pub unsafe fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m128 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
let r = vcvtusi2ss64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
pub fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m128 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
let r = vcvtusi2ss64(a, b, ROUNDING);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
|
||||
|
|
@ -282,10 +302,12 @@ pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
vcvtsd2si64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
vcvtsd2si64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
|
||||
|
|
@ -302,10 +324,12 @@ pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
vcvtsd2si64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
vcvtsd2si64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
|
||||
|
|
@ -322,10 +346,12 @@ pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
vcvtsd2usi64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f64x2();
|
||||
vcvtsd2usi64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
|
||||
|
|
@ -342,10 +368,12 @@ pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
vcvtss2si64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
vcvtss2si64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
|
||||
|
|
@ -362,10 +390,12 @@ pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
vcvtss2si64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
vcvtss2si64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
|
||||
|
|
@ -382,10 +412,12 @@ pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
vcvtss2usi64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
let a = a.as_f32x4();
|
||||
vcvtss2usi64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
|
|
@ -397,10 +429,12 @@ pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f64x2();
|
||||
vcvttsd2si64(a, SAE)
|
||||
pub fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f64x2();
|
||||
vcvttsd2si64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
|
|
@ -412,10 +446,12 @@ pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f64x2();
|
||||
vcvttsd2si64(a, SAE)
|
||||
pub fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f64x2();
|
||||
vcvttsd2si64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
|
||||
|
|
@ -427,10 +463,12 @@ pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f64x2();
|
||||
vcvttsd2usi64(a, SAE)
|
||||
pub fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f64x2();
|
||||
vcvttsd2usi64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
|
|
@ -442,10 +480,12 @@ pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f32x4();
|
||||
vcvttss2si64(a, SAE)
|
||||
pub fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f32x4();
|
||||
vcvttss2si64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
|
||||
|
|
@ -457,10 +497,12 @@ pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f32x4();
|
||||
vcvttss2si64(a, SAE)
|
||||
pub fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f32x4();
|
||||
vcvttss2si64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
|
||||
|
|
@ -472,10 +514,12 @@ pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f32x4();
|
||||
vcvttss2usi64(a, SAE)
|
||||
pub fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
let a = a.as_f32x4();
|
||||
vcvttss2usi64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ use stdarch_test::assert_instr;
|
|||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsi2sh))]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
|
||||
vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
|
||||
unsafe { vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the signed 64-bit integer b to a half-precision (16-bit) floating-point element, store the
|
||||
|
|
@ -33,9 +33,11 @@ pub unsafe fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
|
|||
#[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtsi642sh(a, b, ROUNDING)
|
||||
pub fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtsi642sh(a, b, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
|
||||
|
|
@ -47,8 +49,8 @@ pub unsafe fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __
|
|||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtusi2sh))]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
|
||||
vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
|
||||
unsafe { vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the unsigned 64-bit integer b to a half-precision (16-bit) floating-point element, store the
|
||||
|
|
@ -69,9 +71,11 @@ pub unsafe fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
|
|||
#[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtusi642sh(a, b, ROUNDING)
|
||||
pub fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtusi642sh(a, b, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
|
||||
|
|
@ -82,8 +86,8 @@ pub unsafe fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __
|
|||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsh2si))]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 {
|
||||
vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvtsh_i64(a: __m128h) -> i64 {
|
||||
unsafe { vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer, and store
|
||||
|
|
@ -103,9 +107,11 @@ pub unsafe fn _mm_cvtsh_i64(a: __m128h) -> i64 {
|
|||
#[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtsh2si64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtsh2si64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
|
||||
|
|
@ -116,8 +122,8 @@ pub unsafe fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
|
|||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsh2usi))]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 {
|
||||
vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvtsh_u64(a: __m128h) -> u64 {
|
||||
unsafe { vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer, and store
|
||||
|
|
@ -137,9 +143,11 @@ pub unsafe fn _mm_cvtsh_u64(a: __m128h) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtsh2usi64(a, ROUNDING)
|
||||
pub fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtsh2usi64(a, ROUNDING)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
|
||||
|
|
@ -150,8 +158,8 @@ pub unsafe fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
|
|||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvttsh2si))]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 {
|
||||
vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvttsh_i64(a: __m128h) -> i64 {
|
||||
unsafe { vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit integer with truncation,
|
||||
|
|
@ -165,9 +173,11 @@ pub unsafe fn _mm_cvttsh_i64(a: __m128h) -> i64 {
|
|||
#[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
|
||||
static_assert_sae!(SAE);
|
||||
vcvttsh2si64(a, SAE)
|
||||
pub fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
vcvttsh2si64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
|
||||
|
|
@ -178,8 +188,8 @@ pub unsafe fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
|
|||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvttsh2usi))]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 {
|
||||
vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION)
|
||||
pub fn _mm_cvttsh_u64(a: __m128h) -> u64 {
|
||||
unsafe { vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
|
||||
}
|
||||
|
||||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 64-bit unsigned integer with truncation,
|
||||
|
|
@ -193,9 +203,11 @@ pub unsafe fn _mm_cvttsh_u64(a: __m128h) -> u64 {
|
|||
#[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub unsafe fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
|
||||
static_assert_sae!(SAE);
|
||||
vcvttsh2usi64(a, SAE)
|
||||
pub fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
vcvttsh2usi64(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue