Implement AVX512f floating point comparisons (#869)

Co-authored-by: bjorn3 <bjorn3@users.noreply.github.com>
This commit is contained in:
Daniel Smith 2020-07-15 15:06:38 -04:00 committed by GitHub
parent 78135e1774
commit 5bfcdc0d57
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 1276 additions and 11 deletions

View file

@ -966,6 +966,670 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
transmute(i64x8::splat(a))
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
///
/// Uses the ordered, signaling predicate `_CMP_LT_OS`: lanes where either input is NaN compare false (mask bit 0).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, signaling predicate `_CMP_LT_OS`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
///
/// Uses the unordered, signaling predicate `_CMP_NLT_US`: lanes where either input is NaN compare true,
/// so this is NOT equivalent to greater-than-or-equal in the presence of NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, signaling predicate `_CMP_NLT_US`; NaN lanes compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
///
/// Uses the ordered, signaling predicate `_CMP_LE_OS`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, signaling predicate `_CMP_LE_OS`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
///
/// Uses the unordered, signaling predicate `_CMP_NLE_US`: lanes where either input is NaN compare true,
/// so this is NOT equivalent to greater-than in the presence of NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, signaling predicate `_CMP_NLE_US`; NaN lanes compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
///
/// Uses the ordered, quiet predicate `_CMP_EQ_OQ`; NaN lanes compare false and quiet NaNs do not signal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, quiet predicate `_CMP_EQ_OQ`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
///
/// Uses the unordered, quiet predicate `_CMP_NEQ_UQ`; lanes where either input is NaN compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, quiet predicate `_CMP_NEQ_UQ`; NaN lanes compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
///
/// `op` must be a compile-time constant (one of the `_CMP_*` predicates); the
/// `constify_imm5!` expansion selects the matching immediate for the `vcmpps` call.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
    // -1 as the write mask = all 16 bits set, i.e. compare every lane.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr) => {
            vcmpps(
                a.as_f32x16(),
                b.as_f32x16(),
                $imm5,
                neg_one,
                // No SAE override: use the current MXCSR rounding/exception behavior.
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// `op` must be a compile-time constant (one of the `_CMP_*` predicates).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
    macro_rules! call {
        ($imm5:expr) => {
            vcmpps(
                a.as_f32x16(),
                b.as_f32x16(),
                $imm5,
                // The intrinsic's mask parameter is signed; reinterpret the __mmask16 bits.
                m as i16,
                // No SAE override: use the current MXCSR rounding/exception behavior.
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
///
/// Both `op` (a `_CMP_*` predicate) and `sae` (exception-suppression control,
/// e.g. `_MM_FROUND_CUR_DIRECTION`) must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
    // -1 as the write mask = all 16 bits set, i.e. compare every lane.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Both `op` (a `_CMP_*` predicate) and `sae` (exception-suppression control)
/// must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_ps_mask(
    m: __mmask16,
    a: __m512,
    b: __m512,
    op: i32,
    sae: i32,
) -> __mmask16 {
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            // Reinterpret the __mmask16 bits for the intrinsic's signed mask parameter.
            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
///
/// Uses the ordered, quiet predicate `_CMP_ORD_Q`: a lane's bit is set only when both inputs are non-NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, quiet predicate `_CMP_ORD_Q`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
///
/// Uses the unordered, quiet predicate `_CMP_UNORD_Q`: a lane's bit is set when at least one input is NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, quiet predicate `_CMP_UNORD_Q`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
///
/// Uses the ordered, signaling predicate `_CMP_LT_OS`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, signaling predicate `_CMP_LT_OS`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
///
/// Uses the unordered, signaling predicate `_CMP_NLT_US`: NaN lanes compare true,
/// so this is NOT equivalent to greater-than-or-equal in the presence of NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, signaling predicate `_CMP_NLT_US`; NaN lanes compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
///
/// Uses the ordered, signaling predicate `_CMP_LE_OS`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, signaling predicate `_CMP_LE_OS`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
///
/// Uses the unordered, signaling predicate `_CMP_NLE_US`: NaN lanes compare true,
/// so this is NOT equivalent to greater-than in the presence of NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, signaling predicate `_CMP_NLE_US`; NaN lanes compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
///
/// Uses the ordered, quiet predicate `_CMP_EQ_OQ`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, quiet predicate `_CMP_EQ_OQ`; NaN lanes compare false.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
///
/// Uses the unordered, quiet predicate `_CMP_NEQ_UQ`; NaN lanes compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, quiet predicate `_CMP_NEQ_UQ`; NaN lanes compare true.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
///
/// `op` must be a compile-time constant (one of the `_CMP_*` predicates); the
/// `constify_imm5!` expansion selects the matching immediate for the `vcmppd` call.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
    // -1 as the write mask = all 8 bits set, i.e. compare every lane.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr) => {
            vcmppd(
                a.as_f64x8(),
                b.as_f64x8(),
                $imm5,
                neg_one,
                // No SAE override: use the current MXCSR rounding/exception behavior.
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// `op` must be a compile-time constant (one of the `_CMP_*` predicates).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr) => {
            vcmppd(
                a.as_f64x8(),
                b.as_f64x8(),
                $imm5,
                // The intrinsic's mask parameter is signed; reinterpret the __mmask8 bits.
                m as i8,
                // No SAE override: use the current MXCSR rounding/exception behavior.
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
///
/// Both `op` (a `_CMP_*` predicate) and `sae` (exception-suppression control,
/// e.g. `_MM_FROUND_CUR_DIRECTION`) must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 {
    // -1 as the write mask = all 8 bits set, i.e. compare every lane.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Both `op` (a `_CMP_*` predicate) and `sae` (exception-suppression control)
/// must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_pd_mask(
    m: __mmask8,
    a: __m512d,
    b: __m512d,
    op: i32,
    sae: i32,
) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            // Reinterpret the __mmask8 bits for the intrinsic's signed mask parameter.
            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
///
/// Uses the ordered, quiet predicate `_CMP_ORD_Q`: a lane's bit is set only when both inputs are non-NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the ordered, quiet predicate `_CMP_ORD_Q`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
///
/// Uses the unordered, quiet predicate `_CMP_UNORD_Q`: a lane's bit is set when at least one input is NaN.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
}
/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
///
/// Uses the unordered, quiet predicate `_CMP_UNORD_Q`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q)
}
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
///
/// Only bit 0 of the returned mask is meaningful. `op` must be a compile-time constant.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 {
    // -1 = all-ones write mask: the single compared lane is always evaluated.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr) => {
            vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
///
/// `op` must be a compile-time constant.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr) => {
            // Reinterpret the __mmask8 bits for the intrinsic's signed mask parameter.
            vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
///
/// Both `op` and `sae` (exception-suppression control) must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 {
    // -1 = all-ones write mask: the single compared lane is always evaluated.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmpss(a, b, $imm5, neg_one, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
///
/// Both `op` and `sae` (exception-suppression control) must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_round_ss_mask(
    m: __mmask8,
    a: __m128,
    b: __m128,
    op: i32,
    sae: i32,
) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            // Reinterpret the __mmask8 bits for the intrinsic's signed mask parameter.
            vcmpss(a, b, $imm5, m as i8, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
///
/// Only bit 0 of the returned mask is meaningful. `op` must be a compile-time constant.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 {
    // -1 = all-ones write mask: the single compared lane is always evaluated.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr) => {
            vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
///
/// `op` must be a compile-time constant.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr) => {
            // Reinterpret the __mmask8 bits for the intrinsic's signed mask parameter.
            vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm5!(op, call);
    transmute(r)
}
/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
///
/// Both `op` and `sae` (exception-suppression control) must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 {
    // -1 = all-ones write mask: the single compared lane is always evaluated.
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmpsd(a, b, $imm5, neg_one, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
///
/// Both `op` and `sae` (exception-suppression control) must be compile-time constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_round_sd_mask(
    m: __mmask8,
    a: __m128d,
    b: __m128d,
    op: i32,
    sae: i32,
) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            // Reinterpret the __mmask8 bits for the intrinsic's signed mask parameter.
            vcmpsd(a, b, $imm5, m as i8, $imm4)
        };
    }
    let r = constify_imm5_sae!(op, sae, call);
    transmute(r)
}
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
@ -1794,6 +2458,14 @@ extern "C" {
#[link_name = "llvm.x86.avx512.scatter.qpi.512"]
fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
#[link_name = "llvm.x86.avx512.mask.cmp.ss"]
fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.sd"]
fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
#[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
@ -2048,6 +2720,301 @@ mod tests {
assert_eq!(&arr[..], &expected[..],);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_ps_mask() {
    #[rustfmt::skip]
    let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                          0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let b = _mm512_set1_ps(-1.);
    let m = _mm512_cmplt_ps_mask(a, b);
    // Only f32::MIN and -100. are < -1; the NaN lane compares false (ordered predicate).
    assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_ps_mask() {
    #[rustfmt::skip]
    let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                          0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let b = _mm512_set1_ps(-1.);
    let mask = 0b01100110_01100110;
    let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
    // Result is the unmasked compare (0b00000101_...) ANDed with the zeromask.
    assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnlt_ps_mask() {
    #[rustfmt::skip]
    let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                          0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let b = _mm512_set1_ps(-1.);
    // NLT (unordered) is the exact complement of LT (ordered): NaN lanes flip to 1.
    assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
    #[rustfmt::skip]
    let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                          0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let b = _mm512_set1_ps(-1.);
    let mask = 0b01111010_01111010;
    // Every lane selected by the mask is not-less-than -1 (NaN counts as true),
    // so the result equals the mask itself.
    assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnle_ps_mask() {
    // Operands are swapped: tests -1. not-less-or-equal each lane of `va`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                           0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    assert_eq!(_mm512_cmpnle_ps_mask(vb, va), 0b00001101_00001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnle_ps_mask() {
    // Same swapped-operand NLE compare, intersected with the zeromask `k`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                           0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    let k = 0b01100110_01100110;
    assert_eq!(_mm512_mask_cmpnle_ps_mask(k, vb, va), 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_ps_mask() {
    // Lanes <= -1. (i.e. -1., f32::MIN, -100.) set their bit; NaN lanes do not.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                           0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    assert_eq!(_mm512_cmple_ps_mask(va, vb), 0b00100101_00100101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_ps_mask() {
    // LE compare result ANDed with the zeromask `k`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                           0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    let k = 0b01111010_01111010;
    assert_eq!(_mm512_mask_cmple_ps_mask(k, va, vb), 0b00100000_00100000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_ps_mask() {
    // Equal lanes set their bit; NaN == NaN is false for an ordered-quiet EQ.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
    #[rustfmt::skip]
    let vb = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
    assert_eq!(_mm512_cmpeq_ps_mask(vb, va), 0b11001101_11001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_ps_mask() {
    // EQ compare result restricted to the lanes enabled in `k`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
    #[rustfmt::skip]
    let vb = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
    let k = 0b01111010_01111010;
    assert_eq!(_mm512_mask_cmpeq_ps_mask(k, vb, va), 0b01001000_01001000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_ps_mask() {
    // Unequal lanes set their bit; NaN lanes compare not-equal too (unordered NEQ).
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
    #[rustfmt::skip]
    let vb = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
    assert_eq!(_mm512_cmpneq_ps_mask(vb, va), 0b00110010_00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_ps_mask() {
    // NEQ compare result intersected with the zeromask `k`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
    #[rustfmt::skip]
    let vb = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
                           0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
    let k = 0b01111010_01111010;
    assert_eq!(_mm512_mask_cmpneq_ps_mask(k, vb, va), 0b00110010_00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_ps_mask() {
    // Generic compare with an explicit predicate immediate (_CMP_LT_OQ).
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    assert_eq!(_mm512_cmp_ps_mask(va, vb, _CMP_LT_OQ), 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_ps_mask() {
    // Predicate-immediate compare restricted to the zeromask `k`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    let k = 0b01100110_01100110;
    assert_eq!(_mm512_mask_cmp_ps_mask(k, va, vb, _CMP_LT_OQ), 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_round_ps_mask() {
    // With _MM_FROUND_CUR_DIRECTION the rounded variant must match the plain compare.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    let got = _mm512_cmp_round_ps_mask(va, vb, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(got, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_round_ps_mask() {
    // Rounded, predicate-immediate compare intersected with the zeromask `k`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                           0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
    let vb = _mm512_set1_ps(-1.);
    let k = 0b01100110_01100110;
    let got = _mm512_mask_cmp_round_ps_mask(k, va, vb, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(got, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpord_ps_mask() {
    // A lane is "ordered" only when neither operand is NaN.
    #[rustfmt::skip]
    let va = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                           f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
    #[rustfmt::skip]
    let vb = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                           f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
    assert_eq!(_mm512_cmpord_ps_mask(va, vb), 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpord_ps_mask() {
    // Ordered-lane bits surviving the zeromask `k`.
    #[rustfmt::skip]
    let va = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                           f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
    #[rustfmt::skip]
    let vb = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                           f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
    let k = 0b11000011_11000011;
    assert_eq!(_mm512_mask_cmpord_ps_mask(k, va, vb), 0b00000001_00000001);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpunord_ps_mask() {
    // A lane is "unordered" when either operand is NaN — the complement of cmpord.
    #[rustfmt::skip]
    let va = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                           f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
    #[rustfmt::skip]
    let vb = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                           f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
    assert_eq!(_mm512_cmpunord_ps_mask(va, vb), 0b11111010_11111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpunord_ps_mask() {
    // Unordered-lane bits (either operand NaN) surviving the zeromask.
    #[rustfmt::skip]
    let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                          f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
    #[rustfmt::skip]
    let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
                          f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
    let mask = 0b00001111_00001111;
    let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
    // Fixed literal: was `0b000001010_00001010` — nine digits in the first
    // group, which misgroups the 16-bit mask (same value, wrong notation).
    assert_eq!(m, 0b00001010_00001010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_ss_mask() {
    // Scalar compare: only lane 0 participates (2. >= 1. holds), so bit 0 is set.
    let x = _mm_setr_ps(2., 1., 1., 1.);
    let y = _mm_setr_ps(1., 2., 2., 2.);
    assert_eq!(_mm_cmp_ss_mask(x, y, _CMP_GE_OS), 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_ss_mask() {
    // Only bit 0 of the mask matters for a scalar compare: 0b10 zeroes the
    // result, 0b1 lets the true comparison through.
    let x = _mm_setr_ps(2., 1., 1., 1.);
    let y = _mm_setr_ps(1., 2., 2., 2.);
    assert_eq!(_mm_mask_cmp_ss_mask(0b10, x, y, _CMP_GE_OS), 0);
    assert_eq!(_mm_mask_cmp_ss_mask(0b1, x, y, _CMP_GE_OS), 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_round_ss_mask() {
    // With _MM_FROUND_CUR_DIRECTION this matches the non-round scalar compare.
    let x = _mm_setr_ps(2., 1., 1., 1.);
    let y = _mm_setr_ps(1., 2., 2., 2.);
    let got = _mm_cmp_round_ss_mask(x, y, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(got, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_round_ss_mask() {
    // Masked rounded scalar compare: mask bit 0 gates the single result bit.
    let x = _mm_setr_ps(2., 1., 1., 1.);
    let y = _mm_setr_ps(1., 2., 2., 2.);
    let masked_out = _mm_mask_cmp_round_ss_mask(0b10, x, y, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(masked_out, 0);
    let masked_in = _mm_mask_cmp_round_ss_mask(0b1, x, y, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(masked_in, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_sd_mask() {
    // Scalar double compare: lane 0 only (2. >= 1. holds).
    let x = _mm_setr_pd(2., 1.);
    let y = _mm_setr_pd(1., 2.);
    assert_eq!(_mm_cmp_sd_mask(x, y, _CMP_GE_OS), 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_sd_mask() {
    // Mask bit 0 gates the single scalar result bit.
    let x = _mm_setr_pd(2., 1.);
    let y = _mm_setr_pd(1., 2.);
    assert_eq!(_mm_mask_cmp_sd_mask(0b10, x, y, _CMP_GE_OS), 0);
    assert_eq!(_mm_mask_cmp_sd_mask(0b1, x, y, _CMP_GE_OS), 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_round_sd_mask() {
    // Rounded variant with _MM_FROUND_CUR_DIRECTION matches the plain compare.
    let x = _mm_setr_pd(2., 1.);
    let y = _mm_setr_pd(1., 2.);
    let got = _mm_cmp_round_sd_mask(x, y, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(got, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_round_sd_mask() {
    // Masked rounded scalar double compare: mask bit 0 gates the result.
    let x = _mm_setr_pd(2., 1.);
    let y = _mm_setr_pd(1., 2.);
    let masked_out = _mm_mask_cmp_round_sd_mask(0b10, x, y, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(masked_out, 0);
    let masked_in = _mm_mask_cmp_round_sd_mask(0b1, x, y, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(masked_in, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epu32_mask() {
#[rustfmt::skip]

View file

@ -92,6 +92,114 @@ macro_rules! constify_imm2 {
};
}
// Constifies 5 bits along with an sae option without rounding control.
// The first operand is a 5-bit comparison predicate (0..=31); the second is
// the SAE/rounding operand, restricted here to 4 (_MM_FROUND_CUR_DIRECTION),
// 8 (_MM_FROUND_NO_EXC) and 12 (both flags) — the same set LLVM accepts for
// these intrinsics.
// See: https://github.com/llvm/llvm-project/blob/bd50cf905fa7c0c7caa134301c6ca0658c81eeb1/clang/lib/Sema/SemaChecking.cpp#L3497
#[allow(unused)]
macro_rules! constify_imm5_sae {
    ($imm5:expr, $imm4:expr, $expand:ident) => {
        #[allow(overflowing_literals)]
        // Both immediates must be literal constants at the call site; every
        // valid (imm5, sae) pair is enumerated so `$expand!` receives
        // constants, as required by the underlying LLVM intrinsics.
        match ($imm5 & 0b1111_1, $imm4 & 0b1111) {
            (0, 4) => $expand!(0, 4),
            (0, 8) => $expand!(0, 8),
            (0, 12) => $expand!(0, 12),
            (1, 4) => $expand!(1, 4),
            (1, 8) => $expand!(1, 8),
            (1, 12) => $expand!(1, 12),
            (2, 4) => $expand!(2, 4),
            (2, 8) => $expand!(2, 8),
            (2, 12) => $expand!(2, 12),
            (3, 4) => $expand!(3, 4),
            (3, 8) => $expand!(3, 8),
            (3, 12) => $expand!(3, 12),
            (4, 4) => $expand!(4, 4),
            (4, 8) => $expand!(4, 8),
            (4, 12) => $expand!(4, 12),
            (5, 4) => $expand!(5, 4),
            (5, 8) => $expand!(5, 8),
            (5, 12) => $expand!(5, 12),
            (6, 4) => $expand!(6, 4),
            (6, 8) => $expand!(6, 8),
            (6, 12) => $expand!(6, 12),
            (7, 4) => $expand!(7, 4),
            (7, 8) => $expand!(7, 8),
            (7, 12) => $expand!(7, 12),
            (8, 4) => $expand!(8, 4),
            (8, 8) => $expand!(8, 8),
            (8, 12) => $expand!(8, 12),
            (9, 4) => $expand!(9, 4),
            (9, 8) => $expand!(9, 8),
            (9, 12) => $expand!(9, 12),
            (10, 4) => $expand!(10, 4),
            (10, 8) => $expand!(10, 8),
            (10, 12) => $expand!(10, 12),
            (11, 4) => $expand!(11, 4),
            (11, 8) => $expand!(11, 8),
            (11, 12) => $expand!(11, 12),
            (12, 4) => $expand!(12, 4),
            (12, 8) => $expand!(12, 8),
            (12, 12) => $expand!(12, 12),
            (13, 4) => $expand!(13, 4),
            (13, 8) => $expand!(13, 8),
            (13, 12) => $expand!(13, 12),
            (14, 4) => $expand!(14, 4),
            (14, 8) => $expand!(14, 8),
            (14, 12) => $expand!(14, 12),
            (15, 4) => $expand!(15, 4),
            (15, 8) => $expand!(15, 8),
            (15, 12) => $expand!(15, 12),
            (16, 4) => $expand!(16, 4),
            (16, 8) => $expand!(16, 8),
            (16, 12) => $expand!(16, 12),
            (17, 4) => $expand!(17, 4),
            (17, 8) => $expand!(17, 8),
            (17, 12) => $expand!(17, 12),
            (18, 4) => $expand!(18, 4),
            (18, 8) => $expand!(18, 8),
            (18, 12) => $expand!(18, 12),
            (19, 4) => $expand!(19, 4),
            (19, 8) => $expand!(19, 8),
            (19, 12) => $expand!(19, 12),
            (20, 4) => $expand!(20, 4),
            (20, 8) => $expand!(20, 8),
            (20, 12) => $expand!(20, 12),
            (21, 4) => $expand!(21, 4),
            (21, 8) => $expand!(21, 8),
            (21, 12) => $expand!(21, 12),
            (22, 4) => $expand!(22, 4),
            (22, 8) => $expand!(22, 8),
            (22, 12) => $expand!(22, 12),
            (23, 4) => $expand!(23, 4),
            (23, 8) => $expand!(23, 8),
            (23, 12) => $expand!(23, 12),
            (24, 4) => $expand!(24, 4),
            (24, 8) => $expand!(24, 8),
            (24, 12) => $expand!(24, 12),
            (25, 4) => $expand!(25, 4),
            (25, 8) => $expand!(25, 8),
            (25, 12) => $expand!(25, 12),
            (26, 4) => $expand!(26, 4),
            (26, 8) => $expand!(26, 8),
            (26, 12) => $expand!(26, 12),
            (27, 4) => $expand!(27, 4),
            (27, 8) => $expand!(27, 8),
            (27, 12) => $expand!(27, 12),
            (28, 4) => $expand!(28, 4),
            (28, 8) => $expand!(28, 8),
            (28, 12) => $expand!(28, 12),
            (29, 4) => $expand!(29, 4),
            (29, 8) => $expand!(29, 8),
            (29, 12) => $expand!(29, 12),
            (30, 4) => $expand!(30, 4),
            (30, 8) => $expand!(30, 8),
            (30, 12) => $expand!(30, 12),
            (31, 4) => $expand!(31, 4),
            (31, 8) => $expand!(31, 8),
            (31, 12) => $expand!(31, 12),
            // Any sae value outside {4, 8, 12} is rejected at runtime.
            (_, _) => panic!("Invalid sae value"),
        }
    };
}
// For gather instructions, the only valid values for scale are 1, 2, 4 and 8.
// This macro enforces that.
#[allow(unused)]

View file

@ -145,19 +145,15 @@ pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
}
pub unsafe fn assert_eq_m512(a: __m512, b: __m512) {
// TODO: This should use `_mm512_cmpeq_ps_mask`, but that isn't yet implemented.
union A {
a: __m512,
b: [f32; 16],
let cmp = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
if cmp != 0b11111111_11111111 {
panic!("{:?} != {:?}", a, b);
}
assert_eq!(A { a }.b, A { a: b }.b)
}
pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) {
// TODO: This should use `_mm512_cmpeq_pd_mask`, but that isn't yet implemented.
union A {
a: __m512d,
b: [f64; 8],
let cmp = _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ);
if cmp != 0b11111111 {
panic!("{:?} != {:?}", a, b);
}
assert_eq!(A { a }.b, A { a: b }.b)
}

View file

@ -65,6 +65,201 @@ mod tests {
assert_eq_m512i(r, _mm512_set1_epi64(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_pd_mask() {
    // Only lanes strictly below -1. (f64::MIN and -100.) set their bit; the NaN lane stays 0.
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    assert_eq!(_mm512_cmplt_pd_mask(va, vb), 0b00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_pd_mask() {
    // LT compare intersected with the zeromask `k`.
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let k = 0b01100110;
    assert_eq!(_mm512_mask_cmplt_pd_mask(k, va, vb), 0b00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnlt_pd_mask() {
    // not-less-than is the exact complement of less-than (the NaN lane is NLT).
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let lt = _mm512_cmplt_pd_mask(va, vb);
    assert_eq!(_mm512_cmpnlt_pd_mask(va, vb), !lt);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnlt_pd_mask() {
    // Every masked-in lane is not-less-than -1., so the result equals the mask.
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let k = 0b01111010;
    assert_eq!(_mm512_mask_cmpnlt_pd_mask(k, va, vb), 0b01111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_pd_mask() {
    // Lanes <= -1. (-1., f64::MIN, -100.) set their bit; the NaN lane does not.
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    assert_eq!(_mm512_cmple_pd_mask(va, vb), 0b00100101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_pd_mask() {
    // LE compare result ANDed with the zeromask `k`.
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let k = 0b01111010;
    assert_eq!(_mm512_mask_cmple_pd_mask(k, va, vb), 0b00100000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnle_pd_mask() {
    // Operands are swapped: tests -1. not-less-or-equal each lane of `va`.
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    assert_eq!(_mm512_cmpnle_pd_mask(vb, va), 0b00001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnle_pd_mask() {
    // Swapped-operand NLE compare intersected with the zeromask `k`.
    let va = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let k = 0b01100110;
    assert_eq!(_mm512_mask_cmpnle_pd_mask(k, vb, va), 0b00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_pd_mask() {
    // Equal lanes set their bit; NaN == NaN is false for an ordered-quiet EQ.
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
    let vb = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
    assert_eq!(_mm512_cmpeq_pd_mask(vb, va), 0b11001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_pd_mask() {
    // EQ compare result restricted to the lanes enabled in `k`.
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
    let vb = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
    let k = 0b01111010;
    assert_eq!(_mm512_mask_cmpeq_pd_mask(k, vb, va), 0b01001000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_pd_mask() {
    // Unequal lanes set their bit; the NaN lane compares not-equal too (unordered NEQ).
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
    let vb = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
    assert_eq!(_mm512_cmpneq_pd_mask(vb, va), 0b00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_pd_mask() {
    // NEQ compare result intersected with the zeromask `k`.
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
    let vb = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
    let k = 0b01111010;
    assert_eq!(_mm512_mask_cmpneq_pd_mask(k, vb, va), 0b00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_pd_mask() {
    // Generic compare with an explicit predicate immediate (_CMP_LT_OQ).
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    assert_eq!(_mm512_cmp_pd_mask(va, vb, _CMP_LT_OQ), 0b00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_pd_mask() {
    // Predicate-immediate compare restricted to the zeromask `k`.
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let k = 0b01100110;
    assert_eq!(_mm512_mask_cmp_pd_mask(k, va, vb, _CMP_LT_OQ), 0b00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_round_pd_mask() {
    // With _MM_FROUND_CUR_DIRECTION the rounded variant must match the plain compare.
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let got = _mm512_cmp_round_pd_mask(va, vb, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(got, 0b00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_round_pd_mask() {
    // Rounded, predicate-immediate compare intersected with the zeromask `k`.
    let va = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
    let vb = _mm512_set1_pd(-1.);
    let k = 0b01100110;
    let got = _mm512_mask_cmp_round_pd_mask(k, va, vb, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(got, 0b00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpord_pd_mask() {
    // A lane is "ordered" only when neither operand is NaN.
    let va = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
    let vb = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
    assert_eq!(_mm512_cmpord_pd_mask(va, vb), 0b00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpord_pd_mask() {
    // Ordered-lane bits surviving the zeromask `k`.
    let va = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
    let vb = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
    let k = 0b11000011;
    assert_eq!(_mm512_mask_cmpord_pd_mask(k, va, vb), 0b00000001);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpunord_pd_mask() {
    // A lane is "unordered" when either operand is NaN — the complement of cmpord.
    let va = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
    let vb = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
    assert_eq!(_mm512_cmpunord_pd_mask(va, vb), 0b11111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpunord_pd_mask() {
    // Unordered-lane bits (either operand NaN) surviving the zeromask.
    let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
    let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
    let mask = 0b00001111;
    let m = _mm512_mask_cmpunord_pd_mask(mask, a, b);
    // Fixed literal: was `0b000001010` — nine digits for an 8-lane mask
    // (same value, but the stray leading digit is misleading).
    assert_eq!(m, 0b00001010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epu64_mask() {
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);

View file

@ -3,7 +3,6 @@
//! This basically just disassembles the current executable and then parses the
//! output once globally and then provides the `assert` function which makes
//! assertions about the disassembly of a function.
#![feature(const_transmute)]
#![feature(vec_leak)]
#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]