Fix incorrect SAE and ROUNDING parameters
This commit is contained in:
parent
4d665d1a01
commit
f8782f778b
3 changed files with 123 additions and 78 deletions
|
|
@ -15046,60 +15046,90 @@ pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m5
|
|||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] // round up
|
||||
/// * [`_MM_FROUND_TO_ZERO`] // truncate
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
|
||||
pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
static_assert_extended_rounding!(ROUNDING);
|
||||
let a = a.as_f32x16();
|
||||
let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
|
||||
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] // round up
|
||||
/// * [`_MM_FROUND_TO_ZERO`] // truncate
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(3)]
|
||||
pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
|
||||
pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
|
||||
src: __m256i,
|
||||
k: __mmask16,
|
||||
a: __m512,
|
||||
) -> __m256i {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
static_assert_extended_rounding!(ROUNDING);
|
||||
let a = a.as_f32x16();
|
||||
let src = src.as_i16x16();
|
||||
let r = vcvtps2ph(a, SAE, src, k);
|
||||
let r = vcvtps2ph(a, ROUNDING, src, k);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] // round up
|
||||
/// * [`_MM_FROUND_TO_ZERO`] // truncate
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
|
||||
pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
static_assert_extended_rounding!(ROUNDING);
|
||||
let a = a.as_f32x16();
|
||||
let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
|
||||
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
|
@ -15203,56 +15233,86 @@ pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m1
|
|||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] // round up
|
||||
/// * [`_MM_FROUND_TO_ZERO`] // truncate
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
|
||||
pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
static_assert_extended_rounding!(ROUNDING);
|
||||
let a = a.as_f32x16();
|
||||
let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111);
|
||||
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] // round up
|
||||
/// * [`_MM_FROUND_TO_ZERO`] // truncate
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(3)]
|
||||
pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
|
||||
pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
static_assert_extended_rounding!(ROUNDING);
|
||||
let a = a.as_f32x16();
|
||||
let src = src.as_i16x16();
|
||||
let r = vcvtps2ph(a, SAE, src, k);
|
||||
let r = vcvtps2ph(a, ROUNDING, src, k);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
|
||||
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
|
||||
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] // round down
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] // round up
|
||||
/// * [`_MM_FROUND_TO_ZERO`] // truncate
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
|
||||
///
|
||||
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
|
||||
pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
|
||||
unsafe {
|
||||
static_assert_sae!(SAE);
|
||||
static_assert_extended_rounding!(ROUNDING);
|
||||
let a = a.as_f32x16();
|
||||
let r = vcvtps2ph(a, SAE, i16x16::ZERO, k);
|
||||
let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
|
||||
transmute(r)
|
||||
}
|
||||
}
|
||||
|
|
@ -42487,11 +42547,11 @@ unsafe extern "C" {
|
|||
fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
|
||||
fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
|
||||
fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
|
||||
fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
|
||||
fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
|
||||
fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
|
||||
fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
|
||||
fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
|
||||
|
|
|
|||
|
|
@ -13711,74 +13711,56 @@ pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
|
|||
/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
|
||||
/// and store the results in dst.
|
||||
///
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub fn _mm512_cvt_roundph_epu16<const ROUNDING: i32>(a: __m512h) -> __m512i {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
_mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
|
||||
pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
|
||||
static_assert_sae!(SAE);
|
||||
_mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
|
||||
}
|
||||
|
||||
/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
|
||||
/// and store the results in dst using writemask k (elements are copied from src when the corresponding
|
||||
/// mask bit is not set).
|
||||
///
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(3)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub fn _mm512_mask_cvt_roundph_epu16<const ROUNDING: i32>(
|
||||
pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
|
||||
src: __m512i,
|
||||
k: __mmask32,
|
||||
a: __m512h,
|
||||
) -> __m512i {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING))
|
||||
static_assert_sae!(SAE);
|
||||
transmute(vcvtph2uw_512(a, src.as_u16x32(), k, SAE))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers,
|
||||
/// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub fn _mm512_maskz_cvt_roundph_epu16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
_mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_setzero_si512(), k, a)
|
||||
pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
|
||||
static_assert_sae!(SAE);
|
||||
_mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
|
||||
}
|
||||
|
||||
/// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with
|
||||
|
|
@ -14560,24 +14542,18 @@ pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
|
|||
/// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store
|
||||
/// the result in dst.
|
||||
///
|
||||
/// Rounding is done according to the rounding parameter, which can be one of:
|
||||
///
|
||||
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
|
||||
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
|
||||
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
|
||||
/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512fp16")]
|
||||
#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
|
||||
#[cfg_attr(test, assert_instr(vcvtsh2usi, SAE = 8))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
|
||||
pub fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 {
|
||||
pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
|
||||
unsafe {
|
||||
static_assert_rounding!(ROUNDING);
|
||||
vcvtsh2usi32(a, ROUNDING)
|
||||
static_assert_rounding!(SAE);
|
||||
vcvtsh2usi32(a, SAE)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -16548,7 +16524,7 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"]
|
||||
fn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16;
|
||||
#[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"]
|
||||
fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, rounding: i32) -> u16x32;
|
||||
fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"]
|
||||
fn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8;
|
||||
|
|
|
|||
|
|
@ -21,6 +21,15 @@ macro_rules! static_assert_sae {
|
|||
};
|
||||
}
|
||||
|
||||
// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is
|
||||
// not an extended rounding number, i.e. unless its low three bits (`imm & 7`) are less than 5 and no bit above bit 3 is set (`imm & !15 == 0`).
|
||||
#[allow(unused)]
|
||||
macro_rules! static_assert_extended_rounding {
|
||||
($imm: ident) => {
|
||||
static_assert!(($imm & 7) < 5 && ($imm & !15) == 0, "Invalid IMM value")
|
||||
};
|
||||
}
|
||||
|
||||
// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is
|
||||
// not a mantissas sae number.
|
||||
#[allow(unused)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue