Upgrade more intrinsics to the new version
This commit is contained in:
parent
3de76d47a2
commit
cd63f26d86
7 changed files with 54 additions and 24 deletions
|
|
@ -21420,7 +21420,7 @@ pub fn vrbit_s8(a: int8x8_t) -> int8x8_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.rbit.v8i8"
|
||||
link_name = "llvm.bitreverse.v8i8"
|
||||
)]
|
||||
fn _vrbit_s8(a: int8x8_t) -> int8x8_t;
|
||||
}
|
||||
|
|
@ -21436,7 +21436,7 @@ pub fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.rbit.v16i8"
|
||||
link_name = "llvm.bitreverse.v16i8"
|
||||
)]
|
||||
fn _vrbitq_s8(a: int8x16_t) -> int8x16_t;
|
||||
}
|
||||
|
|
@ -23871,7 +23871,7 @@ pub fn vrndn_f64(a: float64x1_t) -> float64x1_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.frintn.v1f64"
|
||||
link_name = "llvm.roundeven.v1f64"
|
||||
)]
|
||||
fn _vrndn_f64(a: float64x1_t) -> float64x1_t;
|
||||
}
|
||||
|
|
@ -23887,7 +23887,7 @@ pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.frintn.v2f64"
|
||||
link_name = "llvm.roundeven.v2f64"
|
||||
)]
|
||||
fn _vrndnq_f64(a: float64x2_t) -> float64x2_t;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -204,6 +204,7 @@ pub fn __crc32w(crc: u32, data: u32) -> u32 {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s8)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -221,6 +222,7 @@ fn priv_vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s8)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -238,6 +240,7 @@ fn priv_vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s16)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -255,6 +258,7 @@ fn priv_vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s16)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -272,6 +276,7 @@ fn priv_vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s32)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -289,6 +294,7 @@ fn priv_vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s32)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -306,6 +312,7 @@ fn priv_vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u8)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -323,6 +330,7 @@ fn priv_vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u8)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -340,6 +348,7 @@ fn priv_vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u16)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -357,6 +366,7 @@ fn priv_vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u16)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -374,6 +384,7 @@ fn priv_vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u32)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -391,6 +402,7 @@ fn priv_vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u32)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
|
||||
#[cfg_attr(
|
||||
|
|
@ -58712,7 +58724,7 @@ pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.frintn.v4f16"
|
||||
link_name = "llvm.roundeven.v4f16"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f16")]
|
||||
fn _vrndn_f16(a: float16x4_t) -> float16x4_t;
|
||||
|
|
@ -58734,7 +58746,7 @@ pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.frintn.v8f16"
|
||||
link_name = "llvm.roundeven.v8f16"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v8f16")]
|
||||
fn _vrndnq_f16(a: float16x8_t) -> float16x8_t;
|
||||
|
|
@ -58763,7 +58775,7 @@ pub fn vrndn_f32(a: float32x2_t) -> float32x2_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.frintn.v2f32"
|
||||
link_name = "llvm.roundeven.v2f32"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
|
||||
fn _vrndn_f32(a: float32x2_t) -> float32x2_t;
|
||||
|
|
@ -58792,7 +58804,7 @@ pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
|
|||
unsafe extern "unadjusted" {
|
||||
#[cfg_attr(
|
||||
any(target_arch = "aarch64", target_arch = "arm64ec"),
|
||||
link_name = "llvm.aarch64.neon.frintn.v4f32"
|
||||
link_name = "llvm.roundeven.v4f32"
|
||||
)]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
|
||||
fn _vrndnq_f32(a: float32x4_t) -> float32x4_t;
|
||||
|
|
@ -61531,6 +61543,8 @@ pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t)
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
@ -61543,6 +61557,8 @@ fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
@ -61555,6 +61571,8 @@ fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
@ -61567,6 +61585,8 @@ fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
@ -61579,6 +61599,8 @@ fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
@ -61591,6 +61613,8 @@ fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
@ -61603,6 +61627,8 @@ fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
@ -61615,6 +61641,8 @@ fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
|
|||
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"]
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
|
||||
fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
|
||||
unsafe extern "unadjusted" {
|
||||
|
|
|
|||
|
|
@ -573,7 +573,7 @@ pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
|
||||
static_assert_uimm_bits!(IMM8, 8);
|
||||
unsafe { vdpps(a, b, IMM8) }
|
||||
unsafe { vdpps(a, b, IMM8 as i8) }
|
||||
}
|
||||
|
||||
/// Horizontal addition of adjacent pairs in the two packed vectors
|
||||
|
|
@ -3043,7 +3043,7 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx.round.ps.256"]
|
||||
fn roundps256(a: __m256, b: i32) -> __m256;
|
||||
#[link_name = "llvm.x86.avx.dp.ps.256"]
|
||||
fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
|
||||
fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
|
||||
#[link_name = "llvm.x86.avx.hadd.pd.256"]
|
||||
fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
|
||||
#[link_name = "llvm.x86.avx.hadd.ps.256"]
|
||||
|
|
|
|||
|
|
@ -2146,7 +2146,7 @@ pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
|
|||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
|
||||
static_assert_uimm_bits!(IMM8, 8);
|
||||
unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) }
|
||||
unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
|
||||
}
|
||||
|
||||
/// Multiplies the low 32-bit integers from each packed 64-bit element in
|
||||
|
|
@ -3800,7 +3800,7 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
|
||||
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
|
||||
#[link_name = "llvm.x86.avx2.mpsadbw"]
|
||||
fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
|
||||
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
|
||||
fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.packsswb"]
|
||||
|
|
|
|||
|
|
@ -22,11 +22,11 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
|
||||
fn cvtneps2bf16_512(a: f32x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512bf16.dpbf16ps.128"]
|
||||
fn dpbf16ps(a: f32x4, b: i32x4, c: i32x4) -> f32x4;
|
||||
fn dpbf16ps(a: f32x4, b: i16x8, c: i16x8) -> f32x4;
|
||||
#[link_name = "llvm.x86.avx512bf16.dpbf16ps.256"]
|
||||
fn dpbf16ps_256(a: f32x8, b: i32x8, c: i32x8) -> f32x8;
|
||||
fn dpbf16ps_256(a: f32x8, b: i16x16, c: i16x16) -> f32x8;
|
||||
#[link_name = "llvm.x86.avx512bf16.dpbf16ps.512"]
|
||||
fn dpbf16ps_512(a: f32x16, b: i32x16, c: i32x16) -> f32x16;
|
||||
fn dpbf16ps_512(a: f32x16, b: i16x32, c: i16x32) -> f32x16;
|
||||
}
|
||||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in two 128-bit vectors
|
||||
|
|
@ -250,7 +250,7 @@ pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
|
||||
unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4())) }
|
||||
unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i16x8(), b.as_i16x8())) }
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -295,7 +295,7 @@ pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
|
||||
unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8())) }
|
||||
unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i16x16(), b.as_i16x16())) }
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
@ -341,7 +341,7 @@ pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh
|
|||
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
|
||||
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
|
||||
pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
|
||||
unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16())) }
|
||||
unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i16x32(), b.as_i16x32())) }
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
|
||||
|
|
|
|||
|
|
@ -2943,7 +2943,7 @@ intrinsics:
|
|||
- LLVMLink:
|
||||
name: "rbit.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.rbit.{neon_type}"
|
||||
- link: "llvm.bitreverse.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vrbit{neon_type[0].no}"
|
||||
|
|
@ -3096,7 +3096,7 @@ intrinsics:
|
|||
- LLVMLink:
|
||||
name: "frintn.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.frintn.{neon_type}"
|
||||
- link: "llvm.roundeven.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
|
||||
- name: "vrndns_{type}"
|
||||
|
|
|
|||
|
|
@ -2469,7 +2469,7 @@ intrinsics:
|
|||
- LLVMLink:
|
||||
name: "llvm.frinn.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.frintn.{neon_type}"
|
||||
- link: "llvm.roundeven.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- link: "llvm.arm.neon.vrintn.{neon_type}"
|
||||
arch: arm
|
||||
|
|
@ -2492,7 +2492,7 @@ intrinsics:
|
|||
- LLVMLink:
|
||||
name: "llvm.frinn.{neon_type}"
|
||||
links:
|
||||
- link: "llvm.aarch64.neon.frintn.{neon_type}"
|
||||
- link: "llvm.roundeven.{neon_type}"
|
||||
arch: aarch64,arm64ec
|
||||
- link: "llvm.arm.neon.vrintn.{neon_type}"
|
||||
arch: arm
|
||||
|
|
@ -13202,6 +13202,7 @@ intrinsics:
|
|||
return_type: "{neon_type[0]}"
|
||||
safety: safe
|
||||
attr:
|
||||
- *target-is-arm
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
|
|
@ -13227,6 +13228,7 @@ intrinsics:
|
|||
return_type: "{neon_type[0]}"
|
||||
safety: safe
|
||||
attr:
|
||||
- *target-is-arm
|
||||
- *neon-v7
|
||||
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
|
||||
- *neon-cfg-arm-unstable
|
||||
|
|
@ -13812,8 +13814,8 @@ intrinsics:
|
|||
return_type: "{neon_type[1]}"
|
||||
safety: safe
|
||||
attr:
|
||||
#- *target-is-arm
|
||||
#- *neon-v7
|
||||
- *target-is-arm
|
||||
- *neon-v7
|
||||
- *neon-arm-unstable
|
||||
types:
|
||||
- ['_v8i8', "int8x8_t", '8']
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue