core-arch: Add NEON fp16 intrinsics
This commit is contained in:
parent
7cdc9157e6
commit
f4a31fd609
7 changed files with 16001 additions and 89 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -4783,6 +4783,24 @@ pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_
|
|||
simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
|
||||
}
|
||||
|
||||
/// Bitwise Select.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,fp16")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
|
||||
assert_instr(bsl)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
|
||||
pub unsafe fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
|
||||
let not = int16x4_t::splat(-1);
|
||||
transmute(simd_or(
|
||||
simd_and(a, transmute(b)),
|
||||
simd_and(simd_xor(a, transmute(not)), transmute(c)),
|
||||
))
|
||||
}
|
||||
|
||||
/// Bitwise Select.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -5096,6 +5114,24 @@ pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8
|
|||
))
|
||||
}
|
||||
|
||||
/// Bitwise Select.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,fp16")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
|
||||
assert_instr(bsl)
|
||||
)]
|
||||
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
|
||||
pub unsafe fn vbslq_f16(a: uint16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
|
||||
let not = int16x8_t::splat(-1);
|
||||
transmute(simd_or(
|
||||
simd_and(a, transmute(b)),
|
||||
simd_and(simd_xor(a, transmute(not)), transmute(c)),
|
||||
))
|
||||
}
|
||||
|
||||
/// Bitwise Select. (128-bit)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
|
|||
|
|
@ -108,6 +108,74 @@ vsri_n_p64
|
|||
vsriq_n_p64
|
||||
vtst_p64
|
||||
vtstq_p64
|
||||
vaddh_f16
|
||||
vsubh_f16
|
||||
vabsh_f16
|
||||
vdivh_f16
|
||||
vmulh_f16
|
||||
vfmsh_f16
|
||||
vfmah_f16
|
||||
vminnmh_f16
|
||||
vmaxnmh_f16
|
||||
vrndh_f16
|
||||
vrndnh_f16
|
||||
vrndih_f16
|
||||
vrndah_f16
|
||||
vrndph_f16
|
||||
vrndmh_f16
|
||||
vrndxh_f16
|
||||
vsqrth_f16
|
||||
vnegh_f16
|
||||
vcvth_f16_s32
|
||||
vcvth_s32_f16
|
||||
vcvth_n_f16_s32
|
||||
vcvth_n_s32_f16
|
||||
vcvth_f16_u32
|
||||
vcvth_u32_f16
|
||||
vcvth_n_f16_u32
|
||||
vcvth_n_u32_f16
|
||||
vcvtah_s32_f16
|
||||
vcvtah_u32_f16
|
||||
vcvtmh_s32_f16
|
||||
vcvtmh_u32_f16
|
||||
vcvtpq_s16_f16
|
||||
vcvtpq_u16_f16
|
||||
vcvtp_s16_f16
|
||||
vcvtp_u16_f16
|
||||
vcvtph_s32_f16
|
||||
vcvtph_u32_f16
|
||||
vcvtnh_u32_f16
|
||||
vcvtnh_s32_f16
|
||||
vfmlsl_low_f16
|
||||
vfmlslq_low_f16
|
||||
vfmlsl_high_f16
|
||||
vfmlslq_high_f16
|
||||
vfmlsl_lane_high_f16
|
||||
vfmlsl_laneq_high_f16
|
||||
vfmlslq_lane_high_f16
|
||||
vfmlslq_laneq_high_f16
|
||||
vfmlsl_lane_low_f16
|
||||
vfmlsl_laneq_low_f16
|
||||
vfmlslq_lane_low_f16
|
||||
vfmlslq_laneq_low_f16
|
||||
vfmlal_low_f16
|
||||
vfmlalq_low_f16
|
||||
vfmlal_high_f16
|
||||
vfmlalq_high_f16
|
||||
vfmlal_lane_low_f16
|
||||
vfmlal_laneq_low_f16
|
||||
vfmlalq_lane_low_f16
|
||||
vfmlalq_laneq_low_f16
|
||||
vfmlal_lane_high_f16
|
||||
vfmlal_laneq_high_f16
|
||||
vfmlalq_lane_high_f16
|
||||
vfmlalq_laneq_high_f16
|
||||
vreinterpret_f16_p64
|
||||
vreinterpretq_f16_p64
|
||||
vreinterpret_p64_f16
|
||||
vreinterpretq_p64_f16
|
||||
vreinterpret_p128_f16
|
||||
vreinterpretq_p128_f16
|
||||
|
||||
# Present in Clang header but triggers an ICE due to lack of backend support.
|
||||
vcmla_f32
|
||||
|
|
@ -134,6 +202,31 @@ vcmlaq_rot270_laneq_f32
|
|||
vcmlaq_rot90_f32
|
||||
vcmlaq_rot90_lane_f32
|
||||
vcmlaq_rot90_laneq_f32
|
||||
vcmla_f16
|
||||
vcmlaq_f16
|
||||
vcmla_laneq_f16
|
||||
vcmla_lane_f16
|
||||
vcmla_laneq_f16
|
||||
vcmlaq_lane_f16
|
||||
vcmlaq_laneq_f16
|
||||
vcmla_rot90_f16
|
||||
vcmlaq_rot90_f16
|
||||
vcmla_rot180_f16
|
||||
vcmlaq_rot180_f16
|
||||
vcmla_rot270_f16
|
||||
vcmlaq_rot270_f16
|
||||
vcmla_rot90_lane_f16
|
||||
vcmla_rot90_laneq_f16
|
||||
vcmlaq_rot90_lane_f16
|
||||
vcmlaq_rot90_laneq_f16
|
||||
vcmla_rot180_lane_f16
|
||||
vcmla_rot180_laneq_f16
|
||||
vcmlaq_rot180_lane_f16
|
||||
vcmlaq_rot180_laneq_f16
|
||||
vcmla_rot270_lane_f16
|
||||
vcmla_rot270_laneq_f16
|
||||
vcmlaq_rot270_lane_f16
|
||||
vcmlaq_rot270_laneq_f16
|
||||
|
||||
# Implemented in stdarch for A64 only; Clang supports both A32 and A64
|
||||
vadd_s64
|
||||
|
|
@ -182,4 +275,46 @@ vrndpq_f32
|
|||
vrndq_f32
|
||||
vrndq_f32
|
||||
vrndx_f32
|
||||
vrndxq_f32
|
||||
vrndxq_f32
|
||||
vrnda_f16
|
||||
vrnda_f16
|
||||
vrndaq_f16
|
||||
vrndaq_f16
|
||||
vrnd_f16
|
||||
vrnd_f16
|
||||
vrndi_f16
|
||||
vrndi_f16
|
||||
vrndiq_f16
|
||||
vrndiq_f16
|
||||
vrndm_f16
|
||||
vrndm_f16
|
||||
vrndmq_f16
|
||||
vrndmq_f16
|
||||
vrndns_f16
|
||||
vrndp_f16
|
||||
vrndpq_f16
|
||||
vrndq_f16
|
||||
vrndx_f16
|
||||
vrndxq_f16
|
||||
vpmin_f16
|
||||
vpmax_f16
|
||||
vcaddq_rot270_f16
|
||||
vcaddq_rot90_f16
|
||||
vcadd_rot270_f16
|
||||
vcadd_rot90_f16
|
||||
vcvtm_s16_f16
|
||||
vcvtmq_s16_f16
|
||||
vcvtm_u16_f16
|
||||
vcvtmq_u16_f16
|
||||
vcvtaq_s16_f16
|
||||
vcvtaq_u16_f16
|
||||
vcvtnq_s16_f16
|
||||
vcvtnq_u16_f16
|
||||
vcvtn_s16_f16
|
||||
vcvtn_u16_f16
|
||||
vcvtaq_s16_f16
|
||||
vcvtaq_u16_f16
|
||||
vcvta_s16_f16
|
||||
vcvta_u16_f16
|
||||
vceqz_f16
|
||||
vceqzq_f16
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ fn generate_rust_program(notices: &str, intrinsic: &Intrinsic, target: &str) ->
|
|||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
|
||||
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_f16))]
|
||||
#![feature(stdarch_neon_f16)]
|
||||
#![allow(non_upper_case_globals)]
|
||||
use core_arch::arch::{target_arch}::*;
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue