add vmovn_high, vrbit, vrnd, vsubhn neon instructions (#1103)
This commit is contained in:
parent
ef9ec33482
commit
7b21d85a41
5 changed files with 1309 additions and 14 deletions
|
|
@ -1816,6 +1816,60 @@ pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin
|
|||
vmlsl_u32(a, b, c)
|
||||
}
|
||||
|
||||
/// Extract narrow
///
/// Truncates each 16-bit lane of `b` to 8 bits and places the results in the
/// upper half of the return vector; `a` supplies the lower half unchanged
/// (AArch64 `XTN2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
    // Narrow `b`, then concatenate: lanes 0-7 come from `a`, lanes 8-15 from `c`.
    let c: int8x8_t = simd_cast(b);
    simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}

/// Extract narrow
///
/// Truncates each 32-bit lane of `b` to 16 bits; `a` supplies the lower half
/// of the result unchanged (AArch64 `XTN2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
    let c: int16x4_t = simd_cast(b);
    simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Extract narrow
///
/// Truncates each 64-bit lane of `b` to 32 bits; `a` supplies the lower half
/// of the result unchanged (AArch64 `XTN2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
    let c: int32x2_t = simd_cast(b);
    simd_shuffle4(a, c, [0, 1, 2, 3])
}

/// Extract narrow
///
/// Unsigned variant of [`vmovn_high_s16`]: truncates each 16-bit lane of `b`
/// to 8 bits into the upper half of the result (AArch64 `XTN2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
    let c: uint8x8_t = simd_cast(b);
    simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}

/// Extract narrow
///
/// Unsigned variant of [`vmovn_high_s32`] (AArch64 `XTN2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
    let c: uint16x4_t = simd_cast(b);
    simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Extract narrow
///
/// Unsigned variant of [`vmovn_high_s64`] (AArch64 `XTN2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
    let c: uint32x2_t = simd_cast(b);
    simd_shuffle4(a, c, [0, 1, 2, 3])
}
|
||||
|
||||
/// Negate
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -1874,6 +1928,428 @@ pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
|
|||
vqnegq_s64_(a)
|
||||
}
|
||||
|
||||
/// Reverse bit order
///
/// Reverses the order of the bits within every byte lane (AArch64 `RBIT`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v8i8")]
        fn vrbit_s8_(a: int8x8_t) -> int8x8_t;
    }
    vrbit_s8_(a)
}

/// Reverse bit order
///
/// 128-bit variant of [`vrbit_s8`] (AArch64 `RBIT`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v16i8")]
        fn vrbitq_s8_(a: int8x16_t) -> int8x16_t;
    }
    vrbitq_s8_(a)
}

/// Reverse bit order
///
/// Unsigned variant: bit reversal is type-agnostic, so this reuses the signed
/// implementation via `transmute` (AArch64 `RBIT`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
    transmute(vrbit_s8(transmute(a)))
}

/// Reverse bit order
///
/// Unsigned 128-bit variant, delegating to [`vrbitq_s8`] (AArch64 `RBIT`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
    transmute(vrbitq_s8(transmute(a)))
}

/// Reverse bit order
///
/// Polynomial variant, delegating to [`vrbit_s8`] (AArch64 `RBIT`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
    transmute(vrbit_s8(transmute(a)))
}

/// Reverse bit order
///
/// Polynomial 128-bit variant, delegating to [`vrbitq_s8`] (AArch64 `RBIT`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
    transmute(vrbitq_s8(transmute(a)))
}
|
||||
|
||||
/// Floating-point round to integral exact, using current rounding mode
///
/// Lowers to `llvm.rint`, which rounds per the current FP environment and may
/// raise the Inexact exception (AArch64 `FRINTX`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f32")]
        fn vrndx_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndx_f32_(a)
}

/// Floating-point round to integral exact, using current rounding mode
///
/// 128-bit `f32` variant of [`vrndx_f32`] (AArch64 `FRINTX`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v4f32")]
        fn vrndxq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndxq_f32_(a)
}

/// Floating-point round to integral exact, using current rounding mode
///
/// Single-lane `f64` variant of [`vrndx_f32`] (AArch64 `FRINTX`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v1f64")]
        fn vrndx_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndx_f64_(a)
}

/// Floating-point round to integral exact, using current rounding mode
///
/// 128-bit `f64` variant of [`vrndx_f32`] (AArch64 `FRINTX`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f64")]
        fn vrndxq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndxq_f64_(a)
}
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to away
///
/// Lowers to `llvm.round` (round half away from zero), matching AArch64
/// `FRINTA` semantics.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f32")]
        fn vrnda_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrnda_f32_(a)
}

/// Floating-point round to integral, to nearest with ties to away
///
/// 128-bit `f32` variant of [`vrnda_f32`] (AArch64 `FRINTA`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v4f32")]
        fn vrndaq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndaq_f32_(a)
}

/// Floating-point round to integral, to nearest with ties to away
///
/// Single-lane `f64` variant of [`vrnda_f32`] (AArch64 `FRINTA`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v1f64")]
        fn vrnda_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrnda_f64_(a)
}

/// Floating-point round to integral, to nearest with ties to away
///
/// 128-bit `f64` variant of [`vrnda_f32`] (AArch64 `FRINTA`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f64")]
        fn vrndaq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndaq_f64_(a)
}
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to even
///
/// Lowers to the AArch64-specific `llvm.aarch64.neon.frintn` intrinsic
/// (round to nearest, ties to even; AArch64 `FRINTN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndn_f32_(a)
}

/// Floating-point round to integral, to nearest with ties to even
///
/// 128-bit `f32` variant of [`vrndn_f32`] (AArch64 `FRINTN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndnq_f32_(a)
}

/// Floating-point round to integral, to nearest with ties to even
///
/// Single-lane `f64` variant of [`vrndn_f32`] (AArch64 `FRINTN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")]
        fn vrndn_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndn_f64_(a)
}

/// Floating-point round to integral, to nearest with ties to even
///
/// 128-bit `f64` variant of [`vrndn_f32`] (AArch64 `FRINTN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")]
        fn vrndnq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndnq_f64_(a)
}
|
||||
|
||||
/// Floating-point round to integral, toward minus infinity
///
/// Lowers to `llvm.floor` (AArch64 `FRINTM`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f32")]
        fn vrndm_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndm_f32_(a)
}

/// Floating-point round to integral, toward minus infinity
///
/// 128-bit `f32` variant of [`vrndm_f32`] (AArch64 `FRINTM`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v4f32")]
        fn vrndmq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndmq_f32_(a)
}

/// Floating-point round to integral, toward minus infinity
///
/// Single-lane `f64` variant of [`vrndm_f32`] (AArch64 `FRINTM`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v1f64")]
        fn vrndm_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndm_f64_(a)
}

/// Floating-point round to integral, toward minus infinity
///
/// 128-bit `f64` variant of [`vrndm_f32`] (AArch64 `FRINTM`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f64")]
        fn vrndmq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndmq_f64_(a)
}
|
||||
|
||||
/// Floating-point round to integral, toward plus infinity
///
/// Lowers to `llvm.ceil` (AArch64 `FRINTP`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f32")]
        fn vrndp_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndp_f32_(a)
}

/// Floating-point round to integral, toward plus infinity
///
/// 128-bit `f32` variant of [`vrndp_f32`] (AArch64 `FRINTP`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v4f32")]
        fn vrndpq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndpq_f32_(a)
}

/// Floating-point round to integral, toward plus infinity
///
/// Single-lane `f64` variant of [`vrndp_f32`] (AArch64 `FRINTP`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v1f64")]
        fn vrndp_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndp_f64_(a)
}

/// Floating-point round to integral, toward plus infinity
///
/// 128-bit `f64` variant of [`vrndp_f32`] (AArch64 `FRINTP`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f64")]
        fn vrndpq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndpq_f64_(a)
}
|
||||
|
||||
/// Floating-point round to integral, toward zero
///
/// Lowers to `llvm.trunc` (AArch64 `FRINTZ`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f32")]
        fn vrnd_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrnd_f32_(a)
}

/// Floating-point round to integral, toward zero
///
/// 128-bit `f32` variant of [`vrnd_f32`] (AArch64 `FRINTZ`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v4f32")]
        fn vrndq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndq_f32_(a)
}

/// Floating-point round to integral, toward zero
///
/// Single-lane `f64` variant of [`vrnd_f32`] (AArch64 `FRINTZ`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v1f64")]
        fn vrnd_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrnd_f64_(a)
}

/// Floating-point round to integral, toward zero
///
/// 128-bit `f64` variant of [`vrnd_f32`] (AArch64 `FRINTZ`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f64")]
        fn vrndq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndq_f64_(a)
}
|
||||
|
||||
/// Floating-point round to integral, using current rounding mode
///
/// Lowers to `llvm.nearbyint` — rounds per the current FP environment without
/// raising Inexact (contrast [`vrndx_f32`]; AArch64 `FRINTI`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f32")]
        fn vrndi_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndi_f32_(a)
}

/// Floating-point round to integral, using current rounding mode
///
/// 128-bit `f32` variant of [`vrndi_f32`] (AArch64 `FRINTI`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v4f32")]
        fn vrndiq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndiq_f32_(a)
}

/// Floating-point round to integral, using current rounding mode
///
/// Single-lane `f64` variant of [`vrndi_f32`] (AArch64 `FRINTI`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v1f64")]
        fn vrndi_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndi_f64_(a)
}

/// Floating-point round to integral, using current rounding mode
///
/// 128-bit `f64` variant of [`vrndi_f32`] (AArch64 `FRINTI`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f64")]
        fn vrndiq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndiq_f64_(a)
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -5298,6 +5774,60 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Tests for vmovn_high_*: the result's low half must equal `a` and its high
// half must equal `b` narrowed (truncated) to the smaller lane type.
#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_s16() {
    let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
    let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
    let r: i8x16 = transmute(vmovn_high_s16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_s32() {
    let a: i16x4 = i16x4::new(0, 1, 2, 3);
    let b: i32x4 = i32x4::new(2, 3, 4, 5);
    let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let r: i16x8 = transmute(vmovn_high_s32(transmute(a), transmute(b)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_s64() {
    let a: i32x2 = i32x2::new(0, 1);
    let b: i64x2 = i64x2::new(2, 3);
    let e: i32x4 = i32x4::new(0, 1, 2, 3);
    let r: i32x4 = transmute(vmovn_high_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_u16() {
    let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let b: u16x8 = u16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
    let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
    let r: u8x16 = transmute(vmovn_high_u16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_u32() {
    let a: u16x4 = u16x4::new(0, 1, 2, 3);
    let b: u32x4 = u32x4::new(2, 3, 4, 5);
    let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let r: u16x8 = transmute(vmovn_high_u32(transmute(a), transmute(b)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_u64() {
    let a: u32x2 = u32x2::new(0, 1);
    let b: u64x2 = u64x2::new(2, 3);
    let e: u32x4 = u32x4::new(0, 1, 2, 3);
    let r: u32x4 = transmute(vmovn_high_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vneg_s64() {
|
||||
let a: i64x1 = i64x1::new(0);
|
||||
|
|
@ -5346,6 +5876,278 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Tests for vrbit_*: bit-reversal within each byte, e.g. 2 (0b0000_0010)
// reverses to 64 (0b0100_0000). Polynomial variants reuse i8 lane values.
#[simd_test(enable = "neon")]
unsafe fn test_vrbit_s8() {
    let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
    let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
    let r: i8x8 = transmute(vrbit_s8(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrbitq_s8() {
    let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
    let r: i8x16 = transmute(vrbitq_s8(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrbit_u8() {
    let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
    let e: u8x8 = u8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
    let r: u8x8 = transmute(vrbit_u8(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrbitq_u8() {
    let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    let e: u8x16 = u8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
    let r: u8x16 = transmute(vrbitq_u8(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrbit_p8() {
    let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
    let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
    let r: i8x8 = transmute(vrbit_p8(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrbitq_p8() {
    let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
    let r: i8x16 = transmute(vrbitq_p8(transmute(a)));
    assert_eq!(r, e);
}
|
||||
|
||||
// Tests for the vrnd* rounding families. Expected values assume the default
// round-to-nearest-even FP environment for the mode-dependent variants
// (vrndx/vrndi): -1.5 -> -2.0 (tie to even), 0.5 -> 0.0.
#[simd_test(enable = "neon")]
unsafe fn test_vrndx_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndx_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndxq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let r: f32x4 = transmute(vrndxq_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndx_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndx_f64(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndxq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndxq_f64(transmute(a)));
    assert_eq!(r, e);
}

// vrnda: ties round away from zero, so 0.5 -> 1.0 and 2.5 -> 3.0.
#[simd_test(enable = "neon")]
unsafe fn test_vrnda_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 1.0);
    let r: f32x2 = transmute(vrnda_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndaq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 1.0, 2.0, 3.0);
    let r: f32x4 = transmute(vrndaq_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrnda_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrnda_f64(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndaq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 1.0);
    let r: f64x2 = transmute(vrndaq_f64(transmute(a)));
    assert_eq!(r, e);
}

// vrndn: ties round to even, so 0.5 -> 0.0 and 2.5 -> 2.0.
#[simd_test(enable = "neon")]
unsafe fn test_vrndn_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndn_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndnq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndn_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndn_f64(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndnq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
    assert_eq!(r, e);
}

// vrndm: floor — always rounds toward minus infinity.
#[simd_test(enable = "neon")]
unsafe fn test_vrndm_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndm_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndmq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 1.0, 2.0);
    let r: f32x4 = transmute(vrndmq_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndm_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndm_f64(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndmq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndmq_f64(transmute(a)));
    assert_eq!(r, e);
}

// vrndp: ceil — always rounds toward plus infinity.
#[simd_test(enable = "neon")]
unsafe fn test_vrndp_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-1.0, 1.0);
    let r: f32x2 = transmute(vrndp_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndpq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-1.0, 1.0, 2.0, 3.0);
    let r: f32x4 = transmute(vrndpq_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndp_f64() {
    let a: f64 = -1.5;
    let e: f64 = -1.0;
    let r: f64 = transmute(vrndp_f64(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndpq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-1.0, 1.0);
    let r: f64x2 = transmute(vrndpq_f64(transmute(a)));
    assert_eq!(r, e);
}

// vrnd: trunc — always rounds toward zero.
#[simd_test(enable = "neon")]
unsafe fn test_vrnd_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-1.0, 0.0);
    let r: f32x2 = transmute(vrnd_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-1.0, 0.0, 1.0, 2.0);
    let r: f32x4 = transmute(vrndq_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrnd_f64() {
    let a: f64 = -1.5;
    let e: f64 = -1.0;
    let r: f64 = transmute(vrnd_f64(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-1.0, 0.0);
    let r: f64x2 = transmute(vrndq_f64(transmute(a)));
    assert_eq!(r, e);
}

// vrndi: current rounding mode (default: nearest, ties to even).
#[simd_test(enable = "neon")]
unsafe fn test_vrndi_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndi_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndiq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let r: f32x4 = transmute(vrndiq_f32(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndi_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndi_f64(transmute(a)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrndiq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndiq_f64(transmute(a)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_f64() {
|
||||
let a: f64 = 1.0;
|
||||
|
|
|
|||
|
|
@ -3948,6 +3948,138 @@ pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
|
|||
simd_sub(a, b)
|
||||
}
|
||||
|
||||
/// Subtract returning high narrow
///
/// Computes `a - b` and keeps the most significant half of each lane:
/// `(a - b) >> 8` truncated to 8 bits (ARM `VSUBHN` / AArch64 `SUBHN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
    // Shift amount = half the lane width, so the cast keeps the high half.
    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}

/// Subtract returning high narrow
///
/// 32-bit-lane variant: keeps `(a - b) >> 16` per lane (ARM `VSUBHN` /
/// AArch64 `SUBHN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
    let c: i32x4 = i32x4::new(16, 16, 16, 16);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}

/// Subtract returning high narrow
///
/// 64-bit-lane variant: keeps `(a - b) >> 32` per lane (ARM `VSUBHN` /
/// AArch64 `SUBHN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
    let c: i64x2 = i64x2::new(32, 32);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}

/// Subtract returning high narrow
///
/// Unsigned variant of [`vsubhn_s16`] (ARM `VSUBHN` / AArch64 `SUBHN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}

/// Subtract returning high narrow
///
/// Unsigned variant of [`vsubhn_s32`] (ARM `VSUBHN` / AArch64 `SUBHN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
    let c: u32x4 = u32x4::new(16, 16, 16, 16);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}

/// Subtract returning high narrow
///
/// Unsigned variant of [`vsubhn_s64`] (ARM `VSUBHN` / AArch64 `SUBHN`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
    let c: u64x2 = u64x2::new(32, 32);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
|
||||
|
||||
/// Subtract returning high narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
|
||||
pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
|
||||
let d: int8x8_t = vsubhn_s16(b, c);
|
||||
simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Subtract returning high narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
|
||||
pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
|
||||
let d: int16x4_t = vsubhn_s32(b, c);
|
||||
simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Subtract returning high narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
|
||||
pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
|
||||
let d: int32x2_t = vsubhn_s64(b, c);
|
||||
simd_shuffle4(a, d, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Subtract returning high narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
|
||||
pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
|
||||
let d: uint8x8_t = vsubhn_u16(b, c);
|
||||
simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
|
||||
}
|
||||
|
||||
/// Subtract returning high narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
|
||||
pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
|
||||
let d: uint16x4_t = vsubhn_u32(b, c);
|
||||
simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
|
||||
}
|
||||
|
||||
/// Subtract returning high narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
|
||||
pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
|
||||
let d: uint32x2_t = vsubhn_u64(b, c);
|
||||
simd_shuffle4(a, d, [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Signed halving subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -10222,6 +10354,120 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_s16() {
|
||||
let a: i16x8 = i16x8::new(0x7F_FF, -32768, 1, 1, 0x7F_FF, -32768, 1, 1);
|
||||
let b: i16x8 = i16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
|
||||
let e: i8x8 = i8x8::new(0x7F, -128, 0, 0, 0x7F, -128, 0, 0);
|
||||
let r: i8x8 = transmute(vsubhn_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_s32() {
|
||||
let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 1, 1);
|
||||
let b: i32x4 = i32x4::new(1, 0, 0, 0);
|
||||
let e: i16x4 = i16x4::new(0x7F_FF, -32768, 0, 0);
|
||||
let r: i16x4 = transmute(vsubhn_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_s64() {
|
||||
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
|
||||
let b: i64x2 = i64x2::new(1, 0);
|
||||
let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648);
|
||||
let r: i32x2 = transmute(vsubhn_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_u16() {
|
||||
let a: u16x8 = u16x8::new(0xFF_FF, 0, 1, 1, 0xFF_FF, 0, 1, 1);
|
||||
let b: u16x8 = u16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
|
||||
let e: u8x8 = u8x8::new(0xFF, 0, 0, 0, 0xFF, 0, 0, 0);
|
||||
let r: u8x8 = transmute(vsubhn_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_u32() {
|
||||
let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 1, 1);
|
||||
let b: u32x4 = u32x4::new(1, 0, 0, 0);
|
||||
let e: u16x4 = u16x4::new(0xFF_FF, 0, 0, 0);
|
||||
let r: u16x4 = transmute(vsubhn_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_u64() {
|
||||
let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
|
||||
let b: u64x2 = u64x2::new(1, 0);
|
||||
let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
|
||||
let r: u32x2 = transmute(vsubhn_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_high_s16() {
|
||||
let a: i8x8 = i8x8::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
|
||||
let b: i16x8 = i16x8::new(0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1);
|
||||
let c: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
|
||||
let e: i8x16 = i8x16::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
|
||||
let r: i8x16 = transmute(vsubhn_high_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_high_s32() {
|
||||
let a: i16x4 = i16x4::new(0x7F_FF, 0, 0x7F_FF, 0);
|
||||
let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 1, 0x7F_FF_FF_FF, 1);
|
||||
let c: i32x4 = i32x4::new(1, 0, 1, 0);
|
||||
let e: i16x8 = i16x8::new(0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0);
|
||||
let r: i16x8 = transmute(vsubhn_high_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_high_s64() {
|
||||
let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0);
|
||||
let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 1);
|
||||
let c: i64x2 = i64x2::new(1, 0);
|
||||
let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, 0x7F_FF_FF_FF, 0);
|
||||
let r: i32x4 = transmute(vsubhn_high_s64(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_high_u16() {
|
||||
let a: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
|
||||
let b: u16x8 = u16x8::new(0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1);
|
||||
let c: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
|
||||
let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
|
||||
let r: u8x16 = transmute(vsubhn_high_u16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_high_u32() {
|
||||
let a: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0);
|
||||
let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 1, 0xFF_FF_FF_FF, 1);
|
||||
let c: u32x4 = u32x4::new(1, 0, 1, 0);
|
||||
let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0);
|
||||
let r: u16x8 = transmute(vsubhn_high_u32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsubhn_high_u64() {
|
||||
let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
|
||||
let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 1);
|
||||
let c: u64x2 = u64x2::new(1, 0);
|
||||
let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
|
||||
let r: u32x4 = transmute(vsubhn_high_u64(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vhsub_u8() {
|
||||
let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
|
|
|||
|
|
@ -5391,6 +5391,46 @@ pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
|
|||
simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t {
|
||||
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
|
||||
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
|
||||
simd_shuffle4(a, a, [1, 0, 3, 2])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
|
||||
pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
|
||||
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
|
||||
}
|
||||
|
||||
/// Reversing vector elements (swap endianness)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -10792,6 +10832,34 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrev32_s16() {
|
||||
let a = i16x4::new(0, 1, 2, 3);
|
||||
let r = i16x4::new(1, 0, 3, 2);
|
||||
let e: i16x4 = transmute(vrev32_s16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrev32q_s16() {
|
||||
let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
|
||||
let e: i16x8 = transmute(vrev32q_s16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrev32_p16() {
|
||||
let a = i16x4::new(0, 1, 2, 3);
|
||||
let r = i16x4::new(1, 0, 3, 2);
|
||||
let e: i16x4 = transmute(vrev32_p16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrev32q_p16() {
|
||||
let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
|
||||
let e: i16x8 = transmute(vrev32q_p16(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrev32_u16() {
|
||||
let a = u16x4::new(0, 1, 2, 3);
|
||||
let r = u16x4::new(1, 0, 3, 2);
|
||||
|
|
|
|||
|
|
@ -1050,6 +1050,19 @@ validate 14, 13, 12, 11, 10, 9, 8, 7
|
|||
aarch64 = umlsl2
|
||||
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
|
||||
|
||||
/// Extract narrow
|
||||
name = vmovn_high
|
||||
no-q
|
||||
multi_fn = simd_cast, c:in_t0, b
|
||||
multi_fn = simd_shuffle-out_len-noext, a, c, {asc-out_len}
|
||||
a = 0, 1, 2, 3, 2, 3, 4, 5
|
||||
b = 2, 3, 4, 5, 12, 13, 14, 15
|
||||
validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
|
||||
|
||||
aarch64 = xtn2
|
||||
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
|
||||
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
|
||||
|
||||
/// Negate
|
||||
name = vneg
|
||||
fn = simd_neg
|
||||
|
|
@ -1111,20 +1124,38 @@ a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
|
|||
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
|
||||
|
||||
|
||||
arm = vhadd.s
|
||||
aarch64 = uhadd
|
||||
link-aarch64 = uhadd._EXT_
|
||||
link-arm = vhaddu._EXT_
|
||||
generate uint*_t
|
||||
|
||||
|
||||
arm = vhadd.s
|
||||
aarch64 = shadd
|
||||
link-aarch64 = shadd._EXT_
|
||||
link-arm = vhadds._EXT_
|
||||
generate int*_t
|
||||
|
||||
/// Reverse bit order
|
||||
name = vrbit
|
||||
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
|
||||
validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
|
||||
|
||||
aarch64 = rbit
|
||||
link-aarch64 = rbit._EXT_
|
||||
|
||||
generate int8x8_t, int8x16_t
|
||||
|
||||
/// Reverse bit order
|
||||
name = vrbit
|
||||
multi_fn = transmute, {vrbit-signed-noext, transmute(a)}
|
||||
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
|
||||
validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
|
||||
|
||||
aarch64 = rbit
|
||||
|
||||
generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t
|
||||
|
||||
/// Rounding halving add
|
||||
name = vrhadd
|
||||
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
|
||||
|
|
@ -1143,6 +1174,69 @@ link-arm = vrhadds._EXT_
|
|||
link-aarch64 = srhadd._EXT_
|
||||
generate int*_t
|
||||
|
||||
/// Floating-point round to integral exact, using current rounding mode
|
||||
name = vrndx
|
||||
a = -1.5, 0.5, 1.5, 2.5
|
||||
validate -2.0, 0.0, 2.0, 2.0
|
||||
|
||||
aarch64 = frintx
|
||||
link-aarch64 = llvm.rint._EXT_
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to away
|
||||
name = vrnda
|
||||
a = -1.5, 0.5, 1.5, 2.5
|
||||
validate -2.0, 1.0, 2.0, 3.0
|
||||
|
||||
aarch64 = frinta
|
||||
link-aarch64 = llvm.round._EXT_
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to even
|
||||
name = vrndn
|
||||
a = -1.5, 0.5, 1.5, 2.5
|
||||
validate -2.0, 0.0, 2.0, 2.0
|
||||
|
||||
link-aarch64 = frintn._EXT_
|
||||
aarch64 = frintn
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Floating-point round to integral, toward minus infinity
|
||||
name = vrndm
|
||||
a = -1.5, 0.5, 1.5, 2.5
|
||||
validate -2.0, 0.0, 1.0, 2.0
|
||||
|
||||
aarch64 = frintm
|
||||
link-aarch64 = llvm.floor._EXT_
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Floating-point round to integral, toward plus infinity
|
||||
name = vrndp
|
||||
a = -1.5, 0.5, 1.5, 2.5
|
||||
validate -1.0, 1.0, 2.0, 3.0
|
||||
|
||||
aarch64 = frintp
|
||||
link-aarch64 = llvm.ceil._EXT_
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Floating-point round to integral, toward zero
|
||||
name = vrnd
|
||||
a = -1.5, 0.5, 1.5, 2.5
|
||||
validate -1.0, 0.0, 1.0, 2.0
|
||||
|
||||
aarch64 = frintz
|
||||
link-aarch64 = llvm.trunc._EXT_
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Floating-point round to integral, using current rounding mode
|
||||
name = vrndi
|
||||
a = -1.5, 0.5, 1.5, 2.5
|
||||
validate -2.0, 0.0, 2.0, 2.0
|
||||
|
||||
aarch64 = frinti
|
||||
link-aarch64 = llvm.nearbyint._EXT_
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Saturating add
|
||||
name = vqadd
|
||||
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
|
||||
|
|
@ -1295,6 +1389,35 @@ generate float64x*_t
|
|||
arm = vsub.
|
||||
generate float*_t
|
||||
|
||||
/// Subtract returning high narrow
|
||||
name = vsubhn
|
||||
no-q
|
||||
multi_fn = fixed, c:in_t
|
||||
multi_fn = simd_cast, {simd_shr, {simd_sub}, transmute(c)}
|
||||
a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
|
||||
b = 1, 0, 0, 0, 1, 0, 0, 0
|
||||
fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
|
||||
validate MAX, MIN, 0, 0, MAX, MIN, 0, 0
|
||||
|
||||
arm = vsubhn
|
||||
aarch64 = subhn
|
||||
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
|
||||
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
|
||||
|
||||
/// Subtract returning high narrow
|
||||
name = vsubhn_high
|
||||
no-q
|
||||
multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
|
||||
multi_fn = simd_shuffle-out_len-noext, a, d, {asc-out_len}
|
||||
a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
|
||||
b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
|
||||
c = 1, 0, 1, 0, 1, 0, 1, 0
|
||||
validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0
|
||||
|
||||
arm = vsubhn
|
||||
aarch64 = subhn2
|
||||
generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
|
||||
generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t
|
||||
|
||||
/// Signed halving subtract
|
||||
name = vhsub
|
||||
|
|
|
|||
|
|
@ -119,14 +119,14 @@ fn type_to_suffix(t: &str) -> &str {
|
|||
|
||||
fn type_to_signed_suffix(t: &str) -> &str {
|
||||
match t {
|
||||
"int8x8_t" | "uint8x8_t" => "_s8",
|
||||
"int8x16_t" | "uint8x16_t" => "q_s8",
|
||||
"int16x4_t" | "uint16x4_t" => "_s16",
|
||||
"int16x8_t" | "uint16x8_t" => "q_s16",
|
||||
"int8x8_t" | "uint8x8_t" | "poly8x8_t" => "_s8",
|
||||
"int8x16_t" | "uint8x16_t" | "poly8x16_t" => "q_s8",
|
||||
"int16x4_t" | "uint16x4_t" | "poly16x4_t" => "_s16",
|
||||
"int16x8_t" | "uint16x8_t" | "poly16x8_t" => "q_s16",
|
||||
"int32x2_t" | "uint32x2_t" => "_s32",
|
||||
"int32x4_t" | "uint32x4_t" => "q_s32",
|
||||
"int64x1_t" | "uint64x1_t" => "_s64",
|
||||
"int64x2_t" | "uint64x2_t" => "q_s64",
|
||||
"int64x1_t" | "uint64x1_t" | "poly64x1_t" => "_s64",
|
||||
"int64x2_t" | "uint64x2_t" | "poly64x2_t" => "q_s64",
|
||||
/*
|
||||
"float16x4_t" => "_f16",
|
||||
"float16x8_t" => "q_f16",
|
||||
|
|
@ -328,6 +328,16 @@ fn type_to_half(t: &str) -> &str {
|
|||
}
|
||||
}
|
||||
|
||||
fn asc(x: usize) -> &'static str {
|
||||
match x {
|
||||
2 => "[0, 1]",
|
||||
4 => "[0, 1, 2, 3]",
|
||||
8 => "[0, 1, 2, 3, 4, 5, 6, 7]",
|
||||
16 => "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",
|
||||
_ => panic!("unknown transpose order of len {}", x),
|
||||
}
|
||||
}
|
||||
|
||||
fn transpose1(x: usize) -> &'static str {
|
||||
match x {
|
||||
2 => "[0, 2]",
|
||||
|
|
@ -481,6 +491,23 @@ fn bits_minus_one(t: &str) -> &'static str {
|
|||
}
|
||||
}
|
||||
|
||||
fn half_bits(t: &str) -> &'static str {
|
||||
match &t[..3] {
|
||||
"u8x" => "4",
|
||||
"u16" => "8",
|
||||
"u32" => "16",
|
||||
"u64" => "32",
|
||||
"i8x" => "4",
|
||||
"i16" => "8",
|
||||
"i32" => "16",
|
||||
"i64" => "32",
|
||||
"p8x" => "4",
|
||||
"p16" => "8",
|
||||
"p64" => "32",
|
||||
_ => panic!("Unknown bits for type {}", t),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
|
||||
match v {
|
||||
"FALSE" => false_val(t),
|
||||
|
|
@ -490,6 +517,7 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
|
|||
"FF" => ff_val(t),
|
||||
"BITS" => bits(t),
|
||||
"BITS_M1" => bits_minus_one(t),
|
||||
"HFBITS" => half_bits(t),
|
||||
o => o,
|
||||
}
|
||||
}
|
||||
|
|
@ -554,14 +582,21 @@ fn gen_aarch64(
|
|||
let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
|
||||
let ext = type_to_ext(in_t[0]);
|
||||
let ext2 = type_to_ext(out_t);
|
||||
let link_aarch64 = if link_aarch64.starts_with("llvm") {
|
||||
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
|
||||
} else {
|
||||
let mut link = String::from("llvm.aarch64.neon.");
|
||||
link.push_str(&link_aarch64);
|
||||
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
|
||||
};
|
||||
format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
|
||||
fn {}({}) -> {};
|
||||
}}
|
||||
"#,
|
||||
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
|
||||
link_aarch64,
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
|
|
@ -817,16 +852,30 @@ fn gen_arm(
|
|||
if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
|
||||
let ext = type_to_ext(in_t[0]);
|
||||
let ext2 = type_to_ext(out_t);
|
||||
let link_arm = if link_arm.starts_with("llvm") {
|
||||
link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2)
|
||||
} else {
|
||||
let mut link = String::from("llvm.arm.neon.");
|
||||
link.push_str(&link_arm);
|
||||
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
|
||||
};
|
||||
let link_aarch64 = if link_aarch64.starts_with("llvm") {
|
||||
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
|
||||
} else {
|
||||
let mut link = String::from("llvm.aarch64.neon.");
|
||||
link.push_str(&link_aarch64);
|
||||
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
|
||||
};
|
||||
format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.{}")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "{}")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
|
||||
fn {}({}) -> {};
|
||||
}}
|
||||
"#,
|
||||
link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2),
|
||||
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
|
||||
link_arm,
|
||||
link_aarch64,
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
|
|
@ -1066,6 +1115,10 @@ fn get_call(
|
|||
re = Some((re_params[0].clone(), in_t[1].to_string()));
|
||||
} else if re_params[1] == "in_t" {
|
||||
re = Some((re_params[0].clone(), in_t[1].to_string()));
|
||||
} else if re_params[1] == "in_t0" {
|
||||
re = Some((re_params[0].clone(), in_t[0].to_string()));
|
||||
} else if re_params[1] == "in_t1" {
|
||||
re = Some((re_params[0].clone(), in_t[1].to_string()));
|
||||
} else if re_params[1] == "out_t" {
|
||||
re = Some((re_params[0].clone(), out_t.to_string()));
|
||||
} else if re_params[1] == "half" {
|
||||
|
|
@ -1097,6 +1150,9 @@ fn get_call(
|
|||
});
|
||||
return format!(r#"[{}]"#, &half[..half.len() - 2]);
|
||||
}
|
||||
if fn_name == "asc-out_len" {
|
||||
return asc(type_len(out_t)).to_string();
|
||||
}
|
||||
if fn_name == "transpose-1-in_len" {
|
||||
return transpose1(type_len(in_t[1])).to_string();
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue