add vmovn_high, vrbit, vrnd, vsubhn neon instructions (#1103)

This commit is contained in:
Sparrow Li 2021-03-31 22:48:58 +08:00 committed by GitHub
parent ef9ec33482
commit 7b21d85a41
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 1309 additions and 14 deletions

View file

@@ -1816,6 +1816,60 @@ pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin
vmlsl_u32(a, b, c)
}
/// Extract narrow
///
/// Narrows each 16-bit lane of `b` to 8 bits by truncation and writes the
/// results to the upper half of the 16-lane result; `a` supplies the lower
/// half unchanged (AArch64 `xtn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
    // simd_cast to a narrower integer type truncates each lane.
    let c: int8x8_t = simd_cast(b);
    // Concatenate: lanes 0-7 from `a`, lanes 8-15 from the narrowed `c`.
    simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
/// Extract narrow
///
/// Truncating narrow of `b`'s 32-bit lanes into the upper half of the result;
/// `a` fills the lower half (AArch64 `xtn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
    let c: int16x4_t = simd_cast(b);
    simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Extract narrow
///
/// Truncating narrow of `b`'s 64-bit lanes into the upper half of the result;
/// `a` fills the lower half (AArch64 `xtn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
    let c: int32x2_t = simd_cast(b);
    simd_shuffle4(a, c, [0, 1, 2, 3])
}
/// Extract narrow
///
/// Unsigned variant: truncating narrow of `b`'s 16-bit lanes into the upper
/// half of the result; `a` fills the lower half (AArch64 `xtn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
    let c: uint8x8_t = simd_cast(b);
    simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
/// Extract narrow
///
/// Unsigned variant: truncating narrow of `b`'s 32-bit lanes into the upper
/// half of the result; `a` fills the lower half (AArch64 `xtn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
    let c: uint16x4_t = simd_cast(b);
    simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Extract narrow
///
/// Unsigned variant: truncating narrow of `b`'s 64-bit lanes into the upper
/// half of the result; `a` fills the lower half (AArch64 `xtn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(xtn2))]
pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
    let c: uint32x2_t = simd_cast(b);
    simd_shuffle4(a, c, [0, 1, 2, 3])
}
/// Negate
#[inline]
#[target_feature(enable = "neon")]
@@ -1874,6 +1928,428 @@ pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
vqnegq_s64_(a)
}
/// Reverse bit order
///
/// Reverses the order of the 8 bits within every byte lane (AArch64 `rbit`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // Binding to the LLVM AArch64 NEON per-byte bit-reverse intrinsic.
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v8i8")]
        fn vrbit_s8_(a: int8x8_t) -> int8x8_t;
    }
    vrbit_s8_(a)
}
/// Reverse bit order
///
/// 128-bit variant: reverses the bits within each of the 16 byte lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v16i8")]
        fn vrbitq_s8_(a: int8x16_t) -> int8x16_t;
    }
    vrbitq_s8_(a)
}
/// Reverse bit order
///
/// Bit reversal is sign-agnostic, so this reuses the signed implementation;
/// the transmutes only reinterpret the bit pattern.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
    transmute(vrbit_s8(transmute(a)))
}
/// Reverse bit order
///
/// Unsigned 128-bit variant; delegates to the signed implementation via
/// bit-pattern-preserving transmutes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
    transmute(vrbitq_s8(transmute(a)))
}
/// Reverse bit order
///
/// Polynomial 64-bit variant; delegates to the signed implementation via
/// bit-pattern-preserving transmutes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
    transmute(vrbit_s8(transmute(a)))
}
/// Reverse bit order
///
/// Polynomial 128-bit variant; delegates to the signed implementation via
/// bit-pattern-preserving transmutes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rbit))]
pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
    transmute(vrbitq_s8(transmute(a)))
}
/// Floating-point round to integral exact, using current rounding mode
///
/// Uses `llvm.rint`, which follows the runtime rounding mode (AArch64 `frintx`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f32")]
        fn vrndx_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndx_f32_(a)
}
/// Floating-point round to integral exact, using current rounding mode
///
/// Uses `llvm.rint`, which follows the runtime rounding mode (AArch64 `frintx`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v4f32")]
        fn vrndxq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndxq_f32_(a)
}
/// Floating-point round to integral exact, using current rounding mode
///
/// Uses `llvm.rint`, which follows the runtime rounding mode (AArch64 `frintx`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v1f64")]
        fn vrndx_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndx_f64_(a)
}
/// Floating-point round to integral exact, using current rounding mode
///
/// Uses `llvm.rint`, which follows the runtime rounding mode (AArch64 `frintx`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintx))]
pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f64")]
        fn vrndxq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndxq_f64_(a)
}
/// Floating-point round to integral, to nearest with ties to away
///
/// Uses `llvm.round`: halfway cases round away from zero (AArch64 `frinta`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f32")]
        fn vrnda_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrnda_f32_(a)
}
/// Floating-point round to integral, to nearest with ties to away
///
/// Uses `llvm.round`: halfway cases round away from zero (AArch64 `frinta`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v4f32")]
        fn vrndaq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndaq_f32_(a)
}
/// Floating-point round to integral, to nearest with ties to away
///
/// Uses `llvm.round`: halfway cases round away from zero (AArch64 `frinta`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v1f64")]
        fn vrnda_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrnda_f64_(a)
}
/// Floating-point round to integral, to nearest with ties to away
///
/// Uses `llvm.round`: halfway cases round away from zero (AArch64 `frinta`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinta))]
pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f64")]
        fn vrndaq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndaq_f64_(a)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Uses the AArch64-specific `frintn` intrinsic: halfway cases round to even.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndn_f32_(a)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Uses the AArch64-specific `frintn` intrinsic: halfway cases round to even.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndnq_f32_(a)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Uses the AArch64-specific `frintn` intrinsic: halfway cases round to even.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")]
        fn vrndn_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndn_f64_(a)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Uses the AArch64-specific `frintn` intrinsic: halfway cases round to even.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")]
        fn vrndnq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndnq_f64_(a)
}
/// Floating-point round to integral, toward minus infinity
///
/// Uses `llvm.floor` (AArch64 `frintm`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f32")]
        fn vrndm_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndm_f32_(a)
}
/// Floating-point round to integral, toward minus infinity
///
/// Uses `llvm.floor` (AArch64 `frintm`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v4f32")]
        fn vrndmq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndmq_f32_(a)
}
/// Floating-point round to integral, toward minus infinity
///
/// Uses `llvm.floor` (AArch64 `frintm`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v1f64")]
        fn vrndm_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndm_f64_(a)
}
/// Floating-point round to integral, toward minus infinity
///
/// Uses `llvm.floor` (AArch64 `frintm`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintm))]
pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f64")]
        fn vrndmq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndmq_f64_(a)
}
/// Floating-point round to integral, toward plus infinity
///
/// Uses `llvm.ceil` (AArch64 `frintp`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f32")]
        fn vrndp_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndp_f32_(a)
}
/// Floating-point round to integral, toward plus infinity
///
/// Uses `llvm.ceil` (AArch64 `frintp`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v4f32")]
        fn vrndpq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndpq_f32_(a)
}
/// Floating-point round to integral, toward plus infinity
///
/// Uses `llvm.ceil` (AArch64 `frintp`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v1f64")]
        fn vrndp_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndp_f64_(a)
}
/// Floating-point round to integral, toward plus infinity
///
/// Uses `llvm.ceil` (AArch64 `frintp`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintp))]
pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f64")]
        fn vrndpq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndpq_f64_(a)
}
/// Floating-point round to integral, toward zero
///
/// Uses `llvm.trunc` (AArch64 `frintz`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f32")]
        fn vrnd_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrnd_f32_(a)
}
/// Floating-point round to integral, toward zero
///
/// Uses `llvm.trunc` (AArch64 `frintz`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v4f32")]
        fn vrndq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndq_f32_(a)
}
/// Floating-point round to integral, toward zero
///
/// Uses `llvm.trunc` (AArch64 `frintz`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v1f64")]
        fn vrnd_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrnd_f64_(a)
}
/// Floating-point round to integral, toward zero
///
/// Uses `llvm.trunc` (AArch64 `frintz`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frintz))]
pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f64")]
        fn vrndq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndq_f64_(a)
}
/// Floating-point round to integral, using current rounding mode
///
/// Uses `llvm.nearbyint`: follows the current rounding mode without raising
/// the Inexact exception (AArch64 `frinti`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f32")]
        fn vrndi_f32_(a: float32x2_t) -> float32x2_t;
    }
    vrndi_f32_(a)
}
/// Floating-point round to integral, using current rounding mode
///
/// Uses `llvm.nearbyint`: follows the current rounding mode without raising
/// the Inexact exception (AArch64 `frinti`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v4f32")]
        fn vrndiq_f32_(a: float32x4_t) -> float32x4_t;
    }
    vrndiq_f32_(a)
}
/// Floating-point round to integral, using current rounding mode
///
/// Uses `llvm.nearbyint`: follows the current rounding mode without raising
/// the Inexact exception (AArch64 `frinti`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v1f64")]
        fn vrndi_f64_(a: float64x1_t) -> float64x1_t;
    }
    vrndi_f64_(a)
}
/// Floating-point round to integral, using current rounding mode
///
/// Uses `llvm.nearbyint`: follows the current rounding mode without raising
/// the Inexact exception (AArch64 `frinti`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(frinti))]
pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f64")]
        fn vrndiq_f64_(a: float64x2_t) -> float64x2_t;
    }
    vrndiq_f64_(a)
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
@@ -5298,6 +5774,60 @@ mod test {
assert_eq!(r, e);
}
// vmovn_high_*: the lower half of the result must be `a` verbatim and the
// upper half must be `b` truncated lane-by-lane.
#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_s16() {
    let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
    let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
    let r: i8x16 = transmute(vmovn_high_s16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_s32() {
    let a: i16x4 = i16x4::new(0, 1, 2, 3);
    let b: i32x4 = i32x4::new(2, 3, 4, 5);
    let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let r: i16x8 = transmute(vmovn_high_s32(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_s64() {
    let a: i32x2 = i32x2::new(0, 1);
    let b: i64x2 = i64x2::new(2, 3);
    let e: i32x4 = i32x4::new(0, 1, 2, 3);
    let r: i32x4 = transmute(vmovn_high_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_u16() {
    let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let b: u16x8 = u16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
    let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
    let r: u8x16 = transmute(vmovn_high_u16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_u32() {
    let a: u16x4 = u16x4::new(0, 1, 2, 3);
    let b: u32x4 = u32x4::new(2, 3, 4, 5);
    let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
    let r: u16x8 = transmute(vmovn_high_u32(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmovn_high_u64() {
    let a: u32x2 = u32x2::new(0, 1);
    let b: u64x2 = u64x2::new(2, 3);
    let e: u32x4 = u32x4::new(0, 1, 2, 3);
    let r: u32x4 = transmute(vmovn_high_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vneg_s64() {
let a: i64x1 = i64x1::new(0);
@@ -5346,6 +5876,278 @@ mod test {
assert_eq!(r, e);
}
// vrbit: bits are reversed within each byte, e.g. 2 = 0b0000_0010 reverses
// to 0b0100_0000 = 64.
#[simd_test(enable = "neon")]
unsafe fn test_vrbit_s8() {
    let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
    let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
    let r: i8x8 = transmute(vrbit_s8(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrbitq_s8() {
    let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
    let r: i8x16 = transmute(vrbitq_s8(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrbit_u8() {
    let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
    let e: u8x8 = u8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
    let r: u8x8 = transmute(vrbit_u8(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrbitq_u8() {
    let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    let e: u8x16 = u8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
    let r: u8x16 = transmute(vrbitq_u8(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrbit_p8() {
    let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
    let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
    let r: i8x8 = transmute(vrbit_p8(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrbitq_p8() {
    let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
    let r: i8x16 = transmute(vrbitq_p8(transmute(a)));
    assert_eq!(r, e);
}
// vrndx (rint): under the default round-to-nearest-even mode ties go to
// even, so 0.5 -> 0.0 and 2.5 -> 2.0.
#[simd_test(enable = "neon")]
unsafe fn test_vrndx_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndx_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndxq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let r: f32x4 = transmute(vrndxq_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndx_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndx_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndxq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndxq_f64(transmute(a)));
    assert_eq!(r, e);
}
// vrnda: ties round away from zero, so 0.5 -> 1.0 and -1.5 -> -2.0.
#[simd_test(enable = "neon")]
unsafe fn test_vrnda_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 1.0);
    let r: f32x2 = transmute(vrnda_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndaq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 1.0, 2.0, 3.0);
    let r: f32x4 = transmute(vrndaq_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrnda_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrnda_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndaq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 1.0);
    let r: f64x2 = transmute(vrndaq_f64(transmute(a)));
    assert_eq!(r, e);
}
// vrndn: ties round to even, so 0.5 -> 0.0 and 2.5 -> 2.0.
#[simd_test(enable = "neon")]
unsafe fn test_vrndn_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndn_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndnq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndn_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndn_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndnq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
    assert_eq!(r, e);
}
// vrndm: rounds toward minus infinity (floor).
#[simd_test(enable = "neon")]
unsafe fn test_vrndm_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndm_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndmq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 1.0, 2.0);
    let r: f32x4 = transmute(vrndmq_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndm_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndm_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndmq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndmq_f64(transmute(a)));
    assert_eq!(r, e);
}
// vrndp: rounds toward plus infinity (ceil).
#[simd_test(enable = "neon")]
unsafe fn test_vrndp_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-1.0, 1.0);
    let r: f32x2 = transmute(vrndp_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndpq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-1.0, 1.0, 2.0, 3.0);
    let r: f32x4 = transmute(vrndpq_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndp_f64() {
    let a: f64 = -1.5;
    let e: f64 = -1.0;
    let r: f64 = transmute(vrndp_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndpq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-1.0, 1.0);
    let r: f64x2 = transmute(vrndpq_f64(transmute(a)));
    assert_eq!(r, e);
}
// vrnd: rounds toward zero (truncate), so -1.5 -> -1.0.
#[simd_test(enable = "neon")]
unsafe fn test_vrnd_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-1.0, 0.0);
    let r: f32x2 = transmute(vrnd_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-1.0, 0.0, 1.0, 2.0);
    let r: f32x4 = transmute(vrndq_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrnd_f64() {
    let a: f64 = -1.5;
    let e: f64 = -1.0;
    let r: f64 = transmute(vrnd_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-1.0, 0.0);
    let r: f64x2 = transmute(vrndq_f64(transmute(a)));
    assert_eq!(r, e);
}
// vrndi (nearbyint): follows the current rounding mode; under the default
// round-to-nearest-even, ties go to even.
#[simd_test(enable = "neon")]
unsafe fn test_vrndi_f32() {
    let a: f32x2 = f32x2::new(-1.5, 0.5);
    let e: f32x2 = f32x2::new(-2.0, 0.0);
    let r: f32x2 = transmute(vrndi_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndiq_f32() {
    let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let r: f32x4 = transmute(vrndiq_f32(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndi_f64() {
    let a: f64 = -1.5;
    let e: f64 = -2.0;
    let r: f64 = transmute(vrndi_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndiq_f64() {
    let a: f64x2 = f64x2::new(-1.5, 0.5);
    let e: f64x2 = f64x2::new(-2.0, 0.0);
    let r: f64x2 = transmute(vrndiq_f64(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_f64() {
let a: f64 = 1.0;

View file

@@ -3948,6 +3948,138 @@ pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
simd_sub(a, b)
}
/// Subtract returning high narrow
///
/// Computes the wrapping difference `a - b` and returns the most significant
/// half of each lane, i.e. `(a - b) >> 8` narrowed to 8 bits.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
    // Per-lane shift count of half the lane width selects the high half.
    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
/// Subtract returning high narrow
///
/// High half of each 32-bit wrapping difference, narrowed to 16 bits.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
    let c: i32x4 = i32x4::new(16, 16, 16, 16);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
/// Subtract returning high narrow
///
/// High half of each 64-bit wrapping difference, narrowed to 32 bits.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
    let c: i64x2 = i64x2::new(32, 32);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
/// Subtract returning high narrow
///
/// Unsigned variant: high half of each 16-bit wrapping difference,
/// narrowed to 8 bits (logical shift on unsigned lanes).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
/// Subtract returning high narrow
///
/// Unsigned variant: high half of each 32-bit wrapping difference,
/// narrowed to 16 bits.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
    let c: u32x4 = u32x4::new(16, 16, 16, 16);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
/// Subtract returning high narrow
///
/// Unsigned variant: high half of each 64-bit wrapping difference,
/// narrowed to 32 bits.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
    let c: u64x2 = u64x2::new(32, 32);
    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
}
/// Subtract returning high narrow
///
/// Narrows the high half of each lane of `b - c` (as `vsubhn_s16`) and
/// places the results in the upper half of the output; `a` supplies the
/// lower half unchanged (AArch64 `subhn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
    let d: int8x8_t = vsubhn_s16(b, c);
    // Concatenate `a` (low lanes) with the narrowed difference (high lanes).
    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
/// Subtract returning high narrow
///
/// High-narrowed `b - c` into the upper half; `a` fills the lower half
/// (AArch64 `subhn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
    let d: int16x4_t = vsubhn_s32(b, c);
    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Subtract returning high narrow
///
/// High-narrowed `b - c` into the upper half; `a` fills the lower half
/// (AArch64 `subhn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
    let d: int32x2_t = vsubhn_s64(b, c);
    simd_shuffle4(a, d, [0, 1, 2, 3])
}
/// Subtract returning high narrow
///
/// Unsigned variant: high-narrowed `b - c` into the upper half; `a` fills
/// the lower half (AArch64 `subhn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
    let d: uint8x8_t = vsubhn_u16(b, c);
    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
/// Subtract returning high narrow
///
/// Unsigned variant: high-narrowed `b - c` into the upper half; `a` fills
/// the lower half (AArch64 `subhn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
    let d: uint16x4_t = vsubhn_u32(b, c);
    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Subtract returning high narrow
///
/// Unsigned variant: high-narrowed `b - c` into the upper half; `a` fills
/// the lower half (AArch64 `subhn2`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
    let d: uint32x2_t = vsubhn_u64(b, c);
    simd_shuffle4(a, d, [0, 1, 2, 3])
}
/// Signed halving subtract
#[inline]
#[target_feature(enable = "neon")]
@@ -10222,6 +10354,120 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_s16() {
let a: i16x8 = i16x8::new(0x7F_FF, -32768, 1, 1, 0x7F_FF, -32768, 1, 1);
let b: i16x8 = i16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
let e: i8x8 = i8x8::new(0x7F, -128, 0, 0, 0x7F, -128, 0, 0);
let r: i8x8 = transmute(vsubhn_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_s32() {
let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 1, 1);
let b: i32x4 = i32x4::new(1, 0, 0, 0);
let e: i16x4 = i16x4::new(0x7F_FF, -32768, 0, 0);
let r: i16x4 = transmute(vsubhn_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_s64() {
let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
let b: i64x2 = i64x2::new(1, 0);
let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648);
let r: i32x2 = transmute(vsubhn_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_u16() {
let a: u16x8 = u16x8::new(0xFF_FF, 0, 1, 1, 0xFF_FF, 0, 1, 1);
let b: u16x8 = u16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
let e: u8x8 = u8x8::new(0xFF, 0, 0, 0, 0xFF, 0, 0, 0);
let r: u8x8 = transmute(vsubhn_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_u32() {
let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 1, 1);
let b: u32x4 = u32x4::new(1, 0, 0, 0);
let e: u16x4 = u16x4::new(0xFF_FF, 0, 0, 0);
let r: u16x4 = transmute(vsubhn_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_u64() {
let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
let b: u64x2 = u64x2::new(1, 0);
let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
let r: u32x2 = transmute(vsubhn_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_high_s16() {
let a: i8x8 = i8x8::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
let b: i16x8 = i16x8::new(0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1);
let c: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
let e: i8x16 = i8x16::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
let r: i8x16 = transmute(vsubhn_high_s16(transmute(a), transmute(b), transmute(c)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_high_s32() {
let a: i16x4 = i16x4::new(0x7F_FF, 0, 0x7F_FF, 0);
let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 1, 0x7F_FF_FF_FF, 1);
let c: i32x4 = i32x4::new(1, 0, 1, 0);
let e: i16x8 = i16x8::new(0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0);
let r: i16x8 = transmute(vsubhn_high_s32(transmute(a), transmute(b), transmute(c)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_high_s64() {
    // vsubhn_high_s64: low half from `low`, high half is the narrowed
    // high 32 bits of each 64-bit difference.
    let low: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0);
    let minuend: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 1);
    let subtrahend: i64x2 = i64x2::new(1, 0);
    let expected: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, 0x7F_FF_FF_FF, 0);
    let actual: i32x4 = transmute(vsubhn_high_s64(
        transmute(low),
        transmute(minuend),
        transmute(subtrahend),
    ));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_high_u16() {
    // Unsigned counterpart of test_vsubhn_high_s16, saturating at 0xFF_FF.
    let low: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
    let minuend: u16x8 = u16x8::new(0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1);
    let subtrahend: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
    let expected: u8x16 = u8x16::new(
        0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0,
    );
    let actual: u8x16 = transmute(vsubhn_high_u16(
        transmute(low),
        transmute(minuend),
        transmute(subtrahend),
    ));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_high_u32() {
    // Unsigned counterpart of test_vsubhn_high_s32.
    let low: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0);
    let minuend: u32x4 = u32x4::new(0xFF_FF_FF_FF, 1, 0xFF_FF_FF_FF, 1);
    let subtrahend: u32x4 = u32x4::new(1, 0, 1, 0);
    let expected: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0);
    let actual: u16x8 = transmute(vsubhn_high_u32(
        transmute(low),
        transmute(minuend),
        transmute(subtrahend),
    ));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsubhn_high_u64() {
    // Unsigned counterpart of test_vsubhn_high_s64.
    let low: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
    let minuend: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 1);
    let subtrahend: u64x2 = u64x2::new(1, 0);
    let expected: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
    let actual: u32x4 = transmute(vsubhn_high_u64(
        transmute(low),
        transmute(minuend),
        transmute(subtrahend),
    ));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vhsub_u8() {
let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);

View file

@ -5391,6 +5391,46 @@ pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
}
/// Reversing vector elements (swap endianness)
///
/// Swaps the two 16-bit lanes inside each 32-bit word of `a`
/// (lane order [0, 1, 2, 3] becomes [1, 0, 3, 2]).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t {
// Pure shuffle: no lane values change, only their positions.
simd_shuffle4(a, a, [1, 0, 3, 2])
}
/// Reversing vector elements (swap endianness)
///
/// Swaps the two 16-bit lanes inside each 32-bit word of `a`
/// (128-bit variant: pairs [0,1], [2,3], [4,5], [6,7] are each swapped).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
// Pure shuffle: no lane values change, only their positions.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
}
/// Reversing vector elements (swap endianness)
///
/// Polynomial-type variant of [`vrev32_s16`]: swaps the two 16-bit lanes
/// inside each 32-bit word of `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
// Same lane permutation as the signed variant; only the element type differs.
simd_shuffle4(a, a, [1, 0, 3, 2])
}
/// Reversing vector elements (swap endianness)
///
/// Polynomial-type variant of [`vrev32q_s16`]: swaps the two 16-bit lanes
/// inside each 32-bit word of `a` (128-bit vector).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
// Same lane permutation as the signed variant; only the element type differs.
simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
}
/// Reversing vector elements (swap endianness)
#[inline]
#[target_feature(enable = "neon")]
@ -10792,6 +10832,34 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrev32_s16() {
    // vrev32 on 16-bit lanes: swap each adjacent pair within a 32-bit word.
    // (Original generated code named the expected value `r` and the computed
    // value `e`; renamed here for clarity — assertion is unchanged.)
    let input = i16x4::new(0, 1, 2, 3);
    let expected = i16x4::new(1, 0, 3, 2);
    let actual: i16x4 = transmute(vrev32_s16(transmute(input)));
    assert_eq!(expected, actual);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrev32q_s16() {
    // 128-bit variant: four independent pair swaps.
    let input = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
    let expected = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
    let actual: i16x8 = transmute(vrev32q_s16(transmute(input)));
    assert_eq!(expected, actual);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrev32_p16() {
    // Polynomial variant behaves identically to the signed one; the test
    // drives it through i16x4 via transmute, as the generated suite does.
    let input = i16x4::new(0, 1, 2, 3);
    let expected = i16x4::new(1, 0, 3, 2);
    let actual: i16x4 = transmute(vrev32_p16(transmute(input)));
    assert_eq!(expected, actual);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrev32q_p16() {
    // 128-bit polynomial variant: four independent pair swaps.
    let input = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
    let expected = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
    let actual: i16x8 = transmute(vrev32q_p16(transmute(input)));
    assert_eq!(expected, actual);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrev32_u16() {
let a = u16x4::new(0, 1, 2, 3);
let r = u16x4::new(1, 0, 3, 2);

View file

@ -1050,6 +1050,19 @@ validate 14, 13, 12, 11, 10, 9, 8, 7
aarch64 = umlsl2
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
/// Extract narrow
name = vmovn_high
no-q
multi_fn = simd_cast, c:in_t0, b
multi_fn = simd_shuffle-out_len-noext, a, c, {asc-out_len}
a = 0, 1, 2, 3, 2, 3, 4, 5
b = 2, 3, 4, 5, 12, 13, 14, 15
validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
aarch64 = xtn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
/// Negate
name = vneg
fn = simd_neg
@ -1111,20 +1124,38 @@ a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
arm = vhadd.s
aarch64 = uhadd
link-aarch64 = uhadd._EXT_
link-arm = vhaddu._EXT_
generate uint*_t
arm = vhadd.s
aarch64 = shadd
link-aarch64 = shadd._EXT_
link-arm = vhadds._EXT_
generate int*_t
/// Reverse bit order
name = vrbit
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
aarch64 = rbit
link-aarch64 = rbit._EXT_
generate int8x8_t, int8x16_t
/// Reverse bit order
name = vrbit
multi_fn = transmute, {vrbit-signed-noext, transmute(a)}
a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
aarch64 = rbit
generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t
/// Rounding halving add
name = vrhadd
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
@ -1143,6 +1174,69 @@ link-arm = vrhadds._EXT_
link-aarch64 = srhadd._EXT_
generate int*_t
/// Floating-point round to integral exact, using current rounding mode
name = vrndx
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 2.0, 2.0
aarch64 = frintx
link-aarch64 = llvm.rint._EXT_
generate float*_t, float64x*_t
/// Floating-point round to integral, to nearest with ties to away
name = vrnda
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 1.0, 2.0, 3.0
aarch64 = frinta
link-aarch64 = llvm.round._EXT_
generate float*_t, float64x*_t
/// Floating-point round to integral, to nearest with ties to even
name = vrndn
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 2.0, 2.0
link-aarch64 = frintn._EXT_
aarch64 = frintn
generate float*_t, float64x*_t
/// Floating-point round to integral, toward minus infinity
name = vrndm
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 1.0, 2.0
aarch64 = frintm
link-aarch64 = llvm.floor._EXT_
generate float*_t, float64x*_t
/// Floating-point round to integral, toward plus infinity
name = vrndp
a = -1.5, 0.5, 1.5, 2.5
validate -1.0, 1.0, 2.0, 3.0
aarch64 = frintp
link-aarch64 = llvm.ceil._EXT_
generate float*_t, float64x*_t
/// Floating-point round to integral, toward zero
name = vrnd
a = -1.5, 0.5, 1.5, 2.5
validate -1.0, 0.0, 1.0, 2.0
aarch64 = frintz
link-aarch64 = llvm.trunc._EXT_
generate float*_t, float64x*_t
/// Floating-point round to integral, using current rounding mode
name = vrndi
a = -1.5, 0.5, 1.5, 2.5
validate -2.0, 0.0, 2.0, 2.0
aarch64 = frinti
link-aarch64 = llvm.nearbyint._EXT_
generate float*_t, float64x*_t
/// Saturating add
name = vqadd
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
@ -1295,6 +1389,35 @@ generate float64x*_t
arm = vsub.
generate float*_t
/// Subtract returning high narrow
name = vsubhn
no-q
multi_fn = fixed, c:in_t
multi_fn = simd_cast, {simd_shr, {simd_sub}, transmute(c)}
a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
b = 1, 0, 0, 0, 1, 0, 0, 0
fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
validate MAX, MIN, 0, 0, MAX, MIN, 0, 0
arm = vsubhn
aarch64 = subhn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
/// Subtract returning high narrow
name = vsubhn_high
no-q
multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
multi_fn = simd_shuffle-out_len-noext, a, d, {asc-out_len}
a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
c = 1, 0, 1, 0, 1, 0, 1, 0
validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0
arm = vsubhn
aarch64 = subhn2
generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t
/// Signed halving subtract
name = vhsub

View file

@ -119,14 +119,14 @@ fn type_to_suffix(t: &str) -> &str {
fn type_to_signed_suffix(t: &str) -> &str {
match t {
"int8x8_t" | "uint8x8_t" => "_s8",
"int8x16_t" | "uint8x16_t" => "q_s8",
"int16x4_t" | "uint16x4_t" => "_s16",
"int16x8_t" | "uint16x8_t" => "q_s16",
"int8x8_t" | "uint8x8_t" | "poly8x8_t" => "_s8",
"int8x16_t" | "uint8x16_t" | "poly8x16_t" => "q_s8",
"int16x4_t" | "uint16x4_t" | "poly16x4_t" => "_s16",
"int16x8_t" | "uint16x8_t" | "poly16x8_t" => "q_s16",
"int32x2_t" | "uint32x2_t" => "_s32",
"int32x4_t" | "uint32x4_t" => "q_s32",
"int64x1_t" | "uint64x1_t" => "_s64",
"int64x2_t" | "uint64x2_t" => "q_s64",
"int64x1_t" | "uint64x1_t" | "poly64x1_t" => "_s64",
"int64x2_t" | "uint64x2_t" | "poly64x2_t" => "q_s64",
/*
"float16x4_t" => "_f16",
"float16x8_t" => "q_f16",
@ -328,6 +328,16 @@ fn type_to_half(t: &str) -> &str {
}
}
/// Returns the ascending shuffle-index literal `[0, 1, …, x-1]` for a
/// vector of `x` lanes, used when emitting `simd_shuffle` index arrays
/// (e.g. for the `vmovn_high`/`vsubhn_high` spec entries).
///
/// # Panics
/// Panics if `x` is not one of the supported lane counts (2, 4, 8, 16).
fn asc(x: usize) -> &'static str {
    match x {
        2 => "[0, 1]",
        4 => "[0, 1, 2, 3]",
        8 => "[0, 1, 2, 3, 4, 5, 6, 7]",
        16 => "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",
        // Fix: the message previously said "unknown transpose order",
        // copy-pasted from `transpose1`; this helper emits an ascending
        // index list, not a transpose order.
        _ => panic!("unknown ascending order of len {}", x),
    }
}
fn transpose1(x: usize) -> &'static str {
match x {
2 => "[0, 2]",
@ -481,6 +491,23 @@ fn bits_minus_one(t: &str) -> &'static str {
}
}
/// Returns half the element bit-width of vector type `t`, as a decimal
/// string (maps the `HFBITS` placeholder in spec files, e.g. for the
/// `vsubhn` right-shift amount).
///
/// The first three bytes of the type name (`"u8x"`, `"i16"`, `"p64"`, …)
/// uniquely identify the element width; signedness/polynomial prefixes of
/// the same width all map to the same value.
///
/// # Panics
/// Panics if the prefix is not a recognized 8/16/32/64-bit int, uint, or
/// poly element type.
fn half_bits(t: &str) -> &'static str {
    match &t[..3] {
        "i8x" | "u8x" | "p8x" => "4",
        "i16" | "u16" | "p16" => "8",
        "i32" | "u32" => "16",
        "i64" | "u64" | "p64" => "32",
        _ => panic!("Unknown bits for type {}", t),
    }
}
fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
match v {
"FALSE" => false_val(t),
@ -490,6 +517,7 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
"FF" => ff_val(t),
"BITS" => bits(t),
"BITS_M1" => bits_minus_one(t),
"HFBITS" => half_bits(t),
o => o,
}
}
@ -554,14 +582,21 @@ fn gen_aarch64(
let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
let ext = type_to_ext(in_t[0]);
let ext2 = type_to_ext(out_t);
let link_aarch64 = if link_aarch64.starts_with("llvm") {
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
} else {
let mut link = String::from("llvm.aarch64.neon.");
link.push_str(&link_aarch64);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
link_aarch64,
current_fn,
match para_num {
1 => {
@ -817,16 +852,30 @@ fn gen_arm(
if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
let ext = type_to_ext(in_t[0]);
let ext2 = type_to_ext(out_t);
let link_arm = if link_arm.starts_with("llvm") {
link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2)
} else {
let mut link = String::from("llvm.arm.neon.");
link.push_str(&link_arm);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
let link_aarch64 = if link_aarch64.starts_with("llvm") {
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
} else {
let mut link = String::from("llvm.aarch64.neon.");
link.push_str(&link_aarch64);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.{}")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
#[cfg_attr(target_arch = "arm", link_name = "{}")]
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2),
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
link_arm,
link_aarch64,
current_fn,
match para_num {
1 => {
@ -1066,6 +1115,10 @@ fn get_call(
re = Some((re_params[0].clone(), in_t[1].to_string()));
} else if re_params[1] == "in_t" {
re = Some((re_params[0].clone(), in_t[1].to_string()));
} else if re_params[1] == "in_t0" {
re = Some((re_params[0].clone(), in_t[0].to_string()));
} else if re_params[1] == "in_t1" {
re = Some((re_params[0].clone(), in_t[1].to_string()));
} else if re_params[1] == "out_t" {
re = Some((re_params[0].clone(), out_t.to_string()));
} else if re_params[1] == "half" {
@ -1097,6 +1150,9 @@ fn get_call(
});
return format!(r#"[{}]"#, &half[..half.len() - 2]);
}
if fn_name == "asc-out_len" {
return asc(type_len(out_t)).to_string();
}
if fn_name == "transpose-1-in_len" {
return transpose1(type_len(in_t[1])).to_string();
}