support s64|u64 for neon instruction vqadd and vqsub (#1090)

This commit is contained in:
surechen 2021-03-20 08:55:11 +08:00 committed by GitHub
parent 72dda3aae0
commit ce1027d7d5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 204 additions and 4 deletions

View file

@ -2357,6 +2357,38 @@ pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
vqsubq_u32_(a, b)
}
/// Saturating subtract
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqsubu.v1i64` (ARM) / `uqsub.v1i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v1i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v1i64")]
        fn llvm_uqsub_v1i64(minuend: uint64x1_t, subtrahend: uint64x1_t) -> uint64x1_t;
    }
    llvm_uqsub_v1i64(a, b)
}
/// Saturating subtract
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqsubu.v2i64` (ARM) / `uqsub.v2i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v2i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i64")]
        fn llvm_uqsub_v2i64(minuend: uint64x2_t, subtrahend: uint64x2_t) -> uint64x2_t;
    }
    llvm_uqsub_v2i64(a, b)
}
/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
@ -2453,6 +2485,38 @@ pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
vqsubq_s32_(a, b)
}
/// Saturating subtract
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqsubs.v1i64` (ARM) / `sqsub.v1i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v1i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v1i64")]
        fn llvm_sqsub_v1i64(minuend: int64x1_t, subtrahend: int64x1_t) -> int64x1_t;
    }
    llvm_sqsub_v1i64(a, b)
}
/// Saturating subtract
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqsubs.v2i64` (ARM) / `sqsub.v2i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v2i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i64")]
        fn llvm_sqsub_v2i64(minuend: int64x2_t, subtrahend: int64x2_t) -> int64x2_t;
    }
    llvm_sqsub_v2i64(a, b)
}
/// Halving add
#[inline]
#[target_feature(enable = "neon")]
@ -2933,6 +2997,38 @@ pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
vqaddq_u32_(a, b)
}
/// Saturating add
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqaddu.v1i64` (ARM) / `uqadd.v1i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v1i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v1i64")]
        fn llvm_uqadd_v1i64(lhs: uint64x1_t, rhs: uint64x1_t) -> uint64x1_t;
    }
    llvm_uqadd_v1i64(a, b)
}
/// Saturating add
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqaddu.v2i64` (ARM) / `uqadd.v2i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))]
pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v2i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i64")]
        fn llvm_uqadd_v2i64(lhs: uint64x2_t, rhs: uint64x2_t) -> uint64x2_t;
    }
    llvm_uqadd_v2i64(a, b)
}
/// Saturating add
#[inline]
#[target_feature(enable = "neon")]
@ -3029,6 +3125,38 @@ pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
vqaddq_s32_(a, b)
}
/// Saturating add
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqadds.v1i64` (ARM) / `sqadd.v1i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v1i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v1i64")]
        fn llvm_sqadd_v1i64(lhs: int64x1_t, rhs: int64x1_t) -> int64x1_t;
    }
    llvm_sqadd_v1i64(a, b)
}
/// Saturating add
///
/// Forwards `a` and `b`, in that order, to the LLVM NEON intrinsic
/// `vqadds.v2i64` (ARM) / `sqadd.v2i64` (AArch64) and returns its result.
///
/// # Safety
///
/// Compiled with `#[target_feature(enable = "neon")]`; the caller must ensure
/// the executing CPU provides that feature.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        // The Rust-side name is local only; `link_name` picks the LLVM intrinsic.
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v2i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i64")]
        fn llvm_sqadd_v2i64(lhs: int64x2_t, rhs: int64x2_t) -> int64x2_t;
    }
    llvm_sqadd_v2i64(a, b)
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
@ -6171,6 +6299,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsub_u64() {
    // In-range operands: the result is the ordinary difference.
    let a: u64x1 = u64x1::new(42);
    let b: u64x1 = u64x1::new(1);
    let e: u64x1 = u64x1::new(41);
    let r: u64x1 = transmute(vqsub_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Underflow: 0 - 1 must clamp to 0 rather than wrap to u64::MAX. Without
    // this case a plain wrapping subtract would also pass the test.
    let a: u64x1 = u64x1::new(0);
    let b: u64x1 = u64x1::new(1);
    let e: u64x1 = u64x1::new(0);
    let r: u64x1 = transmute(vqsub_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsubq_u64() {
    // In-range operands: each lane is the ordinary difference.
    let a: u64x2 = u64x2::new(42, 42);
    let b: u64x2 = u64x2::new(1, 2);
    let e: u64x2 = u64x2::new(41, 40);
    let r: u64x2 = transmute(vqsubq_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Lane 0 underflows and must clamp to 0; lane 1 stays in range, which
    // checks that clamping is applied per lane.
    let a: u64x2 = u64x2::new(0, 42);
    let b: u64x2 = u64x2::new(1, 2);
    let e: u64x2 = u64x2::new(0, 40);
    let r: u64x2 = transmute(vqsubq_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsub_s8() {
let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
@ -6225,6 +6371,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsub_s64() {
    // In-range operands: the result is the ordinary difference.
    let a: i64x1 = i64x1::new(42);
    let b: i64x1 = i64x1::new(1);
    let e: i64x1 = i64x1::new(41);
    let r: i64x1 = transmute(vqsub_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Negative overflow: i64::MIN - 1 must clamp to i64::MIN rather than wrap
    // to i64::MAX. Without this case a wrapping subtract would also pass.
    let a: i64x1 = i64x1::new(i64::MIN);
    let b: i64x1 = i64x1::new(1);
    let e: i64x1 = i64x1::new(i64::MIN);
    let r: i64x1 = transmute(vqsub_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsubq_s64() {
    // In-range operands: each lane is the ordinary difference.
    let a: i64x2 = i64x2::new(42, 42);
    let b: i64x2 = i64x2::new(1, 2);
    let e: i64x2 = i64x2::new(41, 40);
    let r: i64x2 = transmute(vqsubq_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Both signed bounds: i64::MIN - 1 clamps to i64::MIN, and
    // i64::MAX - (-1) clamps to i64::MAX.
    let a: i64x2 = i64x2::new(i64::MIN, i64::MAX);
    let b: i64x2 = i64x2::new(1, -1);
    let e: i64x2 = i64x2::new(i64::MIN, i64::MAX);
    let r: i64x2 = transmute(vqsubq_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vhadd_u8() {
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
@ -6495,6 +6659,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqadd_u64() {
    // In-range operands: the result is the ordinary sum.
    let a: u64x1 = u64x1::new(42);
    let b: u64x1 = u64x1::new(1);
    let e: u64x1 = u64x1::new(43);
    let r: u64x1 = transmute(vqadd_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Overflow: u64::MAX + 1 must clamp to u64::MAX rather than wrap to 0.
    // Without this case a plain wrapping add would also pass the test.
    let a: u64x1 = u64x1::new(u64::MAX);
    let b: u64x1 = u64x1::new(1);
    let e: u64x1 = u64x1::new(u64::MAX);
    let r: u64x1 = transmute(vqadd_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqaddq_u64() {
    // In-range operands: each lane is the ordinary sum.
    let a: u64x2 = u64x2::new(42, 42);
    let b: u64x2 = u64x2::new(1, 2);
    let e: u64x2 = u64x2::new(43, 44);
    let r: u64x2 = transmute(vqaddq_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Lane 0 overflows and must clamp to u64::MAX; lane 1 stays in range,
    // which checks that clamping is applied per lane.
    let a: u64x2 = u64x2::new(u64::MAX, 42);
    let b: u64x2 = u64x2::new(1, 2);
    let e: u64x2 = u64x2::new(u64::MAX, 44);
    let r: u64x2 = transmute(vqaddq_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqadd_s8() {
let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
@ -6549,6 +6731,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqadd_s64() {
    // In-range operands: the result is the ordinary sum.
    let a: i64x1 = i64x1::new(42);
    let b: i64x1 = i64x1::new(1);
    let e: i64x1 = i64x1::new(43);
    let r: i64x1 = transmute(vqadd_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Positive overflow: i64::MAX + 1 must clamp to i64::MAX rather than wrap
    // to i64::MIN. Without this case a wrapping add would also pass.
    let a: i64x1 = i64x1::new(i64::MAX);
    let b: i64x1 = i64x1::new(1);
    let e: i64x1 = i64x1::new(i64::MAX);
    let r: i64x1 = transmute(vqadd_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqaddq_s64() {
    // In-range operands: each lane is the ordinary sum.
    let a: i64x2 = i64x2::new(42, 42);
    let b: i64x2 = i64x2::new(1, 2);
    let e: i64x2 = i64x2::new(43, 44);
    let r: i64x2 = transmute(vqaddq_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);

    // Both signed bounds: i64::MAX + 1 clamps to i64::MAX, and
    // i64::MIN + (-1) clamps to i64::MIN.
    let a: i64x2 = i64x2::new(i64::MAX, i64::MIN);
    let b: i64x2 = i64x2::new(1, -1);
    let e: i64x2 = i64x2::new(i64::MAX, i64::MIN);
    let r: i64x2 = transmute(vqaddq_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_s8() {
let a: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);

View file

@ -820,13 +820,13 @@ arm = vqsub.s
aarch64 = uqsub
link-arm = vqsubu._EXT_
link-aarch64 = uqsub._EXT_
generate uint*_t
generate uint*_t, uint64x*_t
arm = vqsub.s
aarch64 = sqsub
link-arm = vqsubs._EXT_
link-aarch64 = sqsub._EXT_
generate int*_t
generate int*_t, int64x*_t
/// Halving add
name = vhadd
@ -876,13 +876,13 @@ arm = vqadd.s
aarch64 = uqadd
link-arm = vqaddu._EXT_
link-aarch64 = uqadd._EXT_
generate uint*_t
generate uint*_t, uint64x*_t
arm = vqadd.s
aarch64 = sqadd
link-arm = vqadds._EXT_
link-aarch64 = sqadd._EXT_
generate int*_t
generate int*_t, int64x*_t
/// Multiply
name = vmul