Add NEON intrinsics for floating-point absolute value

This commit is contained in:
surechen 2021-03-04 15:41:38 +08:00 committed by Amanieu d'Antras
parent 59ecccfba0
commit 88ae5346e9
2 changed files with 72 additions and 0 deletions

View file

@ -72,6 +72,10 @@ extern "C" {
fn vabs_s64_(a: int64x1_t) -> int64x1_t;
// Binding to the AArch64 NEON `abs` LLVM intrinsic for a 2 x i64 vector;
// the public `vabsq_s64` wrapper forwards to it.
#[link_name = "llvm.aarch64.neon.abs.v2i64"]
fn vabsq_s64_(a: int64x2_t) -> int64x2_t;
// Binding to the generic LLVM `fabs` intrinsic for a 1 x f64 vector;
// the public `vabs_f64` wrapper forwards to it.
#[link_name = "llvm.fabs.v1f64"]
fn vabs_f64_(a: float64x1_t) -> float64x1_t;
// Binding to the generic LLVM `fabs` intrinsic for a 2 x f64 vector;
// the public `vabsq_f64` wrapper forwards to it.
#[link_name = "llvm.fabs.v2f64"]
fn vabsq_f64_(a: float64x2_t) -> float64x2_t;
// Binding to the AArch64 NEON `suqadd` LLVM intrinsic for 8 x i8 vectors.
// Takes a signed vector `a` and an unsigned vector `b`, and returns a signed vector.
#[link_name = "llvm.aarch64.neon.suqadd.v8i8"]
fn vuqadd_s8_(a: int8x8_t, b: uint8x8_t) -> int8x8_t;
@ -688,6 +692,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
pub unsafe fn vabsd_s64(a: i64) -> i64 {
vabsd_s64_(a)
}
/// Absolute Value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
@ -703,6 +708,21 @@ pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
vabsq_s64_(a)
}
/// Floating-point absolute value.
///
/// Forwards `a` to `vabs_f64_`, the binding for the `llvm.fabs.v1f64`
/// intrinsic, and returns its result unchanged.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled, so it
/// should only be executed on CPUs that support that feature.
#[inline]
#[target_feature(enable = "neon")]
// In test builds, assert that the wrapper is expected to lower to a `fabs` instruction.
#[cfg_attr(test, assert_instr(fabs))]
pub unsafe fn vabs_f64(a: float64x1_t) -> float64x1_t {
vabs_f64_(a)
}
/// Floating-point absolute value.
///
/// Forwards `a` to `vabsq_f64_`, the binding for the `llvm.fabs.v2f64`
/// intrinsic, and returns its result unchanged.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled, so it
/// should only be executed on CPUs that support that feature.
#[inline]
#[target_feature(enable = "neon")]
// In test builds, assert that the wrapper is expected to lower to a `fabs` instruction.
#[cfg_attr(test, assert_instr(fabs))]
pub unsafe fn vabsq_f64(a: float64x2_t) -> float64x2_t {
vabsq_f64_(a)
}
/// Signed saturating Accumulate of Unsigned value.
#[inline]
#[target_feature(enable = "neon")]
@ -3912,6 +3932,20 @@ mod tests {
let e = i64x2::new(i64::MIN, i64::MAX);
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vabs_f64() {
    // `f64::MIN` is the most negative finite value, so its absolute value is `f64::MAX`.
    let input = f64x1::new(f64::MIN);
    let expected = f64x1::new(f64::MAX);
    let actual: f64x1 = transmute(vabs_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vabsq_f64() {
    // Lane 0 covers the extreme finite value, lane 1 an ordinary negative number.
    let input = f64x2::new(f64::MIN, -4.2);
    let expected = f64x2::new(f64::MAX, 4.2);
    let actual: f64x2 = transmute(vabsq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vaddv_s16() {

View file

@ -135,6 +135,12 @@ extern "C" {
// Per-architecture binding for the 4 x i32 absolute-value intrinsic: ARM links
// the `vabs` NEON intrinsic, AArch64 links the `abs` one. The public
// `vabsq_s32` wrapper forwards to it.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v4i32")]
fn vabsq_s32_(a: int32x4_t) -> int32x4_t;
// Binding for the 2 x f32 floating-point absolute value. Both architectures link
// the same generic `llvm.fabs` symbol; the paired `cfg_attr` form matches the
// neighbouring declarations. The public `vabs_f32` wrapper forwards to it.
#[cfg_attr(target_arch = "arm", link_name = "llvm.fabs.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fabs.v2f32")]
fn vabs_f32_(a: float32x2_t) -> float32x2_t;
// Binding for the 4 x f32 floating-point absolute value. Both architectures link
// the same generic `llvm.fabs` symbol; the paired `cfg_attr` form matches the
// neighbouring declarations. The public `vabsq_f32` wrapper forwards to it.
#[cfg_attr(target_arch = "arm", link_name = "llvm.fabs.v4f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.fabs.v4f32")]
fn vabsq_f32_(a: float32x4_t) -> float32x4_t;
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
@ -1147,6 +1153,24 @@ pub unsafe fn vabsq_s16(a: int16x8_t) -> int16x8_t {
pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t {
vabsq_s32_(a)
}
/// Floating-point absolute value.
///
/// Forwards `a` to `vabs_f32_`, the binding for the `llvm.fabs.v2f32`
/// intrinsic, and returns its result unchanged.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled (plus `v7`
/// on 32-bit ARM), so it should only be executed on CPUs that support it.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// The mnemonic differs per architecture: 32-bit ARM emits `vabs` (`vabs.f32`),
// while AArch64 emits `fabs`.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabs))]
pub unsafe fn vabs_f32(a: float32x2_t) -> float32x2_t {
    vabs_f32_(a)
}
/// Floating-point absolute value.
///
/// Forwards `a` to `vabsq_f32_`, the binding for the `llvm.fabs.v4f32`
/// intrinsic, and returns its result unchanged.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled (plus `v7`
/// on 32-bit ARM), so it should only be executed on CPUs that support it.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// The mnemonic differs per architecture: 32-bit ARM emits `vabs` (`vabs.f32`),
// while AArch64 emits `fabs`.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabs))]
pub unsafe fn vabsq_f32(a: float32x4_t) -> float32x4_t {
    vabsq_f32_(a)
}
/// Add pairwise.
#[inline]
@ -8565,6 +8589,20 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vabs_f32() {
    // Lane 0 covers the extreme finite value (`f32::MIN` is `-f32::MAX`),
    // lane 1 an ordinary negative number.
    let input = f32x2::new(f32::MIN, -1.0);
    let expected = f32x2::new(f32::MAX, 1.0);
    let actual: f32x2 = transmute(vabs_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vabsq_f32() {
    // Every lane is negative on input; the result must be the same magnitudes
    // with the sign cleared.
    let input = f32x4::new(f32::MIN, -1.32, -4.3, -6.8);
    let expected = f32x4::new(f32::MAX, 1.32, 4.3, 6.8);
    let actual: f32x4 = transmute(vabsq_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vpadd_s16() {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(0, -1, -2, -3);