From e83d05a3c04092a3e2fa894c3a1211ebe07217c6 Mon Sep 17 00:00:00 2001 From: Sparrow Li Date: Mon, 29 Mar 2021 10:43:36 +0800 Subject: [PATCH] add simd_neg platform intrinsic and vneg, vqneg neon instructions (#1099) --- .../core_arch/src/aarch64/neon/generated.rs | 106 +++++++ .../core_arch/src/arm/neon/generated.rs | 288 ++++++++++++++++++ .../stdarch/crates/core_arch/src/simd_llvm.rs | 2 + library/stdarch/crates/stdarch-gen/neon.spec | 37 +++ 4 files changed, 433 insertions(+) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index b8b85c2f66fc..f59ed72fda26 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -1753,6 +1753,64 @@ pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin vmlsl_u32(a, b, c) } +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(neg))] +pub unsafe fn vneg_s64(a: int64x1_t) -> int64x1_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(neg))] +pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fneg))] +pub unsafe fn vneg_f64(a: float64x1_t) -> float64x1_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fneg))] +pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t { + simd_neg(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqneg_s64(a: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v1i64")] + fn vqneg_s64_(a: int64x1_t) -> int64x1_t; + } + vqneg_s64_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqneg))] +pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i64")] + fn vqnegq_s64_(a: int64x2_t) -> int64x2_t; + } + vqnegq_s64_(a) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -4361,6 +4419,54 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vneg_s64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vneg_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x2 = i64x2::new(0, -1); + let r: i64x2 = transmute(vnegq_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vneg_f64() { + let a: f64 = 0.; + let e: f64 = 0.; + let r: f64 = transmute(vneg_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_f64() { + let a: f64x2 = f64x2::new(0., 1.); + let e: f64x2 = f64x2::new(0., -1.); + let r: f64x2 = transmute(vnegq_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqneg_s64() { + let a: i64x1 = i64x1::new(-9223372036854775808); + let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let r: i64x1 = transmute(vqneg_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqnegq_s64() { + let a: i64x2 = i64x2::new(-9223372036854775808, 0); + let e: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0); + let r: i64x2 = transmute(vqnegq_s64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs index 418682ce3627..30cb35e9f787 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs @@ -2381,6 +2381,182 @@ pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2 simd_sub(a, vmull_u32(b, c)) } +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))] +pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))] +pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t { + simd_neg(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i8")] + fn vqneg_s8_(a: int8x8_t) -> int8x8_t; + } +vqneg_s8_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v16i8")] + fn vqnegq_s8_(a: int8x16_t) -> int8x16_t; + } +vqnegq_s8_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i16")] + fn vqneg_s16_(a: int16x4_t) -> int16x4_t; + } +vqneg_s16_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i16")] + fn vqnegq_s16_(a: int16x8_t) -> int16x8_t; + } +vqnegq_s16_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i32")] + fn vqneg_s32_(a: int32x2_t) -> int32x2_t; + } +vqneg_s32_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i32")] + fn vqnegq_s32_(a: int32x4_t) -> int32x4_t; + } +vqnegq_s32_(a) +} + /// Saturating subtract #[inline] #[target_feature(enable = "neon")] @@ -6666,6 +6842,118 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vneg_s8() { + let a: i8x8 = i8x8::new(0, 1, -1, 2, -2, 3, -3, 4); + let e: i8x8 = i8x8::new(0, -1, 1, -2, 2, -3, 3, -4); + let r: i8x8 = transmute(vneg_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_s8() { + let a: i8x16 = i8x16::new(0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8); + let e: i8x16 = i8x16::new(0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8); + let r: i8x16 = transmute(vnegq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vneg_s16() { + let a: i16x4 = i16x4::new(0, 1, -1, 2); + let e: i16x4 = i16x4::new(0, -1, 1, -2); + let r: i16x4 = transmute(vneg_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_s16() { + let a: i16x8 = i16x8::new(0, 1, -1, 2, -2, 3, -3, 4); + let e: i16x8 = i16x8::new(0, -1, 1, -2, 2, -3, 3, -4); + let r: i16x8 = transmute(vnegq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vneg_s32() { + let a: i32x2 = i32x2::new(0, 1); + let e: i32x2 = i32x2::new(0, -1); + let r: i32x2 = transmute(vneg_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_s32() { + let a: i32x4 = i32x4::new(0, 1, -1, 2); + let e: i32x4 = i32x4::new(0, -1, 1, -2); + let r: i32x4 = transmute(vnegq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vneg_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let e: f32x2 = f32x2::new(0., -1.); + let r: f32x2 = transmute(vneg_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_f32() { + let a: f32x4 = f32x4::new(0., 1., -1., 2.); + let e: f32x4 = f32x4::new(0., -1., 1., -2.); + let r: f32x4 = transmute(vnegq_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqneg_s8() { + let a: i8x8 = i8x8::new(-128, 0, 1, -1, 2, -2, 3, -3); + let e: i8x8 = i8x8::new(0x7F, 0, -1, 1, -2, 2, -3, 3); + let r: i8x8 = transmute(vqneg_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqnegq_s8() { + let a: i8x16 = i8x16::new(-128, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7); + let e: i8x16 = i8x16::new(0x7F, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); + let r: i8x16 = transmute(vqnegq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqneg_s16() { + let a: i16x4 = i16x4::new(-32768, 0, 1, -1); + let e: i16x4 = i16x4::new(0x7F_FF, 0, -1, 1); + let r: i16x4 = transmute(vqneg_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqnegq_s16() { + let a: i16x8 = i16x8::new(-32768, 0, 1, -1, 2, -2, 3, -3); + let e: i16x8 = i16x8::new(0x7F_FF, 0, -1, 1, -2, 2, -3, 3); + let r: i16x8 = transmute(vqnegq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqneg_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0); + let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0); + let r: i32x2 = transmute(vqneg_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqnegq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0, 1, -1); + let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, -1, 1); + let r: i32x4 = transmute(vqnegq_s32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqsub_u8() { let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); diff --git a/library/stdarch/crates/core_arch/src/simd_llvm.rs b/library/stdarch/crates/core_arch/src/simd_llvm.rs index f472b86f8bd8..60f6a1eee9b8 100644 --- a/library/stdarch/crates/core_arch/src/simd_llvm.rs +++ b/library/stdarch/crates/core_arch/src/simd_llvm.rs @@ -43,6 +43,8 @@ extern "platform-intrinsic" { pub fn simd_or(x: T, y: T) -> T; pub fn simd_xor(x: T, y: T) -> T; + pub fn simd_neg(x: T) -> T; + pub fn simd_saturating_add(x: T, y: T) -> T; pub fn simd_saturating_sub(x: T, y: T) -> T; diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 2d3061e88877..e8e56cc565ba 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -918,6 +918,43 @@ validate 14, 13, 12, 11, 10, 9, 8, 7 aarch64 = umlsl2 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t +/// Negate +name = vneg +fn = simd_neg +a = 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 8 +validate 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8 + +aarch64 = neg +generate int64x*_t + +arm = vneg.s +generate int*_t + +/// Negate +name = vneg +fn = simd_neg +a = 0., 1., -1., 2., -2., 3., -3., 4. +validate 0., -1., 1., -2., 2., -3., 3., -4. + +aarch64 = fneg +generate float64x*_t + +arm = vneg.s +generate float*_t + +/// Signed saturating negate +name = vqneg +a = MIN, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7 +validate MAX, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7 +link-arm = vqneg._EXT_ +link-aarch64 = sqneg._EXT_ + +aarch64 = sqneg +generate int64x*_t + +arm = vqneg.s +generate int*_t + /// Saturating subtract name = vqsub a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42