From a3b5ef2b0ce59b6d5199bfca4929bb0a0fa811d6 Mon Sep 17 00:00:00 2001 From: surechen Date: Thu, 8 Apr 2021 22:22:35 +0800 Subject: [PATCH] add neon instruction vsubw_* and vsubl_* (#1112) --- .../core_arch/src/aarch64/neon/generated.rs | 234 +++++++++++++++++ .../core_arch/src/arm/neon/generated.rs | 240 ++++++++++++++++++ library/stdarch/crates/stdarch-gen/neon.spec | 222 ++++++++++++++++ 3 files changed, 696 insertions(+) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 88b1cf128f56..d2f558a90eea 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -2512,6 +2512,132 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { simd_sub(a, b) } +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ssubw))] +pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let c: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_sub(a, simd_cast(c)) +} + +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ssubw))] +pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let c: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + simd_sub(a, simd_cast(c)) +} + +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ssubw))] +pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let c: int32x2_t = simd_shuffle2(b, b, [2, 3]); + simd_sub(a, simd_cast(c)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usubw))] +pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let c: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + simd_sub(a, simd_cast(c)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usubw))] +pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let c: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + simd_sub(a, simd_cast(c)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usubw))] +pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let c: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + simd_sub(a, simd_cast(c)) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ssubl))] +pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + let c: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let d: int16x8_t = simd_cast(c); + let e: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let f: int16x8_t = simd_cast(e); + simd_sub(d, f) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ssubl))] +pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + let c: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + let d: int32x4_t = simd_cast(c); + let e: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let f: int32x4_t = simd_cast(e); + simd_sub(d, f) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ssubl))] +pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + let c: int32x2_t = simd_shuffle2(a, a, [2, 3]); + let d: int64x2_t = simd_cast(c); + let e: int32x2_t = simd_shuffle2(b, b, [2, 3]); + let f: int64x2_t = simd_cast(e); + simd_sub(d, f) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usubl))] +pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + let c: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let d: uint16x8_t = simd_cast(c); + let e: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let f: uint16x8_t = simd_cast(e); + simd_sub(d, f) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usubl))] +pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + let c: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + let d: uint32x4_t = simd_cast(c); + let e: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]); + let f: uint32x4_t = simd_cast(e); + simd_sub(d, f) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(usubl))] +pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + let c: uint32x2_t = simd_shuffle2(a, a, [2, 3]); + let d: uint64x2_t = simd_cast(c); + let e: uint32x2_t = simd_shuffle2(b, b, [2, 3]); + let f: uint64x2_t = simd_cast(e); + simd_sub(d, f) +} + /// Maximum (vector) #[inline] #[target_feature(enable = "neon")] @@ -6459,6 +6585,114 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_high_s8() { + let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16); + let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vsubw_high_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_high_s16() { + let a: i32x4 = i32x4::new(8, 9, 10, 11); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 8, 9, 10, 11); + let e: i32x4 = i32x4::new(0, 0, 0, 0); + let r: i32x4 = transmute(vsubw_high_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_high_s32() { + let a: i64x2 = i64x2::new(8, 9); + let b: i32x4 = i32x4::new(6, 7, 8, 9); + let e: i64x2 = i64x2::new(0, 0); + let r: i64x2 = transmute(vsubw_high_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_high_u8() { + let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vsubw_high_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_high_u16() { + let a: u32x4 = u32x4::new(8, 9, 10, 11); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 8, 9, 10, 11); + let e: u32x4 = u32x4::new(0, 0, 0, 0); + let r: u32x4 = transmute(vsubw_high_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_high_u32() { + let a: u64x2 = u64x2::new(8, 9); + let b: u32x4 = u32x4::new(6, 7, 8, 9); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vsubw_high_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_high_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vsubl_high_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_high_s16() { + let a: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); + let b: i16x8 = i16x8::new(6, 6, 6, 6, 8, 8, 8, 8); + let e: i32x4 = i32x4::new(4, 5, 6, 7); + let r: i32x4 = transmute(vsubl_high_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_high_s32() { + let a: i32x4 = i32x4::new(12, 13, 14, 15); + let b: i32x4 = i32x4::new(6, 6, 8, 8); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vsubl_high_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_high_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vsubl_high_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_high_u16() { + let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15); + let b: u16x8 = u16x8::new(6, 6, 6, 6, 8, 8, 8, 8); + let e: u32x4 = u32x4::new(4, 5, 6, 7); + let r: u32x4 = transmute(vsubl_high_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_high_u32() { + let a: u32x4 = u32x4::new(12, 13, 14, 15); + let b: u32x4 = u32x4::new(6, 6, 8, 8); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vsubl_high_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmax_f64() { let a: f64 = 1.0; diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs index b538450d3df5..8114d3d50205 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs @@ -4682,6 +4682,138 @@ pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { vhsubq_s32_(a, b) } +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + simd_sub(a, simd_cast(b)) +} + +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + simd_sub(a, simd_cast(b)) +} + +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + simd_sub(a, simd_cast(b)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + simd_sub(a, simd_cast(b)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + simd_sub(a, simd_cast(b)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + simd_sub(a, simd_cast(b)) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let c: int16x8_t = simd_cast(a); + let d: int16x8_t = simd_cast(b); + simd_sub(c, d) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let c: int32x4_t = simd_cast(a); + let d: int32x4_t = simd_cast(b); + simd_sub(c, d) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let c: int64x2_t = simd_cast(a); + let d: int64x2_t = simd_cast(b); + simd_sub(c, d) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + let c: uint16x8_t = simd_cast(a); + let d: uint16x8_t = simd_cast(b); + simd_sub(c, d) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + let c: uint32x4_t = simd_cast(a); + let d: uint32x4_t = simd_cast(b); + simd_sub(c, d) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + let c: uint64x2_t = simd_cast(a); + let d: uint64x2_t = simd_cast(b); + simd_sub(c, d) +} + /// Maximum (vector) #[inline] #[target_feature(enable = "neon")] @@ -11230,6 +11362,114 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_s8() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vsubw_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(0, 1, 2, 3); + let e: i32x4 = i32x4::new(0, 0, 0, 0); + let r: i32x4 = transmute(vsubw_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(0, 1); + let e: i64x2 = i64x2::new(0, 0); + let r: i64x2 = transmute(vsubw_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_u8() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vsubw_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0, 0, 0, 0); + let r: u32x4 = transmute(vsubw_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubw_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(0, 1); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vsubw_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_s8() { + let a: i8x8 = i8x8::new(0x7F, -128, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(0x7F, -128, 2, 3, 4, 5, 6, 7); + let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vsubl_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_s16() { + let a: i16x4 = i16x4::new(0x7F_FF, -32768, 2, 3); + let b: i16x4 = i16x4::new(0x7F_FF, -32768, 2, 3); + let e: i32x4 = i32x4::new(0, 0, 0, 0); + let r: i32x4 = transmute(vsubl_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_s32() { + let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648); + let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648); + let e: i64x2 = i64x2::new(0, 0); + let r: i64x2 = transmute(vsubl_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_u8() { + let a: u8x8 = u8x8::new(0xFF, 0, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(0xFF, 0, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vsubl_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_u16() { + let a: u16x4 = u16x4::new(0xFF_FF, 0, 2, 3); + let b: u16x4 = u16x4::new(0xFF_FF, 0, 2, 3); + let e: u32x4 = u32x4::new(0, 0, 0, 0); + let r: u32x4 = transmute(vsubl_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubl_u32() { + let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let b: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vsubl_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmax_s8() { let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index e9370cb9334c..50a9f02a95bb 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -1483,6 +1483,228 @@ link-arm = vhsubs._EXT_ link-aarch64 = shsub._EXT_ generate int*_t +/// Signed Subtract Wide +name = vsubw +no-q +multi_fn = simd_sub, a, {simd_cast, b} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 +validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +arm = vsubw +aarch64 = ssubw +generate int16x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int64x2_t + +/// Unsigned Subtract Wide +name = vsubw +no-q +multi_fn = simd_sub, a, {simd_cast, b} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 +validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +arm = vsubw +aarch64 = usubw +generate uint16x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint64x2_t + +/// Signed Subtract Wide +name = vsubw_high +no-q +multi_fn = simd_shuffle8, c:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_sub, a, {simd_cast, c} +a = 8, 9, 10, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16 +validate 0, 0, 0, 0, 0, 0, 0, 0 + +aarch64 = ssubw +generate int16x8_t:int8x16_t:int16x8_t + +/// Signed Subtract Wide +name = vsubw_high +no-q +multi_fn = simd_shuffle4, c:int16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_sub, a, {simd_cast, c} +a = 8, 9, 10, 11 +b = 0, 1, 2, 3, 8, 9, 10, 11 +validate 0, 0, 0, 0 + +aarch64 = ssubw +generate int32x4_t:int16x8_t:int32x4_t + +/// Signed Subtract Wide +name = vsubw_high +no-q +multi_fn = simd_shuffle2, c:int32x2_t, b, b, [2, 3] +multi_fn = simd_sub, a, {simd_cast, c} +a = 8, 9 +b = 6, 7, 8, 9 +validate 0, 0 + +aarch64 = ssubw +generate int64x2_t:int32x4_t:int64x2_t + +/// Unsigned Subtract Wide +name = vsubw_high +no-q +multi_fn = simd_shuffle8, c:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_sub, a, {simd_cast, c} +a = 8, 9, 10, 11, 12, 13, 14, 15 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate 0, 0, 0, 0, 0, 0, 0, 0 + +aarch64 = usubw +generate uint16x8_t:uint8x16_t:uint16x8_t + +/// Unsigned Subtract Wide +name = vsubw_high +no-q +multi_fn = simd_shuffle4, c:uint16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_sub, a, {simd_cast, c} +a = 8, 9, 10, 11 +b = 0, 1, 2, 3, 8, 9, 10, 11 +validate 0, 0, 0, 0 + +aarch64 = usubw +generate uint32x4_t:uint16x8_t:uint32x4_t + +/// Unsigned Subtract Wide +name = vsubw_high +no-q +multi_fn = simd_shuffle2, c:uint32x2_t, b, b, [2, 3] +multi_fn = simd_sub, a, {simd_cast, c} +a = 8, 9 +b = 6, 7, 8, 9 +validate 0, 0 + +aarch64 = usubw +generate uint64x2_t:uint32x4_t:uint64x2_t + +/// Signed Subtract Long +name = vsubl +no-q +multi_fn = simd_cast, c:out_t, a +multi_fn = simd_cast, d:out_t, b +multi_fn = simd_sub, c, d + +a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +arm = vsubl +aarch64 = ssubl +generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t + +/// Unsigned Subtract Long +name = vsubl +no-q +multi_fn = simd_cast, c:out_t, a +multi_fn = simd_cast, d:out_t, b +multi_fn = simd_sub, c, d + +a = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = MAX, MIN, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + +arm = vsubl +aarch64 = usubl +generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t + +/// Signed Subtract Long +name = vsubl_high +no-q +multi_fn = simd_shuffle8, c:int8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_cast, d:out_t, c +multi_fn = simd_shuffle8, e:int8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_cast, f:out_t, e +multi_fn = simd_sub, d, f + +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 +validate 6, 7, 8, 9, 10, 11, 12, 13 + +aarch64 = ssubl +generate int8x16_t:int8x16_t:int16x8_t + +/// Signed Subtract Long +name = vsubl_high +no-q +multi_fn = simd_shuffle4, c:int16x4_t, a, a, [4, 5, 6, 7] +multi_fn = simd_cast, d:out_t, c +multi_fn = simd_shuffle4, e:int16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_cast, f:out_t, e +multi_fn = simd_sub, d, f + +a = 8, 9, 10, 11, 12, 13, 14, 15 +b = 6, 6, 6, 6, 8, 8, 8, 8 +validate 4, 5, 6, 7 + +aarch64 = ssubl +generate int16x8_t:int16x8_t:int32x4_t + +/// Signed Subtract Long +name = vsubl_high +no-q +multi_fn = simd_shuffle2, c:int32x2_t, a, a, [2, 3] +multi_fn = simd_cast, d:out_t, c +multi_fn = simd_shuffle2, e:int32x2_t, b, b, [2, 3] +multi_fn = simd_cast, f:out_t, e +multi_fn = simd_sub, d, f + +a = 12, 13, 14, 15 +b = 6, 6, 8, 8 +validate 6, 7 + +aarch64 = ssubl +generate int32x4_t:int32x4_t:int64x2_t + +/// Unsigned Subtract Long +name = vsubl_high +no-q +multi_fn = simd_shuffle8, c:uint8x8_t, a, a, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_cast, d:out_t, c +multi_fn = simd_shuffle8, e:uint8x8_t, b, b, [8, 9, 10, 11, 12, 13, 14, 15] +multi_fn = simd_cast, f:out_t, e +multi_fn = simd_sub, d, f + +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 +validate 6, 7, 8, 9, 10, 11, 12, 13 + +aarch64 = usubl +generate uint8x16_t:uint8x16_t:uint16x8_t + +/// Unsigned Subtract Long +name = vsubl_high +no-q +multi_fn = simd_shuffle4, c:uint16x4_t, a, a, [4, 5, 6, 7] +multi_fn = simd_cast, d:out_t, c +multi_fn = simd_shuffle4, e:uint16x4_t, b, b, [4, 5, 6, 7] +multi_fn = simd_cast, f:out_t, e +multi_fn = simd_sub, d, f + +a = 8, 9, 10, 11, 12, 13, 14, 15 +b = 6, 6, 6, 6, 8, 8, 8, 8 +validate 4, 5, 6, 7 + +aarch64 = usubl +generate uint16x8_t:uint16x8_t:uint32x4_t + +/// Unsigned Subtract Long +name = vsubl_high +no-q +multi_fn = simd_shuffle2, c:uint32x2_t, a, a, [2, 3] +multi_fn = simd_cast, d:out_t, c +multi_fn = simd_shuffle2, e:uint32x2_t, b, b, [2, 3] +multi_fn = simd_cast, f:out_t, e +multi_fn = simd_sub, d, f + +a = 12, 13, 14, 15 +b = 6, 6, 8, 8 +validate 6, 7 + +aarch64 = usubl +generate uint32x4_t:uint32x4_t:uint64x2_t + /// Maximum (vector) name = vmax a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16