From 7b21d85a41587325514f1c1e73bbccffeaaa5f33 Mon Sep 17 00:00:00 2001
From: Sparrow Li
Date: Wed, 31 Mar 2021 22:48:58 +0800
Subject: [PATCH] add vmovn_high, vrbit, vrnd, vsubhn neon instructions (#1103)

---
 .../core_arch/src/aarch64/neon/generated.rs  | 802 ++++++++++++++++++
 .../core_arch/src/arm/neon/generated.rs      | 246 ++++++
 .../crates/core_arch/src/arm/neon/mod.rs     |  68 ++
 library/stdarch/crates/stdarch-gen/neon.spec | 127 ++-
 .../stdarch/crates/stdarch-gen/src/main.rs   |  80 +-
 5 files changed, 1309 insertions(+), 14 deletions(-)

diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index d23f059c8290..1c7ddff7f924 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -1816,6 +1816,60 @@ pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin
     vmlsl_u32(a, b, c)
 }
 
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    let c: int8x8_t = simd_cast(b);
+    simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    let c: int16x4_t = simd_cast(b);
+    simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    let c: int32x2_t = simd_cast(b);
+    simd_shuffle4(a, c, [0, 1, 2, 3])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    let c: uint8x8_t = simd_cast(b);
+    simd_shuffle16(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    let c: uint16x4_t = simd_cast(b);
+    simd_shuffle8(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(xtn2))]
+pub unsafe fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    let c: uint32x2_t = simd_cast(b);
+    simd_shuffle4(a, c, [0, 1, 2, 3])
+}
+
 /// Negate
 #[inline]
 #[target_feature(enable = "neon")]
@@ -1874,6 +1928,428 @@ pub unsafe fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
     vqnegq_s64_(a)
 }
 
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_s8(a: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v8i8")]
+        fn vrbit_s8_(a: int8x8_t) -> int8x8_t;
+    }
+    vrbit_s8_(a)
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rbit.v16i8")]
+        fn vrbitq_s8_(a: int8x16_t) -> int8x16_t;
+    }
+    vrbitq_s8_(a)
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
+    transmute(vrbit_s8(transmute(a)))
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
+    transmute(vrbitq_s8(transmute(a)))
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
+    transmute(vrbit_s8(transmute(a)))
+}
+
+/// Reverse bit order
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub unsafe fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
+    transmute(vrbitq_s8(transmute(a)))
+}
+
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndx_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f32")]
+        fn vrndx_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrndx_f32_(a)
+}
+
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndxq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v4f32")]
+        fn vrndxq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndxq_f32_(a)
+}
+
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndx_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v1f64")]
+        fn vrndx_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrndx_f64_(a)
+}
+
+/// Floating-point round to integral exact, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub unsafe fn vrndxq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.rint.v2f64")]
+        fn vrndxq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndxq_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrnda_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f32")]
+        fn vrnda_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrnda_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrndaq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v4f32")]
+        fn vrndaq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndaq_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrnda_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v1f64")]
+        fn vrnda_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrnda_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to away
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.round.v2f64")]
+        fn vrndaq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndaq_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
+        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrndn_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
+        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndnq_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndn_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v1f64")]
+        fn vrndn_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrndn_f64_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub unsafe fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f64")]
+        fn vrndnq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndnq_f64_(a)
+}
+
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndm_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f32")]
+        fn vrndm_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrndm_f32_(a)
+}
+
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndmq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v4f32")]
+        fn vrndmq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndmq_f32_(a)
+}
+
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndm_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v1f64")]
+        fn vrndm_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrndm_f64_(a)
+}
+
+/// Floating-point round to integral, toward minus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub unsafe fn vrndmq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.floor.v2f64")]
+        fn vrndmq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndmq_f64_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndp_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f32")]
+        fn vrndp_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrndp_f32_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndpq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v4f32")]
+        fn vrndpq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndpq_f32_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndp_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v1f64")]
+        fn vrndp_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrndp_f64_(a)
+}
+
+/// Floating-point round to integral, toward plus infinity
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub unsafe fn vrndpq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ceil.v2f64")]
+        fn vrndpq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndpq_f64_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrnd_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f32")]
+        fn vrnd_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrnd_f32_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrndq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v4f32")]
+        fn vrndq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndq_f32_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrnd_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v1f64")]
+        fn vrnd_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrnd_f64_(a)
+}
+
+/// Floating-point round to integral, toward zero
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub unsafe fn vrndq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.trunc.v2f64")]
+        fn vrndq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndq_f64_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndi_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f32")]
+        fn vrndi_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrndi_f32_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndiq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v4f32")]
+        fn vrndiq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrndiq_f32_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndi_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v1f64")]
+        fn vrndi_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrndi_f64_(a)
+}
+
+/// Floating-point round to integral, using current rounding mode
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub unsafe fn vrndiq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.nearbyint.v2f64")]
+        fn vrndiq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrndiq_f64_(a)
+}
+
 /// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
@@ -5298,6 +5774,60 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovn_high_s16() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+        let b: i16x8 = i16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
+        let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vmovn_high_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovn_high_s32() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(2, 3, 4, 5);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+        let r: i16x8 = transmute(vmovn_high_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovn_high_s64() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i64x2 = i64x2::new(2, 3);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmovn_high_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovn_high_u16() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+        let b: u16x8 = u16x8::new(2, 3, 4, 5, 12, 13, 14, 15);
+        let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15);
+        let r: u8x16 = transmute(vmovn_high_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovn_high_u32() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: u32x4 = u32x4::new(2, 3, 4, 5);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 4, 5);
+        let r: u16x8 = transmute(vmovn_high_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmovn_high_u64() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: u64x2 = u64x2::new(2, 3);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmovn_high_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vneg_s64() {
         let a: i64x1 = i64x1::new(0);
@@ -5346,6 +5876,278 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrbit_s8() {
+        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+        let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
+        let r: i8x8 = transmute(vrbit_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrbitq_s8() {
+        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+        let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
+        let r: i8x16 = transmute(vrbitq_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrbit_u8() {
+        let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+        let e: u8x8 = u8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
+        let r: u8x8 = transmute(vrbit_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrbitq_u8() {
+        let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+        let e: u8x16 = u8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
+        let r: u8x16 = transmute(vrbitq_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrbit_p8() {
+        let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
+        let e: i8x8 = i8x8::new(0, 64, 32, 96, 16, 80, 48, 112);
+        let r: i8x8 = transmute(vrbit_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrbitq_p8() {
+        let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
+        let e: i8x16 = i8x16::new(0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120);
+        let r: i8x16 = transmute(vrbitq_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndx_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndx_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndxq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+        let r: f32x4 = transmute(vrndxq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndx_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndx_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndxq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 0.0);
+        let r: f64x2 = transmute(vrndxq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrnda_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 1.0);
+        let r: f32x2 = transmute(vrnda_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndaq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 1.0, 2.0, 3.0);
+        let r: f32x4 = transmute(vrndaq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrnda_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrnda_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndaq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 1.0);
+        let r: f64x2 = transmute(vrndaq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndn_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndn_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndnq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+        let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndn_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndn_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndnq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 0.0);
+        let r: f64x2 = transmute(vrndnq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndm_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndm_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndmq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 1.0, 2.0);
+        let r: f32x4 = transmute(vrndmq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndm_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndm_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndmq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 0.0);
+        let r: f64x2 = transmute(vrndmq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndp_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-1.0, 1.0);
+        let r: f32x2 = transmute(vrndp_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndpq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-1.0, 1.0, 2.0, 3.0);
+        let r: f32x4 = transmute(vrndpq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndp_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -1.0;
+        let r: f64 = transmute(vrndp_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndpq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-1.0, 1.0);
+        let r: f64x2 = transmute(vrndpq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrnd_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-1.0, 0.0);
+        let r: f32x2 = transmute(vrnd_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-1.0, 0.0, 1.0, 2.0);
+        let r: f32x4 = transmute(vrndq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrnd_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -1.0;
+        let r: f64 = transmute(vrnd_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-1.0, 0.0);
+        let r: f64x2 = transmute(vrndq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndi_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndi_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndiq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+        let r: f32x4 = transmute(vrndiq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndi_f64() {
+        let a: f64 = -1.5;
+        let e: f64 = -2.0;
+        let r: f64 = transmute(vrndi_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndiq_f64() {
+        let a: f64x2 = f64x2::new(-1.5, 0.5);
+        let e: f64x2 = f64x2::new(-2.0, 0.0);
+        let r: f64x2 = transmute(vrndiq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vmul_f64() {
         let a: f64 = 1.0;
diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
index 3a918ea8bd47..f01a24435480 100644
--- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
@@ -3948,6 +3948,138 @@ pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     simd_sub(a, b)
 }
 
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    let c: i32x4 = i32x4::new(16, 16, 16, 16);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    let c: i64x2 = i64x2::new(32, 32);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    let c: u32x4 = u32x4::new(16, 16, 16, 16);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    let c: u64x2 = u64x2::new(32, 32);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let d: int8x8_t = vsubhn_s16(b, c);
+    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let d: int16x4_t = vsubhn_s32(b, c);
+    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let d: int32x2_t = vsubhn_s64(b, c);
+    simd_shuffle4(a, d, [0, 1, 2, 3])
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    let d: uint8x8_t = vsubhn_u16(b, c);
+    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    let d: uint16x4_t = vsubhn_u32(b, c);
+    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    let d: uint32x2_t = vsubhn_u64(b, c);
+    simd_shuffle4(a, d, [0, 1, 2, 3])
+}
+
 /// Signed halving subtract
 #[inline]
 #[target_feature(enable = "neon")]
@@ -10222,6 +10354,120 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, -32768, 1, 1, 0x7F_FF, -32768, 1, 1);
+        let b: i16x8 = i16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
+        let e: i8x8 = i8x8::new(0x7F, -128, 0, 0, 0x7F, -128, 0, 0);
+        let r: i8x8 = transmute(vsubhn_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 1, 1);
+        let b: i32x4 = i32x4::new(1, 0, 0, 0);
+        let e: i16x4 = i16x4::new(0x7F_FF, -32768, 0, 0);
+        let r: i16x4 = transmute(vsubhn_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_s64() {
+        let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
+        let b: i64x2 = i64x2::new(1, 0);
+        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648);
+        let r: i32x2 = transmute(vsubhn_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_u16() {
+        let a: u16x8 = u16x8::new(0xFF_FF, 0, 1, 1, 0xFF_FF, 0, 1, 1);
+        let b: u16x8 = u16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
+        let e: u8x8 = u8x8::new(0xFF, 0, 0, 0, 0xFF, 0, 0, 0);
+        let r: u8x8 = transmute(vsubhn_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_u32() {
+        let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 1, 1);
+        let b: u32x4 = u32x4::new(1, 0, 0, 0);
+        let e: u16x4 = u16x4::new(0xFF_FF, 0, 0, 0);
+        let r: u16x4 = transmute(vsubhn_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_u64() {
+        let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+        let b: u64x2 = u64x2::new(1, 0);
+        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+        let r: u32x2 = transmute(vsubhn_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_s16() {
+        let a: i8x8 = i8x8::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
+        let b: i16x8 = i16x8::new(0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1);
+        let c: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+        let e: i8x16 = i8x16::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
+        let r: i8x16 = transmute(vsubhn_high_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_s32() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0, 0x7F_FF, 0);
+        let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 1, 0x7F_FF_FF_FF, 1);
+        let c: i32x4 = i32x4::new(1, 0, 1, 0);
+        let e: i16x8 = i16x8::new(0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0);
+        let r: i16x8 = transmute(vsubhn_high_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_s64() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0);
+        let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 1);
+        let c: i64x2 = i64x2::new(1, 0);
+        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, 0x7F_FF_FF_FF, 0);
+        let r: i32x4 = transmute(vsubhn_high_s64(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_u16() {
+        let a: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
+        let b: u16x8 = u16x8::new(0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1);
+        let c: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+        let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
+        let r: u8x16 = transmute(vsubhn_high_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_u32() {
+        let a: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0);
+        let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 1, 0xFF_FF_FF_FF, 1);
+        let c: u32x4 = u32x4::new(1, 0, 1, 0);
+        let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0);
+        let r: u16x8 = transmute(vsubhn_high_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_u64() {
+        let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+        let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 1);
+        let c: u64x2 = u64x2::new(1, 0);
+        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
+        let r: u32x4 = transmute(vsubhn_high_u64(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vhsub_u8() {
         let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
diff --git a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs
index 04dc12fd490b..65d10d179701 100644
--- a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs
@@ -5391,6 +5391,46 @@ pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t {
     simd_shuffle16(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12])
 }
 
+/// Reversing vector elements (swap endianness)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
+pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t {
+    simd_shuffle4(a, a, [1, 0, 3, 2])
+}
+
+/// Reversing vector elements (swap endianness)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
+pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t {
+    simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
+}
+
+/// Reversing vector elements (swap endianness)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
+pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t {
+    simd_shuffle4(a, a, [1, 0, 3, 2])
+}
+
+/// Reversing vector elements (swap endianness)
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))]
+pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t {
+    simd_shuffle8(a, a, [1, 0, 3, 2, 5, 4, 7, 6])
+}
+
 /// Reversing vector elements (swap endianness)
 #[inline]
 #[target_feature(enable = "neon")]
@@ -10792,6 +10832,34 @@ mod tests {
         assert_eq!(r, e);
     }
     #[simd_test(enable = "neon")]
+    unsafe fn test_vrev32_s16() {
+        let a = i16x4::new(0, 1, 2, 3);
+        let r = i16x4::new(1, 0, 3, 2);
+        let e: i16x4 = transmute(vrev32_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrev32q_s16() {
+        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+        let e: i16x8 = transmute(vrev32q_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrev32_p16() {
+        let a = i16x4::new(0, 1, 2, 3);
+        let r = i16x4::new(1, 0, 3, 2);
+        let e: i16x4 = transmute(vrev32_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrev32q_p16() {
+        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+        let e: i16x8 = transmute(vrev32q_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+    #[simd_test(enable = "neon")]
     unsafe fn test_vrev32_u16() {
         let a = u16x4::new(0, 1, 2, 3);
         let r = u16x4::new(1, 0, 3, 2);
diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec
index fd59cf5b077e..e9f82943f126 100644
--- a/library/stdarch/crates/stdarch-gen/neon.spec
+++ b/library/stdarch/crates/stdarch-gen/neon.spec
@@ -1050,6 +1050,19 @@ validate 14, 13, 12, 11, 10, 9, 8, 7
 aarch64 = umlsl2
 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
 
+/// Extract narrow
+name = vmovn_high
+no-q
+multi_fn = simd_cast, c:in_t0, b
+multi_fn = simd_shuffle-out_len-noext, a, c, {asc-out_len}
+a = 0, 1, 2, 3, 2, 3, 4, 5
+b = 2, 3, 4, 5, 12, 13, 14, 15
+validate 0, 1, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 12, 13, 14, 15
+
+aarch64 = xtn2
+generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
+generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
+
 /// Negate
 name = vneg
 fn = simd_neg
@@ -1111,20 +1124,38 @@ a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
 b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
 validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29
 
-
 arm = vhadd.s
 aarch64 = uhadd
 link-aarch64 = uhadd._EXT_
 link-arm = vhaddu._EXT_
 generate uint*_t
 
-
 arm = vhadd.s
 aarch64 = shadd
 link-aarch64 = shadd._EXT_
 link-arm = vhadds._EXT_
 generate int*_t
 
+/// Reverse bit order
+name = vrbit
+a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
+
+aarch64 = rbit
+link-aarch64 = rbit._EXT_
+
+generate int8x8_t, int8x16_t
+
+/// Reverse bit order
+name = vrbit
+multi_fn = transmute, {vrbit-signed-noext, transmute(a)}
+a = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+validate 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120
+
+aarch64 = rbit
+
+generate uint8x8_t, uint8x16_t, poly8x8_t, poly8x16_t
+
 /// Rounding halving add
 name = vrhadd
 a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
@@ -1143,6 +1174,69 @@ link-arm = vrhadds._EXT_
 link-aarch64 = srhadd._EXT_
 generate int*_t
 
+/// Floating-point round to integral exact, using current rounding mode
+name = vrndx
+a = -1.5, 0.5, 1.5, 2.5
+validate -2.0, 0.0, 2.0, 2.0
+
+aarch64 = frintx
+link-aarch64 = llvm.rint._EXT_
+generate float*_t, float64x*_t
+
+/// Floating-point round to integral, to nearest with ties to away
+name = vrnda
+a = -1.5, 0.5, 1.5, 2.5
+validate -2.0, 1.0, 2.0, 3.0
+
+aarch64 = frinta
+link-aarch64 = llvm.round._EXT_
+generate float*_t, float64x*_t
+
+/// Floating-point round to integral, to nearest with ties to even
+name = vrndn
+a = -1.5, 0.5, 1.5, 2.5
+validate -2.0, 0.0, 2.0, 2.0
+
+link-aarch64 = frintn._EXT_
+aarch64 = frintn
+generate float*_t, float64x*_t
+
+/// Floating-point round to integral, toward minus infinity
+name = vrndm
+a = -1.5, 0.5, 1.5, 2.5
+validate -2.0, 0.0, 1.0, 2.0
+
+aarch64 = frintm
+link-aarch64 = llvm.floor._EXT_
+generate float*_t, float64x*_t
+
+/// Floating-point round to integral, toward plus infinity
+name = vrndp
+a = -1.5, 0.5, 1.5, 2.5
+validate -1.0, 1.0, 2.0, 3.0
+
+aarch64 = frintp
+link-aarch64 = llvm.ceil._EXT_
+generate float*_t, float64x*_t
+
+/// Floating-point round to integral, toward zero
+name = vrnd
+a = -1.5, 0.5, 1.5, 2.5
+validate -1.0, 0.0, 1.0, 2.0
+
+aarch64 = frintz
+link-aarch64 = llvm.trunc._EXT_
+generate float*_t, float64x*_t
+
+/// Floating-point round to integral, using current rounding mode
+name = vrndi
+a = -1.5, 0.5, 1.5, 2.5
+validate -2.0, 0.0, 2.0, 2.0
+
+aarch64 = frinti
+link-aarch64 = llvm.nearbyint._EXT_
+generate float*_t, float64x*_t
+
 /// Saturating add
 name = vqadd
 a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
@@ -1295,6 +1389,35 @@ generate float64x*_t
 arm = vsub.
 generate float*_t
 
+/// Subtract returning high narrow
+name = vsubhn
+no-q
+multi_fn = fixed, c:in_t
+multi_fn = simd_cast, {simd_shr, {simd_sub}, transmute(c)}
+a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
+b = 1, 0, 0, 0, 1, 0, 0, 0
+fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
+validate MAX, MIN, 0, 0, MAX, MIN, 0, 0
+
+arm = vsubhn
+aarch64 = subhn
+generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
+generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
+
+/// Subtract returning high narrow
+name = vsubhn_high
+no-q
+multi_fn = vsubhn-noqself-noext, d:in_t0, b, c
+multi_fn = simd_shuffle-out_len-noext, a, d, {asc-out_len}
+a = MAX, 0, MAX, 0, MAX, 0, MAX, 0
+b = MAX, 1, MAX, 1, MAX, 1, MAX, 1
+c = 1, 0, 1, 0, 1, 0, 1, 0
+validate MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0, MAX, 0
+
+arm = vsubhn
+aarch64 = subhn2
+generate int8x8_t:int16x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int64x2_t:int32x4_t
+generate uint8x8_t:uint16x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint64x2_t:uint32x4_t
 
 /// Signed halving subtract
 name = vhsub
diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs
index f2ca194a4b3f..3251b9f6bf52 100644
--- a/library/stdarch/crates/stdarch-gen/src/main.rs
+++ b/library/stdarch/crates/stdarch-gen/src/main.rs
@@ -119,14 +119,14 @@ fn type_to_suffix(t: &str) -> &str {
 
 fn type_to_signed_suffix(t: &str) -> &str {
     match t {
-        "int8x8_t" | "uint8x8_t" => "_s8",
-        "int8x16_t" | "uint8x16_t" => "q_s8",
-        "int16x4_t" | "uint16x4_t" => "_s16",
-        "int16x8_t" | "uint16x8_t" => "q_s16",
+        "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "_s8",
+        "int8x16_t" | "uint8x16_t" | "poly8x16_t" => "q_s8",
+        "int16x4_t" | "uint16x4_t" | "poly16x4_t" => "_s16",
+        "int16x8_t" | "uint16x8_t" | "poly16x8_t" => "q_s16",
         "int32x2_t" | "uint32x2_t" => "_s32",
         "int32x4_t" | "uint32x4_t" => "q_s32",
-        "int64x1_t" | "uint64x1_t" => "_s64",
-        "int64x2_t" | "uint64x2_t" => "q_s64",
+        "int64x1_t" | "uint64x1_t" | "poly64x1_t" => "_s64",
+        "int64x2_t" | "uint64x2_t" | "poly64x2_t" => "q_s64",
         /*
         "float16x4_t" => "_f16",
         "float16x8_t" => "q_f16",
@@ -328,6 +328,16 @@ fn type_to_half(t: &str) -> &str {
     }
 }
 
+fn asc(x: usize) -> &'static str {
+    match x {
+        2 => "[0, 1]",
+        4 => "[0, 1, 2, 3]",
+        8 => "[0, 1, 2, 3, 4, 5, 6, 7]",
+        16 => "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]",
+        _ => panic!("unknown transpose order of len {}", x),
+    }
+}
+
 fn transpose1(x: usize) -> &'static str {
     match x {
         2 => "[0, 2]",
@@ -481,6 +491,23 @@ fn bits_minus_one(t: &str) -> &'static str {
     }
 }
 
+fn half_bits(t: &str) -> &'static str {
+    match &t[..3] {
+        "u8x" => "4",
+        "u16" => "8",
+        "u32" => "16",
+        "u64" => "32",
+        "i8x" => "4",
+        "i16" => "8",
+        "i32" => "16",
+        "i64" => "32",
+        "p8x" => "4",
+        "p16" => "8",
+        "p64" => "32",
+        _ => panic!("Unknown bits for type {}", t),
+    }
+}
+
 fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
     match v {
         "FALSE" => false_val(t),
@@ -490,6 +517,7 @@ fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
        "FF" => ff_val(t),
        "BITS" => bits(t),
        "BITS_M1" => bits_minus_one(t),
+       "HFBITS" => half_bits(t),
        o => o,
     }
 }
@@ -554,14 +582,21 @@ fn gen_aarch64(
     let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
         let ext = type_to_ext(in_t[0]);
         let ext2 = type_to_ext(out_t);
+        let link_aarch64 = if link_aarch64.starts_with("llvm") {
+            link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        } else {
+            let mut link = String::from("llvm.aarch64.neon.");
+            link.push_str(&link_aarch64);
+            link.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        };
         format!(
             r#"#[allow(improper_ctypes)]
     extern "C" {{
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
 "#,
-            link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
+            link_aarch64,
             current_fn,
             match para_num {
                 1 => {
@@ -817,16 +852,30 @@ fn gen_arm(
     if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
         let ext = type_to_ext(in_t[0]);
         let ext2 = type_to_ext(out_t);
+        let link_arm = if link_arm.starts_with("llvm") {
+            link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        } else {
+            let mut link = String::from("llvm.arm.neon.");
+            link.push_str(&link_arm);
+            link.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        };
+        let link_aarch64 = if link_aarch64.starts_with("llvm") {
+            link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        } else {
+            let mut link = String::from("llvm.aarch64.neon.");
+            link.push_str(&link_aarch64);
+            link.replace("_EXT_", ext).replace("_EXT2_", ext2)
+        };
         format!(
             r#"#[allow(improper_ctypes)]
     extern "C" {{
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.{}")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")]
+        #[cfg_attr(target_arch = "arm", link_name = "{}")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "{}")]
         fn {}({}) -> {};
     }}
 "#,
-            link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2),
-            link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2),
+            link_arm,
+            link_aarch64,
             current_fn,
             match para_num {
                 1 => {
@@ -1066,6 +1115,10 @@ fn get_call(
             re = Some((re_params[0].clone(), in_t[1].to_string()));
         } else if re_params[1] == "in_t" {
             re = Some((re_params[0].clone(), in_t[1].to_string()));
+        } else if re_params[1] == "in_t0" {
+            re = Some((re_params[0].clone(), in_t[0].to_string()));
+        } else if re_params[1] == "in_t1" {
+            re = Some((re_params[0].clone(), in_t[1].to_string()));
         } else if re_params[1] == "out_t" {
             re = Some((re_params[0].clone(), out_t.to_string()));
         } else if re_params[1] == "half" {
@@ -1097,6 +1150,9 @@ fn get_call(
         });
         return format!(r#"[{}]"#, &half[..half.len() - 2]);
     }
+    if fn_name == "asc-out_len" {
+        return asc(type_len(out_t)).to_string();
+    }
     if fn_name == "transpose-1-in_len" {
         return transpose1(type_len(in_t[1])).to_string();
    }
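
A minimal usage sketch (not part of the patch) of the narrowing intrinsics added above, assuming an AArch64 target and a toolchain that exposes them from core::arch::aarch64; the function and variable names here are illustrative only:

#[cfg(target_arch = "aarch64")]
fn narrow_demo() {
    use core::arch::aarch64::*;
    use core::mem::transmute;
    unsafe {
        // vmovn_high_s16 truncates each 16-bit lane of `hi` to 8 bits and
        // appends the results after the 8 lanes already held in `lo` (XTN2).
        let lo: int8x8_t = transmute([1i8; 8]);
        let hi: int16x8_t = transmute([0x0102i16; 8]);
        let packed: [i8; 16] = transmute(vmovn_high_s16(lo, hi));
        // Lanes 0..8 come from `lo`; lanes 8..16 are the low byte of 0x0102.
        assert_eq!(packed, [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2]);

        // vsubhn_high_u16 keeps only the high byte of each 16-bit
        // difference `b - c`, appended after `a` (SUBHN2).
        let a: uint8x8_t = transmute([0u8; 8]);
        let b: uint16x8_t = transmute([0x1234u16; 8]);
        let c: uint16x8_t = transmute([0x0034u16; 8]);
        let r: [u8; 16] = transmute(vsubhn_high_u16(a, b, c));
        // 0x1234 - 0x0034 = 0x1200, whose high byte is 0x12.
        assert_eq!(&r[8..], &[0x12u8; 8][..]);
    }
}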
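
The vrnd* functions wired up here map one-to-one onto the IEEE rounding modes (FRINTA, FRINTN, FRINTM, FRINTP, FRINTZ). A small sketch of the observable differences on tie values, reusing the patch's own test inputs (again an illustrative example, AArch64 only):

#[cfg(target_arch = "aarch64")]
fn rounding_demo() {
    use core::arch::aarch64::*;
    use core::mem::transmute;
    unsafe {
        let v: float32x2_t = transmute([2.5f32, -1.5f32]);
        let away: [f32; 2] = transmute(vrnda_f32(v)); // ties away from zero
        let even: [f32; 2] = transmute(vrndn_f32(v)); // ties to nearest even
        let down: [f32; 2] = transmute(vrndm_f32(v)); // toward minus infinity
        let up: [f32; 2] = transmute(vrndp_f32(v));   // toward plus infinity
        let zero: [f32; 2] = transmute(vrnd_f32(v));  // toward zero
        assert_eq!(away, [3.0, -2.0]);
        assert_eq!(even, [2.0, -2.0]);
        assert_eq!(down, [2.0, -2.0]);
        assert_eq!(up, [3.0, -1.0]);
        assert_eq!(zero, [2.0, -1.0]);
    }
}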
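
Finally, vrbit reverses the bit order within each byte lane, which is distinct from the vrev32 lane reversals also added in this patch; a one-assert sanity check in the same illustrative style:

#[cfg(target_arch = "aarch64")]
fn rbit_demo() {
    use core::arch::aarch64::*;
    use core::mem::transmute;
    unsafe {
        // Bit i of each byte moves to bit 7 - i, so 0b0000_0010 becomes
        // 0b0100_0000 -- the same 2 -> 64 pairing as the test vectors above.
        let v: uint8x8_t = transmute([2u8; 8]);
        let r: [u8; 8] = transmute(vrbit_u8(v));
        assert_eq!(r, [64u8; 8]);
    }
}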