From a43f92a18169610bd0aea48858681c2e0c6fa06c Mon Sep 17 00:00:00 2001 From: Christopher Serr Date: Thu, 22 Apr 2021 07:08:40 +0200 Subject: [PATCH] Add vrndn neon instructions (#1086) This adds the neon instructions for lane-wise rounding without actually converting the lanes to integers. --- .../core_arch/src/aarch64/neon/generated.rs | 42 ---------------- .../src/arm_shared/neon/generated.rs | 48 +++++++++++++++++++ library/stdarch/crates/stdarch-gen/neon.spec | 9 +++- 3 files changed, 55 insertions(+), 44 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index fc60ea2c69f2..711f304343b5 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -2518,32 +2518,6 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t { vrndaq_f64_(a) } -/// Floating-point round to integral, to nearest with ties to even -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintn))] -pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")] - fn vrndn_f32_(a: float32x2_t) -> float32x2_t; - } - vrndn_f32_(a) -} - -/// Floating-point round to integral, to nearest with ties to even -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(frintn))] -pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")] - fn vrndnq_f32_(a: float32x4_t) -> float32x4_t; - } - vrndnq_f32_(a) -} - /// Floating-point round to integral, to nearest with ties to even #[inline] #[target_feature(enable = "neon")] @@ -8884,22 +8858,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vrndn_f32() { - let a: f32x2 = f32x2::new(-1.5, 0.5); - let e: f32x2 = f32x2::new(-2.0, 0.0); - let r: f32x2 = transmute(vrndn_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrndnq_f32() { - let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); - let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); - let r: f32x4 = transmute(vrndnq_f32(transmute(a))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vrndn_f64() { let a: f64 = -1.5; diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index a84f5112106c..0e40deaac7ef 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -4198,6 +4198,38 @@ pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { vrhaddq_s32_(a, b) } +/// Floating-point round to integral, to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))] +pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")] + fn vrndn_f32_(a: float32x2_t) -> float32x2_t; + } +vrndn_f32_(a) +} + +/// Floating-point round to integral, to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))] +pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")] + fn vrndnq_f32_(a: float32x4_t) -> float32x4_t; + } +vrndnq_f32_(a) +} + /// Saturating add #[inline] #[target_feature(enable = "neon")] @@ -14921,6 +14953,22 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vrndn_f32() { + let a: f32x2 = f32x2::new(-1.5, 0.5); + let e: f32x2 = f32x2::new(-2.0, 0.0); + let r: f32x2 = transmute(vrndn_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrndnq_f32() { + let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5); + let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0); + let r: f32x4 = transmute(vrndnq_f32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqadd_u8() { let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index fcc61fb770a4..5e5eb9388ff9 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -1401,7 +1401,12 @@ validate -2.0, 0.0, 2.0, 2.0 link-aarch64 = frintn._EXT_ aarch64 = frintn -generate float*_t, float64x*_t +generate float64x*_t + +target = fp-armv8 +arm = vrintn +link-arm = vrintn._EXT_ +generate float*_t /// Floating-point round to integral, toward minus infinity name = vrndm @@ -3901,4 +3906,4 @@ validate MAX, 7 aarch64 = sqabs link-aarch64 = sqabs._EXT_ -generate int64x*_t \ No newline at end of file +generate int64x*_t