Add vrndn neon instructions (#1086)

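This adds the NEON instructions for lane-wise rounding to the nearest integral value (ties to even) without actually converting the lanes to integers. `vrndn_f32` and `vrndnq_f32` move from the aarch64-only module to `arm_shared`, so they are now also available on 32-bit ARM (via `vrintn`) in addition to AArch64 (via `frintn`).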
2021-04-22 07:08:40 +02:00 · 2021-04-22 07:08:40 +02:00 · a43f92a181
commit a43f92a181
parent de3e8f72c5
3 changed files with 55 additions and 44 deletions
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -2518,32 +2518,6 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
    vrndaq_f64_(a)
 }

-/// Floating-point round to integral, to nearest with ties to even
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
-        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
-    }
-    vrndn_f32_(a)
-}
-
-/// Floating-point round to integral, to nearest with ties to even
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
-        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
-    }
-    vrndnq_f32_(a)
-}
-
 /// Floating-point round to integral, to nearest with ties to even
 #[inline]
 #[target_feature(enable = "neon")]
@@ -8884,22 +8858,6 @@ mod test {
        assert_eq!(r, e);
    }

-    #[simd_test(enable = "neon")]
-    unsafe fn test_vrndn_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-2.0, 0.0);
-        let r: f32x2 = transmute(vrndn_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vrndnq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
-        let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
    #[simd_test(enable = "neon")]
    unsafe fn test_vrndn_f64() {
        let a: f64 = -1.5;
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -4198,6 +4198,38 @@ pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 vrhaddq_s32_(a, b)
 }

+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
+pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
+        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
+    }
+vrndn_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
+pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
+        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+vrndnq_f32_(a)
+}
+
 /// Saturating add
 #[inline]
 #[target_feature(enable = "neon")]
@@ -14921,6 +14953,22 @@ mod test {
        assert_eq!(r, e);
    }

+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndn_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndn_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndnq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+        let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
    #[simd_test(enable = "neon")]
    unsafe fn test_vqadd_u8() {
        let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
--- a/library/stdarch/crates/stdarch-gen/neon.spec
+++ b/library/stdarch/crates/stdarch-gen/neon.spec
@@ -1401,7 +1401,12 @@ validate -2.0, 0.0, 2.0, 2.0

 link-aarch64 = frintn._EXT_
 aarch64 = frintn
-generate float*_t, float64x*_t
+generate float64x*_t
+
+target = fp-armv8
+arm = vrintn
+link-arm = vrintn._EXT_
+generate float*_t

 /// Floating-point round to integral, toward minus infinity
 name = vrndm
@@ -3901,4 +3906,4 @@ validate MAX, 7

 aarch64 = sqabs
 link-aarch64 = sqabs._EXT_
-generate int64x*_t
+generate int64x*_t