Add vrecpe neon instruction (#1079)

This adds the vector instructions for calculating the lane-wise reciprocal estimate.
2021-03-13 18:43:35 +01:00 · 2021-03-13 18:43:35 +01:00 · b132a5c769
commit b132a5c769
parent 282cfa4db7
3 changed files with 103 additions and 0 deletions
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@ -1189,6 +1189,32 @@ pub unsafe fn vrsqrteq_f64(a: float64x2_t) -> float64x2_t {
    vrsqrteq_f64_(a)
 }

+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpe))]
+pub unsafe fn vrecpe_f64(a: float64x1_t) -> float64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v1f64")]
+        fn vrecpe_f64_(a: float64x1_t) -> float64x1_t;
+    }
+    vrecpe_f64_(a)
+}
+
+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(frecpe))]
+pub unsafe fn vrecpeq_f64(a: float64x2_t) -> float64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f64")]
+        fn vrecpeq_f64_(a: float64x2_t) -> float64x2_t;
+    }
+    vrecpeq_f64_(a)
+}
+
 #[cfg(test)]
 mod test {
    use super::*;
@ -2339,4 +2365,20 @@ mod test {
        let r: f64x2 = transmute(vrsqrteq_f64(transmute(a)));
        assert_eq!(r, e);
    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpe_f64() {
+        let a: f64 = 4.0;
+        let e: f64 = 0.24951171875;
+        let r: f64 = transmute(vrecpe_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpeq_f64() {
+        let a: f64x2 = f64x2::new(4.0, 3.0);
+        let e: f64x2 = f64x2::new(0.24951171875, 0.3330078125);
+        let r: f64x2 = transmute(vrecpeq_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
 }
--- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs
@ -3381,6 +3381,38 @@ pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
 vrsqrteq_f32_(a)
 }

+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
+pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f32")]
+        fn vrecpe_f32_(a: float32x2_t) -> float32x2_t;
+    }
+vrecpe_f32_(a)
+}
+
+/// Reciprocal estimate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
+pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v4f32")]
+        fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+vrecpeq_f32_(a)
+}
+
 #[cfg(test)]
 #[allow(overflowing_literals)]
 mod test {
@ -6012,4 +6044,20 @@ mod test {
        let r: f32x4 = transmute(vrsqrteq_f32(transmute(a)));
        assert_eq!(r, e);
    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpe_f32() {
+        let a: f32x2 = f32x2::new(4.0, 3.0);
+        let e: f32x2 = f32x2::new(0.24951171875, 0.3330078125);
+        let r: f32x2 = transmute(vrecpe_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpeq_f32() {
+        let a: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0);
+        let e: f32x4 = f32x4::new(0.24951171875, 0.3330078125, 0.4990234375, 0.998046875);
+        let r: f32x4 = transmute(vrecpeq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
 }
--- a/library/stdarch/crates/stdarch-gen/neon.spec
+++ b/library/stdarch/crates/stdarch-gen/neon.spec
@ -742,3 +742,16 @@ generate float64x*_t
 arm = vrsqrte
 link-arm = vrsqrte._EXT_
 generate float*_t
+
+/// Reciprocal estimate.
+name = vrecpe
+a = 4.0, 3.0, 2.0, 1.0
+validate 0.24951171875, 0.3330078125, 0.4990234375, 0.998046875
+
+aarch64 = frecpe
+link-aarch64 = frecpe._EXT_
+generate float64x*_t
+
+arm = vrecpe
+link-arm = vrecpe._EXT_
+generate float*_t