core_arch: Add LoongArch frecipe intrinsics

2024-10-14 19:56:24 +08:00 · 2024-10-14 19:56:24 +08:00 · 49e52f3657
commit 49e52f3657
parent 2d56d53f81
4 changed files with 184 additions and 12 deletions
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs
@ -668,6 +668,14 @@ extern "unadjusted" {
    fn __lasx_xvfrecip_s(a: v8f32) -> v8f32;
    #[link_name = "llvm.loongarch.lasx.xvfrecip.d"]
    fn __lasx_xvfrecip_d(a: v4f64) -> v4f64;
+    #[link_name = "llvm.loongarch.lasx.xvfrecipe.s"]
+    fn __lasx_xvfrecipe_s(a: v8f32) -> v8f32;
+    #[link_name = "llvm.loongarch.lasx.xvfrecipe.d"]
+    fn __lasx_xvfrecipe_d(a: v4f64) -> v4f64;
+    #[link_name = "llvm.loongarch.lasx.xvfrsqrte.s"]
+    fn __lasx_xvfrsqrte_s(a: v8f32) -> v8f32;
+    #[link_name = "llvm.loongarch.lasx.xvfrsqrte.d"]
+    fn __lasx_xvfrsqrte_d(a: v4f64) -> v4f64;
    #[link_name = "llvm.loongarch.lasx.xvfrint.s"]
    fn __lasx_xvfrint_s(a: v8f32) -> v8f32;
    #[link_name = "llvm.loongarch.lasx.xvfrint.d"]
@ -941,15 +949,15 @@ extern "unadjusted" {
    #[link_name = "llvm.loongarch.lasx.xvld"]
    fn __lasx_xvld(a: *const i8, b: i32) -> v32i8;
    #[link_name = "llvm.loongarch.lasx.xvst"]
-    fn __lasx_xvst(a: v32i8, b: *mut i8, c: i32) ;
+    fn __lasx_xvst(a: v32i8, b: *mut i8, c: i32);
    #[link_name = "llvm.loongarch.lasx.xvstelm.b"]
-    fn __lasx_xvstelm_b(a: v32i8, b: *mut i8, c: i32, d: u32) ;
+    fn __lasx_xvstelm_b(a: v32i8, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lasx.xvstelm.h"]
-    fn __lasx_xvstelm_h(a: v16i16, b: *mut i8, c: i32, d: u32) ;
+    fn __lasx_xvstelm_h(a: v16i16, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lasx.xvstelm.w"]
-    fn __lasx_xvstelm_w(a: v8i32, b: *mut i8, c: i32, d: u32) ;
+    fn __lasx_xvstelm_w(a: v8i32, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lasx.xvstelm.d"]
-    fn __lasx_xvstelm_d(a: v4i64, b: *mut i8, c: i32, d: u32) ;
+    fn __lasx_xvstelm_d(a: v4i64, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lasx.xvinsve0.w"]
    fn __lasx_xvinsve0_w(a: v8i32, b: v8i32, c: u32) -> v8i32;
    #[link_name = "llvm.loongarch.lasx.xvinsve0.d"]
@ -977,7 +985,7 @@ extern "unadjusted" {
    #[link_name = "llvm.loongarch.lasx.xvldx"]
    fn __lasx_xvldx(a: *const i8, b: i64) -> v32i8;
    #[link_name = "llvm.loongarch.lasx.xvstx"]
-    fn __lasx_xvstx(a: v32i8, b: *mut i8, c: i64) ;
+    fn __lasx_xvstx(a: v32i8, b: *mut i8, c: i64);
    #[link_name = "llvm.loongarch.lasx.xvextl.qu.du"]
    fn __lasx_xvextl_qu_du(a: v4u64) -> v4u64;
    #[link_name = "llvm.loongarch.lasx.xvinsgr2vr.w"]
@ -3979,6 +3987,34 @@ pub unsafe fn lasx_xvfrecip_d(a: v4f64) -> v4f64 {
    __lasx_xvfrecip_d(a)
 }

+#[inline]
+#[target_feature(enable = "lasx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lasx_xvfrecipe_s(a: v8f32) -> v8f32 {
+    __lasx_xvfrecipe_s(a)
+}
+
+#[inline]
+#[target_feature(enable = "lasx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lasx_xvfrecipe_d(a: v4f64) -> v4f64 {
+    __lasx_xvfrecipe_d(a)
+}
+
+#[inline]
+#[target_feature(enable = "lasx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lasx_xvfrsqrte_s(a: v8f32) -> v8f32 {
+    __lasx_xvfrsqrte_s(a)
+}
+
+#[inline]
+#[target_feature(enable = "lasx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lasx_xvfrsqrte_d(a: v4f64) -> v4f64 {
+    __lasx_xvfrsqrte_d(a)
+}
+
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
--- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/tests.rs
@ -6558,6 +6558,74 @@ unsafe fn test_lasx_xvfrecip_d() {
    assert_eq!(r, transmute(lasx_xvfrecip_d(transmute(a))));
 }

+#[simd_test(enable = "lasx,frecipe")]
+unsafe fn test_lasx_xvfrecipe_s() {
+    let a = u32x8::new(
+        1061538089, 1009467584, 1043164316, 1030910448, 1059062619, 1048927856, 1064915194,
+        1028524176,
+    );
+    let r = i64x4::new(
+        4809660548434472067,
+        4721787188318892829,
+        4644815739361740708,
+        4728509413412007938,
+    );
+
+    assert_eq!(r, transmute(lasx_xvfrecipe_s(transmute(a))));
+}
+
+#[simd_test(enable = "lasx,frecipe")]
+unsafe fn test_lasx_xvfrecipe_d() {
+    let a = u64x4::new(
+        4599514006383746620,
+        4607114589130093485,
+        4603063439897885463,
+        4602774413388259784,
+    );
+    let r = i64x4::new(
+        4614125529786744832,
+        4607216711966392320,
+        4610977572161847296,
+        4611499011256352768,
+    );
+
+    assert_eq!(r, transmute(lasx_xvfrecipe_d(transmute(a))));
+}
+
+#[simd_test(enable = "lasx,frecipe")]
+unsafe fn test_lasx_xvfrsqrte_s() {
+    let a = u32x8::new(
+        1042369896, 1033402040, 1063640659, 1061099374, 1064617699, 1050687308, 1049602990,
+        1047907124,
+    );
+    let r = i64x4::new(
+        4641680627989561881,
+        4581330281566770462,
+        4604034110053345047,
+        4612427253546066334,
+    );
+
+    assert_eq!(r, transmute(lasx_xvfrsqrte_s(transmute(a))));
+}
+
+#[simd_test(enable = "lasx,frecipe")]
+unsafe fn test_lasx_xvfrsqrte_d() {
+    let a = u64x4::new(
+        4601640737224225970,
+        4602882853441572005,
+        4594899837086694432,
+        4596019513190087348,
+    );
+    let r = i64x4::new(
+        4609450077243572224,
+        4608908592999825408,
+        4612828109287194624,
+        4612346183891812352,
+    );
+
+    assert_eq!(r, transmute(lasx_xvfrsqrte_d(transmute(a))));
+}
+
 #[simd_test(enable = "lasx")]
 unsafe fn test_lasx_xvfrint_s() {
    let a = u32x8::new(
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs
@ -698,6 +698,14 @@ extern "unadjusted" {
    fn __lsx_vfrecip_s(a: v4f32) -> v4f32;
    #[link_name = "llvm.loongarch.lsx.vfrecip.d"]
    fn __lsx_vfrecip_d(a: v2f64) -> v2f64;
+    #[link_name = "llvm.loongarch.lsx.vfrecipe.s"]
+    fn __lsx_vfrecipe_s(a: v4f32) -> v4f32;
+    #[link_name = "llvm.loongarch.lsx.vfrecipe.d"]
+    fn __lsx_vfrecipe_d(a: v2f64) -> v2f64;
+    #[link_name = "llvm.loongarch.lsx.vfrsqrte.s"]
+    fn __lsx_vfrsqrte_s(a: v4f32) -> v4f32;
+    #[link_name = "llvm.loongarch.lsx.vfrsqrte.d"]
+    fn __lsx_vfrsqrte_d(a: v2f64) -> v2f64;
    #[link_name = "llvm.loongarch.lsx.vfrint.s"]
    fn __lsx_vfrint_s(a: v4f32) -> v4f32;
    #[link_name = "llvm.loongarch.lsx.vfrint.d"]
@ -959,13 +967,13 @@ extern "unadjusted" {
    #[link_name = "llvm.loongarch.lsx.vfrintrm.d"]
    fn __lsx_vfrintrm_d(a: v2f64) -> v2f64;
    #[link_name = "llvm.loongarch.lsx.vstelm.b"]
-    fn __lsx_vstelm_b(a: v16i8, b: *mut i8, c: i32, d: u32) ;
+    fn __lsx_vstelm_b(a: v16i8, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lsx.vstelm.h"]
-    fn __lsx_vstelm_h(a: v8i16, b: *mut i8, c: i32, d: u32) ;
+    fn __lsx_vstelm_h(a: v8i16, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lsx.vstelm.w"]
-    fn __lsx_vstelm_w(a: v4i32, b: *mut i8, c: i32, d: u32) ;
+    fn __lsx_vstelm_w(a: v4i32, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lsx.vstelm.d"]
-    fn __lsx_vstelm_d(a: v2i64, b: *mut i8, c: i32, d: u32) ;
+    fn __lsx_vstelm_d(a: v2i64, b: *mut i8, c: i32, d: u32);
    #[link_name = "llvm.loongarch.lsx.vaddwev.d.w"]
    fn __lsx_vaddwev_d_w(a: v4i32, b: v4i32) -> v2i64;
    #[link_name = "llvm.loongarch.lsx.vaddwev.w.h"]
@ -1301,7 +1309,7 @@ extern "unadjusted" {
    #[link_name = "llvm.loongarch.lsx.vld"]
    fn __lsx_vld(a: *const i8, b: i32) -> v16i8;
    #[link_name = "llvm.loongarch.lsx.vst"]
-    fn __lsx_vst(a: v16i8, b: *mut i8, c: i32) ;
+    fn __lsx_vst(a: v16i8, b: *mut i8, c: i32);
    #[link_name = "llvm.loongarch.lsx.vssrlrn.b.h"]
    fn __lsx_vssrlrn_b_h(a: v8i16, b: v8i16) -> v16i8;
    #[link_name = "llvm.loongarch.lsx.vssrlrn.h.w"]
@ -1323,7 +1331,7 @@ extern "unadjusted" {
    #[link_name = "llvm.loongarch.lsx.vldx"]
    fn __lsx_vldx(a: *const i8, b: i64) -> v16i8;
    #[link_name = "llvm.loongarch.lsx.vstx"]
-    fn __lsx_vstx(a: v16i8, b: *mut i8, c: i64) ;
+    fn __lsx_vstx(a: v16i8, b: *mut i8, c: i64);
    #[link_name = "llvm.loongarch.lsx.vextl.qu.du"]
    fn __lsx_vextl_qu_du(a: v2u64) -> v2u64;
    #[link_name = "llvm.loongarch.lsx.bnz.b"]
@ -4068,6 +4076,34 @@ pub unsafe fn lsx_vfrecip_d(a: v2f64) -> v2f64 {
    __lsx_vfrecip_d(a)
 }

+#[inline]
+#[target_feature(enable = "lsx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lsx_vfrecipe_s(a: v4f32) -> v4f32 {
+    __lsx_vfrecipe_s(a)
+}
+
+#[inline]
+#[target_feature(enable = "lsx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lsx_vfrecipe_d(a: v2f64) -> v2f64 {
+    __lsx_vfrecipe_d(a)
+}
+
+#[inline]
+#[target_feature(enable = "lsx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lsx_vfrsqrte_s(a: v4f32) -> v4f32 {
+    __lsx_vfrsqrte_s(a)
+}
+
+#[inline]
+#[target_feature(enable = "lsx,frecipe")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub unsafe fn lsx_vfrsqrte_d(a: v2f64) -> v2f64 {
+    __lsx_vfrsqrte_d(a)
+}
+
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
--- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs
+++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/tests.rs
@ -3308,6 +3308,38 @@ unsafe fn test_lsx_vfrecip_d() {
    assert_eq!(r, transmute(lsx_vfrecip_d(transmute(a))));
 }

+#[simd_test(enable = "lsx,frecipe")]
+unsafe fn test_lsx_vfrecipe_s() {
+    let a = u32x4::new(1057583779, 1062308847, 1060089100, 1048454688);
+    let r = i64x2::new(4583644530211711115, 4647978179615164140);
+
+    assert_eq!(r, transmute(lsx_vfrecipe_s(transmute(a))));
+}
+
+#[simd_test(enable = "lsx,frecipe")]
+unsafe fn test_lsx_vfrecipe_d() {
+    let a = u64x2::new(4605515926442181274, 4605369703273365674);
+    let r = i64x2::new(4608204937770303488, 4608317161507651584);
+
+    assert_eq!(r, transmute(lsx_vfrecipe_d(transmute(a))));
+}
+
+#[simd_test(enable = "lsx,frecipe")]
+unsafe fn test_lsx_vfrsqrte_s() {
+    let a = u32x4::new(1064377488, 1055815904, 1056897740, 1064016656);
+    let r = i64x2::new(4592421282989204764, 4577184195020153336);
+
+    assert_eq!(r, transmute(lsx_vfrsqrte_s(transmute(a))));
+}
+
+#[simd_test(enable = "lsx,frecipe")]
+unsafe fn test_lsx_vfrsqrte_d() {
+    let a = u64x2::new(4602766865443628663, 4605323203937791867);
+    let r = i64x2::new(4608986772678901760, 4607734355383549952);
+
+    assert_eq!(r, transmute(lsx_vfrsqrte_d(transmute(a))));
+}
+
 #[simd_test(enable = "lsx")]
 unsafe fn test_lsx_vfrint_s() {
    let a = u32x4::new(1062138521, 1056849108, 1034089720, 1038314384);