From 55bbda8ff8c53984e1b15b69c3d0f7ed529e8b16 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 13 Feb 2025 03:27:00 +0000
Subject: [PATCH] Make a subset of `libm` symbols weakly available on all
 platforms

018616e78b ("Always have math functions but with `weak` linking
attribute if we can") made all math symbols available on platforms that
support weak linkage. This caused some unexpected regressions, however,
because our less accurate and sometimes slow routines were being
selected over the system `libm`, which also tends to be weak [1]. Thus,
0fab77e8d7 ("Don't include `math` for `unix` and `wasi` targets") was
applied to undo these changes on many platforms.

Now that some improvements have been made to `libm`, add back a subset
of these functions:

* cbrt
* ceil
* copysign
* fabs
* fdim
* floor
* fma
* fmax
* fmaximum
* fmin
* fminimum
* fmod
* rint
* round
* roundeven
* sqrt
* trunc

This list includes only functions that produce exact results (verified
with exhaustive / extensive tests, and also required by IEEE in most
cases), and for which benchmarks indicate performance similar to or
better than Musl's soft float math routines [^1]. All except `cbrt` also
have `f16` and `f128` implementations. Once more routines meet these
criteria, we can move them from platform-specific availability to always
available.

Once this change makes it to rust-lang/rust, we will also be able to
move the relevant functions from `std` to `core`.

[^1]: We still rely on the backend to provide optimized assembly
      routines when available.

[1]: https://github.com/rust-lang/rust/issues/128386
---
 library/compiler-builtins/src/lib.rs  |  29 +--
 library/compiler-builtins/src/math.rs | 271 +++++++++++++++++---------
 2 files changed, 179 insertions(+), 121 deletions(-)

diff --git a/library/compiler-builtins/src/lib.rs b/library/compiler-builtins/src/lib.rs
index 533878137dcd..6f5bd85981e8 100644
--- a/library/compiler-builtins/src/lib.rs
+++ b/library/compiler-builtins/src/lib.rs
@@ -41,40 +41,13 @@ mod macros;
 
 pub mod float;
 pub mod int;
-
-// Disable for any of the following:
-// - x86 without sse2 due to ABI issues
-//   - <https://github.com/rust-lang/rust/issues/114479>
-//   - but exclude UEFI since it is a soft-float target
-//     - <https://github.com/rust-lang/rust/issues/128533>
-// - All unix targets (linux, macos, freebsd, android, etc)
-// - wasm with known target_os
-#[cfg(not(any(
-    all(
-        target_arch = "x86",
-        not(target_feature = "sse2"),
-        not(target_os = "uefi"),
-    ),
-    unix,
-    all(target_family = "wasm", not(target_os = "unknown"))
-)))]
 pub mod math;
+pub mod mem;
 
 // `libm` expects its `support` module to be available in the crate root. This config can be
 // cleaned up once `libm` is made always available.
-#[cfg(not(any(
-    all(
-        target_arch = "x86",
-        not(target_feature = "sse2"),
-        not(target_os = "uefi"),
-    ),
-    unix,
-    all(target_family = "wasm", not(target_os = "unknown"))
-)))]
 use math::libm::support;
 
-pub mod mem;
-
 #[cfg(target_arch = "arm")]
 pub mod arm;
 
diff --git a/library/compiler-builtins/src/math.rs b/library/compiler-builtins/src/math.rs
index fef5358e3234..ccd9c54216c6 100644
--- a/library/compiler-builtins/src/math.rs
+++ b/library/compiler-builtins/src/math.rs
@@ -5,110 +5,195 @@
 #[path = "../libm/src/math/mod.rs"]
 pub(crate) mod libm;
 
-#[allow(unused_macros)]
-macro_rules! no_mangle {
+macro_rules! libm_intrinsics {
     ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => {
         intrinsics! {
             $(
                 pub extern "C" fn $fun($($iid: $ity),+) -> $oty {
-                    self::libm::$fun($($iid),+)
+                    $crate::math::libm::$fun($($iid),+)
                 }
             )+
         }
     }
 }
 
-#[cfg(not(windows))]
-no_mangle! {
-    fn acos(x: f64) -> f64;
-    fn asin(x: f64) -> f64;
-    fn cbrt(x: f64) -> f64;
-    fn expm1(x: f64) -> f64;
-    fn hypot(x: f64, y: f64) -> f64;
-    fn tan(x: f64) -> f64;
-    fn cos(x: f64) -> f64;
-    fn expf(x: f32) -> f32;
-    fn log2(x: f64) -> f64;
-    fn log2f(x: f32) -> f32;
-    fn log10(x: f64) -> f64;
-    fn log10f(x: f32) -> f32;
-    fn log(x: f64) -> f64;
-    fn logf(x: f32) -> f32;
-    fn round(x: f64) -> f64;
-    fn roundf(x: f32) -> f32;
-    fn rint(x: f64) -> f64;
-    fn rintf(x: f32) -> f32;
-    fn sin(x: f64) -> f64;
-    fn pow(x: f64, y: f64) -> f64;
-    fn powf(x: f32, y: f32) -> f32;
-    fn acosf(n: f32) -> f32;
-    fn atan2f(a: f32, b: f32) -> f32;
-    fn atanf(n: f32) -> f32;
-    fn coshf(n: f32) -> f32;
-    fn expm1f(n: f32) -> f32;
-    fn fdim(a: f64, b: f64) -> f64;
-    fn fdimf(a: f32, b: f32) -> f32;
-    fn log1pf(n: f32) -> f32;
-    fn sinhf(n: f32) -> f32;
-    fn tanhf(n: f32) -> f32;
-    fn ldexp(f: f64, n: i32) -> f64;
-    fn ldexpf(f: f32, n: i32) -> f32;
-    fn tgamma(x: f64) -> f64;
-    fn tgammaf(x: f32) -> f32;
-    fn atan(x: f64) -> f64;
-    fn atan2(x: f64, y: f64) -> f64;
-    fn cosh(x: f64) -> f64;
-    fn log1p(x: f64) -> f64;
-    fn sinh(x: f64) -> f64;
-    fn tanh(x: f64) -> f64;
-    fn cosf(x: f32) -> f32;
-    fn exp(x: f64) -> f64;
-    fn sinf(x: f32) -> f32;
-    fn exp2(x: f64) -> f64;
-    fn exp2f(x: f32) -> f32;
-    fn fma(x: f64, y: f64, z: f64) -> f64;
-    fn fmaf(x: f32, y: f32, z: f32) -> f32;
-    fn asinf(n: f32) -> f32;
-    fn cbrtf(n: f32) -> f32;
-    fn hypotf(x: f32, y: f32) -> f32;
-    fn tanf(n: f32) -> f32;
-
-    fn sqrtf(x: f32) -> f32;
-    fn sqrt(x: f64) -> f64;
-
-    fn ceil(x: f64) -> f64;
-    fn ceilf(x: f32) -> f32;
-    fn floor(x: f64) -> f64;
-    fn floorf(x: f32) -> f32;
-    fn trunc(x: f64) -> f64;
-    fn truncf(x: f32) -> f32;
-
-    fn fmin(x: f64, y: f64) -> f64;
-    fn fminf(x: f32, y: f32) -> f32;
-    fn fmax(x: f64, y: f64) -> f64;
-    fn fmaxf(x: f32, y: f32) -> f32;
-    // `f64 % f64`
-    fn fmod(x: f64, y: f64) -> f64;
-    // `f32 % f32`
-    fn fmodf(x: f32, y: f32) -> f32;
-
-    fn erf(x: f64) -> f64;
-    fn erff(x: f32) -> f32;
-    fn erfc(x: f64) -> f64;
-    fn erfcf(x: f32) -> f32;
-}
-
-// allow for windows (and other targets)
-intrinsics! {
-    pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
-        let r = self::libm::lgamma_r(x);
-        *s = r.1;
-        r.0
+/// This set of functions is well tested in `libm` and known to provide similar performance to
+/// system `libm`, as well as the same or better accuracy.
+pub mod full_availability {
+    #[cfg(f16_enabled)]
+    libm_intrinsics! {
+        fn ceilf16(x: f16) -> f16;
+        fn copysignf16(x: f16, y: f16) -> f16;
+        fn fabsf16(x: f16) -> f16;
+        fn fdimf16(x: f16, y: f16) -> f16;
+        fn floorf16(x: f16) -> f16;
+        fn fmaxf16(x: f16, y: f16) -> f16;
+        fn fmaximumf16(x: f16, y: f16) -> f16;
+        fn fminf16(x: f16, y: f16) -> f16;
+        fn fminimumf16(x: f16, y: f16) -> f16;
+        fn fmodf16(x: f16, y: f16) -> f16;
+        fn rintf16(x: f16) -> f16;
+        fn roundevenf16(x: f16) -> f16;
+        fn roundf16(x: f16) -> f16;
+        fn sqrtf16(x: f16) -> f16;
+        fn truncf16(x: f16) -> f16;
     }
 
-    pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
-        let r = self::libm::lgammaf_r(x);
-        *s = r.1;
-        r.0
+    /* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know
+     * the system libc provides in order to avoid conflicts. */
+
+    #[cfg(all(not(windows), not(target_vendor = "apple")))]
+    libm_intrinsics! {
+        /* f32 */
+        fn cbrtf(n: f32) -> f32;
+        fn ceilf(x: f32) -> f32;
+        fn copysignf(x: f32, y: f32) -> f32;
+        fn fabsf(x: f32) -> f32;
+        fn fdimf(a: f32, b: f32) -> f32;
+        fn floorf(x: f32) -> f32;
+        fn fmaf(x: f32, y: f32, z: f32) -> f32;
+        fn fmaxf(x: f32, y: f32) -> f32;
+        fn fminf(x: f32, y: f32) -> f32;
+        fn fmodf(x: f32, y: f32) -> f32;
+        fn rintf(x: f32) -> f32;
+        fn roundf(x: f32) -> f32;
+        fn sqrtf(x: f32) -> f32;
+        fn truncf(x: f32) -> f32;
+
+        /* f64 */
+        fn cbrt(x: f64) -> f64;
+        fn ceil(x: f64) -> f64;
+        fn copysign(x: f64, y: f64) -> f64;
+        fn fabs(x: f64) -> f64;
+        fn fdim(a: f64, b: f64) -> f64;
+        fn floor(x: f64) -> f64;
+        fn fma(x: f64, y: f64, z: f64) -> f64;
+        fn fmax(x: f64, y: f64) -> f64;
+        fn fmin(x: f64, y: f64) -> f64;
+        fn fmod(x: f64, y: f64) -> f64;
+        fn rint(x: f64) -> f64;
+        fn round(x: f64) -> f64;
+        fn sqrt(x: f64) -> f64;
+        fn trunc(x: f64) -> f64;
+    }
+
+    // Windows and macOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`,
+    // however, so we still provide a fallback.
+    libm_intrinsics! {
+        fn fmaximum(x: f64, y: f64) -> f64;
+        fn fmaximumf(x: f32, y: f32) -> f32;
+        fn fminimum(x: f64, y: f64) -> f64;
+        fn fminimumf(x: f32, y: f32) -> f32;
+        fn roundeven(x: f64) -> f64;
+        fn roundevenf(x: f32) -> f32;
+    }
+
+    #[cfg(f128_enabled)]
+    libm_intrinsics! {
+        fn ceilf128(x: f128) -> f128;
+        fn copysignf128(x: f128, y: f128) -> f128;
+        fn fabsf128(x: f128) -> f128;
+        fn fdimf128(x: f128, y: f128) -> f128;
+        fn floorf128(x: f128) -> f128;
+        fn fmaf128(x: f128, y: f128, z: f128) -> f128;
+        fn fmaxf128(x: f128, y: f128) -> f128;
+        fn fmaximumf128(x: f128, y: f128) -> f128;
+        fn fminf128(x: f128, y: f128) -> f128;
+        fn fminimumf128(x: f128, y: f128) -> f128;
+        fn fmodf128(x: f128, y: f128) -> f128;
+        fn rintf128(x: f128) -> f128;
+        fn roundevenf128(x: f128) -> f128;
+        fn roundf128(x: f128) -> f128;
+        fn sqrtf128(x: f128) -> f128;
+        fn truncf128(x: f128) -> f128;
+    }
+}
+
+/// This group of functions has more performance or precision issues than system versions, or
+/// are otherwise less well tested. Provide them only on platforms that have problems with the
+/// system `libm`.
+///
+/// As `libm` improves, more functions will be moved from this group to the first group.
+///
+/// Do not supply for any of the following:
+/// - x86 without sse2 due to ABI issues
+///   - <https://github.com/rust-lang/rust/issues/114479>
+///   - but exclude UEFI since it is a soft-float target
+///     - <https://github.com/rust-lang/rust/issues/128533>
+/// - All unix targets (linux, macos, freebsd, android, etc)
+/// - wasm with known target_os
+#[cfg(not(any(
+    all(
+        target_arch = "x86",
+        not(target_feature = "sse2"),
+        not(target_os = "uefi"),
+    ),
+    unix,
+    all(target_family = "wasm", not(target_os = "unknown"))
+)))]
+pub mod partial_availability {
+    #[cfg(not(windows))]
+    libm_intrinsics! {
+        fn acos(x: f64) -> f64;
+        fn acosf(n: f32) -> f32;
+        fn asin(x: f64) -> f64;
+        fn asinf(n: f32) -> f32;
+        fn atan(x: f64) -> f64;
+        fn atan2(x: f64, y: f64) -> f64;
+        fn atan2f(a: f32, b: f32) -> f32;
+        fn atanf(n: f32) -> f32;
+        fn cos(x: f64) -> f64;
+        fn cosf(x: f32) -> f32;
+        fn cosh(x: f64) -> f64;
+        fn coshf(n: f32) -> f32;
+        fn erf(x: f64) -> f64;
+        fn erfc(x: f64) -> f64;
+        fn erfcf(x: f32) -> f32;
+        fn erff(x: f32) -> f32;
+        fn exp(x: f64) -> f64;
+        fn exp2(x: f64) -> f64;
+        fn exp2f(x: f32) -> f32;
+        fn expf(x: f32) -> f32;
+        fn expm1(x: f64) -> f64;
+        fn expm1f(n: f32) -> f32;
+        fn hypot(x: f64, y: f64) -> f64;
+        fn hypotf(x: f32, y: f32) -> f32;
+        fn ldexp(f: f64, n: i32) -> f64;
+        fn ldexpf(f: f32, n: i32) -> f32;
+        fn log(x: f64) -> f64;
+        fn log10(x: f64) -> f64;
+        fn log10f(x: f32) -> f32;
+        fn log1p(x: f64) -> f64;
+        fn log1pf(n: f32) -> f32;
+        fn log2(x: f64) -> f64;
+        fn log2f(x: f32) -> f32;
+        fn logf(x: f32) -> f32;
+        fn pow(x: f64, y: f64) -> f64;
+        fn powf(x: f32, y: f32) -> f32;
+        fn sin(x: f64) -> f64;
+        fn sinf(x: f32) -> f32;
+        fn sinh(x: f64) -> f64;
+        fn sinhf(n: f32) -> f32;
+        fn tan(x: f64) -> f64;
+        fn tanf(n: f32) -> f32;
+        fn tanh(x: f64) -> f64;
+        fn tanhf(n: f32) -> f32;
+        fn tgamma(x: f64) -> f64;
+        fn tgammaf(x: f32) -> f32;
+    }
+
+    // allow for windows (and other targets)
+    intrinsics! {
+        pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
+            let r = super::libm::lgamma_r(x);
+            *s = r.1;
+            r.0
+        }
+
+        pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
+            let r = super::libm::lgammaf_r(x);
+            *s = r.1;
+            r.0
+        }
     }
 }