Merge pull request rust-lang/libm#344 from tgross35/select-implementation

Introduce a `select_implementation` macro
This commit is contained in:
Trevor Gross 2024-10-28 19:46:51 -05:00 committed by GitHub
commit ee2d7fded4
15 changed files with 178 additions and 117 deletions

View file

@ -0,0 +1,52 @@
// Config is needed for times when this module is available but we don't call everything
#![allow(dead_code)]
pub fn ceil(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::ceilf64(x) }
}
pub fn ceilf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::ceilf32(x) }
}
pub fn fabs(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::fabsf64(x) }
}
pub fn fabsf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::fabsf32(x) }
}
pub fn floor(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::floorf64(x) }
}
pub fn floorf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::floorf32(x) }
}
pub fn sqrt(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::sqrtf64(x) }
}
pub fn sqrtf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::sqrtf32(x) }
}
pub fn trunc(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::truncf64(x) }
}
pub fn truncf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::truncf32(x) }
}

View file

@ -0,0 +1,9 @@
//! Architecture-specific routines and operations.
//!
//! LLVM will already optimize calls to some of these in cases that there are hardware
//! instructions. Providing an implementation here just ensures that the faster implementation
//! is used when calling the function directly. This helps anyone who uses `libm` directly, as
//! well as improving things when these routines are called as part of other implementations.

// `intrinsics_enabled` is a custom cfg emitted by `build.rs`; it already folds in
// the relevant feature checks, so no extra `feature = "..."` clauses are needed here.
#[cfg(intrinsics_enabled)]
pub mod intrinsics;

View file

@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
/// Finds the nearest integer greater than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn ceil(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.ceil` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::ceilf64(x) }
}
select_implementation! {
name: ceil,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
{
//use an alternative implementation on x86, because the

View file

@ -5,14 +5,12 @@ use core::f32;
/// Finds the nearest integer greater than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn ceilf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.ceil` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::ceilf32(x) }
}
select_implementation! {
name: ceilf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let mut ui = x.to_bits();
let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;

View file

@ -5,14 +5,12 @@ use core::u64;
/// by direct manipulation of the bit representation of `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fabs(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.abs` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::fabsf64(x) }
}
select_implementation! {
name: fabs,
use_intrinsic: target_arch = "wasm32",
args: x,
}
f64::from_bits(x.to_bits() & (u64::MAX / 2))
}

View file

@ -3,14 +3,12 @@
/// by direct manipulation of the bit representation of `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fabsf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.abs` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::fabsf32(x) }
}
select_implementation! {
name: fabsf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
f32::from_bits(x.to_bits() & 0x7fffffff)
}

View file

@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
/// Finds the nearest integer less than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn floor(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.floor` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::floorf64(x) }
}
select_implementation! {
name: floor,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
{
//use an alternative implementation on x86, because the

View file

@ -5,14 +5,12 @@ use core::f32;
/// Finds the nearest integer less than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn floorf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.floor` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::floorf32(x) }
}
select_implementation! {
name: floorf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let mut ui = x.to_bits();
let e = (((ui >> 23) as i32) & 0xff) - 0x7f;

View file

@ -74,16 +74,37 @@ macro_rules! div {
};
}
/// Expand `$e` only when LLVM intrinsics may be used: requires the build-script-set
/// `intrinsics_enabled` cfg, the absence of the `force-soft-floats` feature, and the
/// caller-supplied cfg clause (typically a `target_arch` check).
///
/// `$e` is usually a `return` of the intrinsic result, so when the cfg matches the
/// rest of the enclosing function body becomes the fallback path.
macro_rules! llvm_intrinsically_optimized {
    (#[cfg($($clause:tt)*)] $e:expr) => {
        #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))]
        {
            // Wrapping in `if true` keeps the code after the macro invocation from
            // being flagged as dead code when this branch is compiled in.
            if true { // thwart the dead code lint
                $e
            }
        }
    };
}
// Private modules
#[macro_use]
mod support;
mod arch;
mod expo2;
mod fenv;
mod k_cos;
mod k_cosf;
mod k_expo2;
mod k_expo2f;
mod k_sin;
mod k_sinf;
mod k_tan;
mod k_tanf;
mod rem_pio2;
mod rem_pio2_large;
mod rem_pio2f;
// Private re-imports
use self::expo2::expo2;
use self::k_cos::k_cos;
use self::k_cosf::k_cosf;
use self::k_expo2::k_expo2;
use self::k_expo2f::k_expo2f;
use self::k_sin::k_sin;
use self::k_sinf::k_sinf;
use self::k_tan::k_tan;
use self::k_tanf::k_tanf;
use self::rem_pio2::rem_pio2;
use self::rem_pio2_large::rem_pio2_large;
use self::rem_pio2f::rem_pio2f;
// Public modules
mod acos;
@ -301,35 +322,6 @@ pub use self::tgammaf::tgammaf;
pub use self::trunc::trunc;
pub use self::truncf::truncf;
// Private modules
mod expo2;
mod fenv;
mod k_cos;
mod k_cosf;
mod k_expo2;
mod k_expo2f;
mod k_sin;
mod k_sinf;
mod k_tan;
mod k_tanf;
mod rem_pio2;
mod rem_pio2_large;
mod rem_pio2f;
// Private re-imports
use self::expo2::expo2;
use self::k_cos::k_cos;
use self::k_cosf::k_cosf;
use self::k_expo2::k_expo2;
use self::k_expo2f::k_expo2f;
use self::k_sin::k_sin;
use self::k_sinf::k_sinf;
use self::k_tan::k_tan;
use self::k_tanf::k_tanf;
use self::rem_pio2::rem_pio2;
use self::rem_pio2_large::rem_pio2_large;
use self::rem_pio2f::rem_pio2f;
#[inline]
fn get_high_word(x: f64) -> u32 {
(x.to_bits() >> 32) as u32

View file

@ -81,18 +81,12 @@ use core::f64;
/// The square root of `x` (f64).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn sqrt(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.sqrt` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return if x < 0.0 {
f64::NAN
} else {
unsafe { ::core::intrinsics::sqrtf64(x) }
}
}
select_implementation! {
name: sqrt,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))]
{
// Note: This path is unlikely since LLVM will usually have already

View file

@ -16,18 +16,12 @@
/// The square root of `x` (f32).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn sqrtf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.sqrt` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return if x < 0.0 {
::core::f32::NAN
} else {
unsafe { ::core::intrinsics::sqrtf32(x) }
}
}
select_implementation! {
name: sqrtf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
{
// Note: This path is unlikely since LLVM will usually have already

View file

@ -0,0 +1,34 @@
/// Choose among using an intrinsic (if available) and falling back to the default function body.
/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the
/// function.
///
/// Use this if the intrinsic is likely to be more performant on the platform(s) specified
/// in `use_intrinsic`.
///
/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account
/// for e.g. the `unstable-intrinsics` or `force-soft-floats` features.
macro_rules! select_implementation {
    (
        name: $fname:ident,
        // Configuration meta for when to call intrinsics and let LLVM figure it out
        $( use_intrinsic: $use_intrinsic:meta, )?
        args: $($arg:ident),+ ,
    ) => {
        // FIXME: these use paths that are a pretty fragile (`super`). We should figure out
        // something better w.r.t. how this is vendored into compiler-builtins.

        // Never use intrinsics if we are forcing soft floats, and only enable with the
        // `unstable-intrinsics` feature.
        #[cfg(intrinsics_enabled)]
        select_implementation! {
            @cfg $( $use_intrinsic )?;
            // `if true` keeps the statements after this macro invocation in the
            // calling function from tripping the unreachable/dead-code lints.
            if true {
                return super::arch::intrinsics::$fname( $($arg),+ );
            }
        }
    };

    // Coalesce helper to construct an expression only if a config is provided.
    // First arm: no cfg meta was supplied, so emit nothing (fallback body runs).
    (@cfg ; $ex:expr) => { };
    // Second arm: gate the early-return expression behind the supplied cfg.
    (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
}

View file

@ -0,0 +1,2 @@
// `#[macro_use]` makes the macros defined in the child module available
// crate-wide without per-module imports.
#[macro_use]
pub mod macros;

View file

@ -2,14 +2,12 @@ use core::f64;
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn trunc(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.trunc` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::truncf64(x) }
}
select_implementation! {
name: trunc,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
let mut i: u64 = x.to_bits();

View file

@ -2,14 +2,12 @@ use core::f32;
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn truncf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.trunc` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::truncf32(x) }
}
select_implementation! {
name: truncf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut i: u32 = x.to_bits();