From 1b7346bf5fb63c4c74ad492e7a566b01feec6f56 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 19:30:04 -0500 Subject: [PATCH 1/3] Introduce `math::arch::intrinsics` This module provides implementations of basic functions that defer to LLVM for what to do, rather than either using a builtin operation or calling another function in this library. `math::arch` will become the home of anything architecture-specific in the future. --- .../libm/src/math/arch/intrinsics.rs | 52 +++++++++++++++++++ .../libm/src/math/arch/mod.rs | 9 ++++ .../compiler-builtins/libm/src/math/mod.rs | 1 + 3 files changed, 62 insertions(+) create mode 100644 library/compiler-builtins/libm/src/math/arch/intrinsics.rs create mode 100644 library/compiler-builtins/libm/src/math/arch/mod.rs diff --git a/library/compiler-builtins/libm/src/math/arch/intrinsics.rs b/library/compiler-builtins/libm/src/math/arch/intrinsics.rs new file mode 100644 index 000000000000..1cf9291f4c75 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/intrinsics.rs @@ -0,0 +1,52 @@ +// Config is needed for times when this module is available but we don't call everything +#![allow(dead_code)] + +pub fn ceil(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::ceilf64(x) } +} + +pub fn ceilf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::ceilf32(x) } +} + +pub fn fabs(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::fabsf64(x) } +} + +pub fn fabsf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::fabsf32(x) } +} + +pub fn floor(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::floorf64(x) } +} + +pub fn floorf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::floorf32(x) } +} + +pub fn sqrt(x: f64) -> f64 { + // SAFETY: safe 
intrinsic with no preconditions + unsafe { core::intrinsics::sqrtf64(x) } +} + +pub fn sqrtf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::sqrtf32(x) } +} + +pub fn trunc(x: f64) -> f64 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::truncf64(x) } +} + +pub fn truncf(x: f32) -> f32 { + // SAFETY: safe intrinsic with no preconditions + unsafe { core::intrinsics::truncf32(x) } +} diff --git a/library/compiler-builtins/libm/src/math/arch/mod.rs b/library/compiler-builtins/libm/src/math/arch/mod.rs new file mode 100644 index 000000000000..a4bc218b743d --- /dev/null +++ b/library/compiler-builtins/libm/src/math/arch/mod.rs @@ -0,0 +1,9 @@ +//! Architecture-specific routines and operations. +//! +//! LLVM will already optimize calls to some of these in cases that there are hardware +//! instructions. Providing an implementation here just ensures that the faster implementation +//! is used when calling the function directly. This helps anyone who uses `libm` directly, as +//! well as improving things when these routines are called as part of other implementations. + +#[cfg(intrinsics_enabled)] +pub mod intrinsics; diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index 9baa57fc8825..e3e6846d377f 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -302,6 +302,7 @@ pub use self::trunc::trunc; pub use self::truncf::truncf; // Private modules +mod arch; mod expo2; mod fenv; mod k_cos; From d54896343cab56e2f4c9866e54e7954c9c70d753 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 26 Oct 2024 02:56:22 -0500 Subject: [PATCH 2/3] Introduce a `select_implementation` macro Currently there is a macro called `llvm_intrinsically_optimized` that uses an intrinsic rather than the function implementation if the configuration is correct. 
Add a new macro `select_implementation` that is somewhat cleaner to use. In the future, we can update this macro with more fields to specify other implementations that may be selected, such as something architecture-specific or e.g. using a generic implementation for `f32` routines, rather than those that convert to `f64`. This introduces a `macros` module within `math/support`. We will be able to move more things here later. --- .../compiler-builtins/libm/src/math/mod.rs | 63 ++++++++++--------- .../libm/src/math/support/macros.rs | 34 ++++++++++ .../libm/src/math/support/mod.rs | 2 + 3 files changed, 69 insertions(+), 30 deletions(-) create mode 100644 library/compiler-builtins/libm/src/math/support/macros.rs create mode 100644 library/compiler-builtins/libm/src/math/support/mod.rs diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index e3e6846d377f..a7e16bfc865c 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -74,6 +74,7 @@ macro_rules! div { }; } +// FIXME: phase this out, to be replaced by the more flexible `select_implementation` macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))] @@ -85,6 +86,38 @@ macro_rules! 
llvm_intrinsically_optimized { }; } +// Private modules +#[macro_use] +mod support; +mod arch; +mod expo2; +mod fenv; +mod k_cos; +mod k_cosf; +mod k_expo2; +mod k_expo2f; +mod k_sin; +mod k_sinf; +mod k_tan; +mod k_tanf; +mod rem_pio2; +mod rem_pio2_large; +mod rem_pio2f; + +// Private re-imports +use self::expo2::expo2; +use self::k_cos::k_cos; +use self::k_cosf::k_cosf; +use self::k_expo2::k_expo2; +use self::k_expo2f::k_expo2f; +use self::k_sin::k_sin; +use self::k_sinf::k_sinf; +use self::k_tan::k_tan; +use self::k_tanf::k_tanf; +use self::rem_pio2::rem_pio2; +use self::rem_pio2_large::rem_pio2_large; +use self::rem_pio2f::rem_pio2f; + // Public modules mod acos; mod acosf; @@ -301,36 +334,6 @@ pub use self::tgammaf::tgammaf; pub use self::trunc::trunc; pub use self::truncf::truncf; -// Private modules -mod arch; -mod expo2; -mod fenv; -mod k_cos; -mod k_cosf; -mod k_expo2; -mod k_expo2f; -mod k_sin; -mod k_sinf; -mod k_tan; -mod k_tanf; -mod rem_pio2; -mod rem_pio2_large; -mod rem_pio2f; - -// Private re-imports -use self::expo2::expo2; -use self::k_cos::k_cos; -use self::k_cosf::k_cosf; -use self::k_expo2::k_expo2; -use self::k_expo2f::k_expo2f; -use self::k_sin::k_sin; -use self::k_sinf::k_sinf; -use self::k_tan::k_tan; -use self::k_tanf::k_tanf; -use self::rem_pio2::rem_pio2; -use self::rem_pio2_large::rem_pio2_large; -use self::rem_pio2f::rem_pio2f; - #[inline] fn get_high_word(x: f64) -> u32 { (x.to_bits() >> 32) as u32 diff --git a/library/compiler-builtins/libm/src/math/support/macros.rs b/library/compiler-builtins/libm/src/math/support/macros.rs new file mode 100644 index 000000000000..6bc75837a349 --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/macros.rs @@ -0,0 +1,34 @@ +/// Choose among using an intrinsic (if available) and falling back to the default function body. +/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the +/// function. 
+/// +/// Use this if the intrinsic is likely to be more performant on the platform(s) specified +/// in `intrinsic_available`. +/// +/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account +/// for e.g. the `unstable-intrinsics` or `force-soft-float` features. +macro_rules! select_implementation { + ( + name: $fname:ident, + // Configuration meta for when to call intrinsics and let LLVM figure it out + $( use_intrinsic: $use_intrinsic:meta, )? + args: $($arg:ident),+ , + ) => { + // FIXME: these use paths that are a pretty fragile (`super`). We should figure out + // something better w.r.t. how this is vendored into compiler-builtins. + + // Never use intrinsics if we are forcing soft floats, and only enable with the + // `unstable-intrinsics` feature. + #[cfg(intrinsics_enabled)] + select_implementation! { + @cfg $( $use_intrinsic )?; + if true { + return super::arch::intrinsics::$fname( $($arg),+ ); + } + } + }; + + // Coalesce helper to construct an expression only if a config is provided + (@cfg ; $ex:expr) => { }; + (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex }; +} diff --git a/library/compiler-builtins/libm/src/math/support/mod.rs b/library/compiler-builtins/libm/src/math/support/mod.rs new file mode 100644 index 000000000000..10532f0d115a --- /dev/null +++ b/library/compiler-builtins/libm/src/math/support/mod.rs @@ -0,0 +1,2 @@ +#[macro_use] +pub mod macros; From 60e7e3b338c3aea7b615073d712c9c568e2a0e9a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 28 Oct 2024 19:38:19 -0500 Subject: [PATCH 3/3] Make use of `select_implementation` Replace all uses of `llvm_intrinsically_optimized` with `select_implementation`. 
--- library/compiler-builtins/libm/src/math/ceil.rs | 12 +++++------- library/compiler-builtins/libm/src/math/ceilf.rs | 12 +++++------- library/compiler-builtins/libm/src/math/fabs.rs | 12 +++++------- library/compiler-builtins/libm/src/math/fabsf.rs | 12 +++++------- library/compiler-builtins/libm/src/math/floor.rs | 12 +++++------- .../compiler-builtins/libm/src/math/floorf.rs | 12 +++++------- library/compiler-builtins/libm/src/math/mod.rs | 12 ------------ library/compiler-builtins/libm/src/math/sqrt.rs | 16 +++++----------- library/compiler-builtins/libm/src/math/sqrtf.rs | 16 +++++----------- library/compiler-builtins/libm/src/math/trunc.rs | 12 +++++------- .../compiler-builtins/libm/src/math/truncf.rs | 12 +++++------- 11 files changed, 50 insertions(+), 90 deletions(-) diff --git a/library/compiler-builtins/libm/src/math/ceil.rs b/library/compiler-builtins/libm/src/math/ceil.rs index 1593fdaffcee..0da01b4d0b69 100644 --- a/library/compiler-builtins/libm/src/math/ceil.rs +++ b/library/compiler-builtins/libm/src/math/ceil.rs @@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON; /// Finds the nearest integer greater than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceil(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.ceil` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::ceilf64(x) } - } + select_implementation! 
{ + name: ceil, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { //use an alternative implementation on x86, because the diff --git a/library/compiler-builtins/libm/src/math/ceilf.rs b/library/compiler-builtins/libm/src/math/ceilf.rs index bf9ba12279cf..0da384350aee 100644 --- a/library/compiler-builtins/libm/src/math/ceilf.rs +++ b/library/compiler-builtins/libm/src/math/ceilf.rs @@ -5,14 +5,12 @@ use core::f32; /// Finds the nearest integer greater than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceilf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.ceil` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::ceilf32(x) } - } + select_implementation! { + name: ceilf, + use_intrinsic: target_arch = "wasm32", + args: x, } + let mut ui = x.to_bits(); let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32; diff --git a/library/compiler-builtins/libm/src/math/fabs.rs b/library/compiler-builtins/libm/src/math/fabs.rs index 3b0628aa63a7..8d3ea2fd6479 100644 --- a/library/compiler-builtins/libm/src/math/fabs.rs +++ b/library/compiler-builtins/libm/src/math/fabs.rs @@ -5,14 +5,12 @@ use core::u64; /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabs(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.abs` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::fabsf64(x) } - } + select_implementation! 
{ + name: fabs, + use_intrinsic: target_arch = "wasm32", + args: x, } + f64::from_bits(x.to_bits() & (u64::MAX / 2)) } diff --git a/library/compiler-builtins/libm/src/math/fabsf.rs b/library/compiler-builtins/libm/src/math/fabsf.rs index f81c8ca44236..1dac6389d8f4 100644 --- a/library/compiler-builtins/libm/src/math/fabsf.rs +++ b/library/compiler-builtins/libm/src/math/fabsf.rs @@ -3,14 +3,12 @@ /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.abs` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::fabsf32(x) } - } + select_implementation! { + name: fabsf, + use_intrinsic: target_arch = "wasm32", + args: x, } + f32::from_bits(x.to_bits() & 0x7fffffff) } diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs index e8fb21e5884b..2b9955ebae34 100644 --- a/library/compiler-builtins/libm/src/math/floor.rs +++ b/library/compiler-builtins/libm/src/math/floor.rs @@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON; /// Finds the nearest integer less than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floor(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.floor` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::floorf64(x) } - } + select_implementation! 
{ + name: floor, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { //use an alternative implementation on x86, because the diff --git a/library/compiler-builtins/libm/src/math/floorf.rs b/library/compiler-builtins/libm/src/math/floorf.rs index f66cab74fdcf..4f38cb15b7d8 100644 --- a/library/compiler-builtins/libm/src/math/floorf.rs +++ b/library/compiler-builtins/libm/src/math/floorf.rs @@ -5,14 +5,12 @@ use core::f32; /// Finds the nearest integer less than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floorf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.floor` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::floorf32(x) } - } + select_implementation! { + name: floorf, + use_intrinsic: target_arch = "wasm32", + args: x, } + let mut ui = x.to_bits(); let e = (((ui >> 23) as i32) & 0xff) - 0x7f; diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs index a7e16bfc865c..393bc5150938 100644 --- a/library/compiler-builtins/libm/src/math/mod.rs +++ b/library/compiler-builtins/libm/src/math/mod.rs @@ -74,18 +74,6 @@ macro_rules! div { }; } -// FIXME: phase this out, to be replaced by the more flexible `select_implementation` -macro_rules! 
llvm_intrinsically_optimized { - (#[cfg($($clause:tt)*)] $e:expr) => { - #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))] - { - if true { // thwart the dead code lint - $e - } - } - }; -} - // Private modules #[macro_use] mod support; diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs index e2907384dcdb..2e856100f7dd 100644 --- a/library/compiler-builtins/libm/src/math/sqrt.rs +++ b/library/compiler-builtins/libm/src/math/sqrt.rs @@ -81,18 +81,12 @@ use core::f64; /// The square root of `x` (f64). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.sqrt` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return if x < 0.0 { - f64::NAN - } else { - unsafe { ::core::intrinsics::sqrtf64(x) } - } - } + select_implementation! { + name: sqrt, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already diff --git a/library/compiler-builtins/libm/src/math/sqrtf.rs b/library/compiler-builtins/libm/src/math/sqrtf.rs index a738fc0b663a..b2996b350cb4 100644 --- a/library/compiler-builtins/libm/src/math/sqrtf.rs +++ b/library/compiler-builtins/libm/src/math/sqrtf.rs @@ -16,18 +16,12 @@ /// The square root of `x` (f32). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.sqrt` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! 
{ - #[cfg(target_arch = "wasm32")] { - return if x < 0.0 { - ::core::f32::NAN - } else { - unsafe { ::core::intrinsics::sqrtf32(x) } - } - } + select_implementation! { + name: sqrtf, + use_intrinsic: target_arch = "wasm32", + args: x, } + #[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))] { // Note: This path is unlikely since LLVM will usually have already diff --git a/library/compiler-builtins/libm/src/math/trunc.rs b/library/compiler-builtins/libm/src/math/trunc.rs index f7892a2c5536..6961bb950600 100644 --- a/library/compiler-builtins/libm/src/math/trunc.rs +++ b/library/compiler-builtins/libm/src/math/trunc.rs @@ -2,14 +2,12 @@ use core::f64; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn trunc(x: f64) -> f64 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f64.trunc` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::truncf64(x) } - } + select_implementation! { + name: trunc, + use_intrinsic: target_arch = "wasm32", + args: x, } + let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 let mut i: u64 = x.to_bits(); diff --git a/library/compiler-builtins/libm/src/math/truncf.rs b/library/compiler-builtins/libm/src/math/truncf.rs index 20d5b73bd675..8270c8eb392d 100644 --- a/library/compiler-builtins/libm/src/math/truncf.rs +++ b/library/compiler-builtins/libm/src/math/truncf.rs @@ -2,14 +2,12 @@ use core::f32; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn truncf(x: f32) -> f32 { - // On wasm32 we know that LLVM's intrinsic will compile to an optimized - // `f32.trunc` native instruction, so we can leverage this for both code size - // and speed. - llvm_intrinsically_optimized! { - #[cfg(target_arch = "wasm32")] { - return unsafe { ::core::intrinsics::truncf32(x) } - } + select_implementation! 
{ + name: truncf, + use_intrinsic: target_arch = "wasm32", + args: x, } + let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut i: u32 = x.to_bits();