Merge pull request rust-lang/libm#344 from tgross35/select-implementation

Introduce a `select_implementation` macro
This commit is contained in:
Trevor Gross 2024-10-28 19:46:51 -05:00 committed by GitHub
commit ee2d7fded4
15 changed files with 178 additions and 117 deletions

View file

@ -0,0 +1,52 @@
// Config is needed for times when this module is available but we don't call everything
#![allow(dead_code)]
pub fn ceil(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::ceilf64(x) }
}
pub fn ceilf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::ceilf32(x) }
}
pub fn fabs(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::fabsf64(x) }
}
pub fn fabsf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::fabsf32(x) }
}
pub fn floor(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::floorf64(x) }
}
pub fn floorf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::floorf32(x) }
}
pub fn sqrt(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::sqrtf64(x) }
}
pub fn sqrtf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::sqrtf32(x) }
}
pub fn trunc(x: f64) -> f64 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::truncf64(x) }
}
pub fn truncf(x: f32) -> f32 {
// SAFETY: safe intrinsic with no preconditions
unsafe { core::intrinsics::truncf32(x) }
}

View file

@ -0,0 +1,9 @@
//! Architecture-specific routines and operations.
//!
//! LLVM will already optimize calls to some of these in cases that there are hardware
//! instructions. Providing an implementation here just ensures that the faster implementation
//! is used when calling the function directly. This helps anyone who uses `libm` directly, as
//! well as improving things when these routines are called as part of other implementations.

// `intrinsics_enabled` is a custom cfg emitted by `build.rs`; it already folds in
// the relevant feature checks, so no extra `feature = "..."` clauses are needed here.
#[cfg(intrinsics_enabled)]
pub mod intrinsics;

View file

@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
/// Finds the nearest integer greater than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn ceil(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.ceil` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::ceilf64(x) }
}
select_implementation! {
name: ceil,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
{
//use an alternative implementation on x86, because the

View file

@ -5,14 +5,12 @@ use core::f32;
/// Finds the nearest integer greater than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn ceilf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.ceil` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::ceilf32(x) }
}
select_implementation! {
name: ceilf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let mut ui = x.to_bits();
let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;

View file

@ -5,14 +5,12 @@ use core::u64;
/// by direct manipulation of the bit representation of `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fabs(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.abs` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::fabsf64(x) }
}
select_implementation! {
name: fabs,
use_intrinsic: target_arch = "wasm32",
args: x,
}
f64::from_bits(x.to_bits() & (u64::MAX / 2))
}

View file

@ -3,14 +3,12 @@
/// by direct manipulation of the bit representation of `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn fabsf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.abs` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::fabsf32(x) }
}
select_implementation! {
name: fabsf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
f32::from_bits(x.to_bits() & 0x7fffffff)
}

View file

@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
/// Finds the nearest integer less than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn floor(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.floor` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::floorf64(x) }
}
select_implementation! {
name: floor,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
{
//use an alternative implementation on x86, because the

View file

@ -5,14 +5,12 @@ use core::f32;
/// Finds the nearest integer less than or equal to `x`.
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn floorf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.floor` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::floorf32(x) }
}
select_implementation! {
name: floorf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let mut ui = x.to_bits();
let e = (((ui >> 23) as i32) & 0xff) - 0x7f;

View file

@ -74,16 +74,37 @@ macro_rules! div {
};
}
/// Expand `$e` only when LLVM intrinsics may be used: requires the build-script-set
/// `intrinsics_enabled` cfg, the absence of the `force-soft-floats` feature, and the
/// caller-supplied cfg clause (typically a `target_arch` check).
///
/// `$e` is usually a `return` of the intrinsic result, so when the cfg matches the
/// rest of the enclosing function body becomes the fallback path.
macro_rules! llvm_intrinsically_optimized {
    (#[cfg($($clause:tt)*)] $e:expr) => {
        #[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))]
        {
            // Wrapping in `if true` keeps the code after the macro invocation from
            // being flagged as dead code when this branch is compiled in.
            if true { // thwart the dead code lint
                $e
            }
        }
    };
}
// Private modules
#[macro_use]
mod support;
mod arch;
mod expo2;
mod fenv;
mod k_cos;
mod k_cosf;
mod k_expo2;
mod k_expo2f;
mod k_sin;
mod k_sinf;
mod k_tan;
mod k_tanf;
mod rem_pio2;
mod rem_pio2_large;
mod rem_pio2f;
// Private re-imports
use self::expo2::expo2;
use self::k_cos::k_cos;
use self::k_cosf::k_cosf;
use self::k_expo2::k_expo2;
use self::k_expo2f::k_expo2f;
use self::k_sin::k_sin;
use self::k_sinf::k_sinf;
use self::k_tan::k_tan;
use self::k_tanf::k_tanf;
use self::rem_pio2::rem_pio2;
use self::rem_pio2_large::rem_pio2_large;
use self::rem_pio2f::rem_pio2f;
// Public modules
mod acos;
@ -301,35 +322,6 @@ pub use self::tgammaf::tgammaf;
pub use self::trunc::trunc;
pub use self::truncf::truncf;
// Private modules
mod expo2;
mod fenv;
mod k_cos;
mod k_cosf;
mod k_expo2;
mod k_expo2f;
mod k_sin;
mod k_sinf;
mod k_tan;
mod k_tanf;
mod rem_pio2;
mod rem_pio2_large;
mod rem_pio2f;
// Private re-imports
use self::expo2::expo2;
use self::k_cos::k_cos;
use self::k_cosf::k_cosf;
use self::k_expo2::k_expo2;
use self::k_expo2f::k_expo2f;
use self::k_sin::k_sin;
use self::k_sinf::k_sinf;
use self::k_tan::k_tan;
use self::k_tanf::k_tanf;
use self::rem_pio2::rem_pio2;
use self::rem_pio2_large::rem_pio2_large;
use self::rem_pio2f::rem_pio2f;
#[inline]
fn get_high_word(x: f64) -> u32 {
(x.to_bits() >> 32) as u32

View file

@ -81,18 +81,12 @@ use core::f64;
/// The square root of `x` (f64).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn sqrt(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.sqrt` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return if x < 0.0 {
f64::NAN
} else {
unsafe { ::core::intrinsics::sqrtf64(x) }
}
}
select_implementation! {
name: sqrt,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))]
{
// Note: This path is unlikely since LLVM will usually have already

View file

@ -16,18 +16,12 @@
/// The square root of `x` (f32).
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn sqrtf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.sqrt` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return if x < 0.0 {
::core::f32::NAN
} else {
unsafe { ::core::intrinsics::sqrtf32(x) }
}
}
select_implementation! {
name: sqrtf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
#[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
{
// Note: This path is unlikely since LLVM will usually have already

View file

@ -0,0 +1,34 @@
/// Choose among using an intrinsic (if available) and falling back to the default function body.
/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the
/// function.
///
/// Use this if the intrinsic is likely to be more performant on the platform(s) specified
/// in `use_intrinsic`.
///
/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account
/// for e.g. the `unstable-intrinsics` or `force-soft-floats` features.
macro_rules! select_implementation {
    (
        name: $fname:ident,
        // Configuration meta for when to call intrinsics and let LLVM figure it out
        $( use_intrinsic: $use_intrinsic:meta, )?
        args: $($arg:ident),+ ,
    ) => {
        // FIXME: these use paths that are a pretty fragile (`super`). We should figure out
        // something better w.r.t. how this is vendored into compiler-builtins.

        // Never use intrinsics if we are forcing soft floats, and only enable with the
        // `unstable-intrinsics` feature.
        #[cfg(intrinsics_enabled)]
        select_implementation! {
            @cfg $( $use_intrinsic )?;
            // `if true` keeps the statements after this macro invocation in the
            // calling function from tripping the unreachable/dead-code lints.
            if true {
                return super::arch::intrinsics::$fname( $($arg),+ );
            }
        }
    };

    // Coalesce helper to construct an expression only if a config is provided.
    // First arm: no cfg meta was supplied, so emit nothing (fallback body runs).
    (@cfg ; $ex:expr) => { };
    // Second arm: gate the early-return expression behind the supplied cfg.
    (@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
}

View file

@ -0,0 +1,2 @@
// `#[macro_use]` makes the macros defined in the child module available
// crate-wide without per-module imports.
#[macro_use]
pub mod macros;

View file

@ -2,14 +2,12 @@ use core::f64;
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn trunc(x: f64) -> f64 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f64.trunc` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::truncf64(x) }
}
select_implementation! {
name: trunc,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
let mut i: u64 = x.to_bits();

View file

@ -2,14 +2,12 @@ use core::f32;
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub fn truncf(x: f32) -> f32 {
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
// `f32.trunc` native instruction, so we can leverage this for both code size
// and speed.
llvm_intrinsically_optimized! {
#[cfg(target_arch = "wasm32")] {
return unsafe { ::core::intrinsics::truncf32(x) }
}
select_implementation! {
name: truncf,
use_intrinsic: target_arch = "wasm32",
args: x,
}
let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
let mut i: u32 = x.to_bits();