Merge pull request rust-lang/libm#344 from tgross35/select-implementation
Introduce a `select_implementation` macro
This commit is contained in:
commit
ee2d7fded4
15 changed files with 178 additions and 117 deletions
52
library/compiler-builtins/libm/src/math/arch/intrinsics.rs
Normal file
52
library/compiler-builtins/libm/src/math/arch/intrinsics.rs
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// Config is needed for times when this module is available but we don't call everything
|
||||
#![allow(dead_code)]
|
||||
|
||||
pub fn ceil(x: f64) -> f64 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::ceilf64(x) }
|
||||
}
|
||||
|
||||
pub fn ceilf(x: f32) -> f32 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::ceilf32(x) }
|
||||
}
|
||||
|
||||
pub fn fabs(x: f64) -> f64 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::fabsf64(x) }
|
||||
}
|
||||
|
||||
pub fn fabsf(x: f32) -> f32 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::fabsf32(x) }
|
||||
}
|
||||
|
||||
pub fn floor(x: f64) -> f64 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::floorf64(x) }
|
||||
}
|
||||
|
||||
pub fn floorf(x: f32) -> f32 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::floorf32(x) }
|
||||
}
|
||||
|
||||
pub fn sqrt(x: f64) -> f64 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::sqrtf64(x) }
|
||||
}
|
||||
|
||||
pub fn sqrtf(x: f32) -> f32 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::sqrtf32(x) }
|
||||
}
|
||||
|
||||
pub fn trunc(x: f64) -> f64 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::truncf64(x) }
|
||||
}
|
||||
|
||||
pub fn truncf(x: f32) -> f32 {
|
||||
// SAFETY: safe intrinsic with no preconditions
|
||||
unsafe { core::intrinsics::truncf32(x) }
|
||||
}
|
||||
9
library/compiler-builtins/libm/src/math/arch/mod.rs
Normal file
9
library/compiler-builtins/libm/src/math/arch/mod.rs
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
//! Architecture-specific routines and operations.
|
||||
//!
|
||||
//! LLVM will already optimize calls to some of these in cases where there are hardware
|
||||
//! instructions. Providing an implementation here just ensures that the faster implementation
|
||||
//! is used when calling the function directly. This helps anyone who uses `libm` directly, as
|
||||
//! well as improving things when these routines are called as part of other implementations.
|
||||
|
||||
#[cfg(intrinsics_enabled)]
|
||||
pub mod intrinsics;
|
||||
|
|
@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
|
|||
/// Finds the nearest integer greater than or equal to `x`.
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn ceil(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.ceil` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::ceilf64(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: ceil,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
|
||||
{
|
||||
//use an alternative implementation on x86, because the
|
||||
|
|
|
|||
|
|
@ -5,14 +5,12 @@ use core::f32;
|
|||
/// Finds the nearest integer greater than or equal to `x`.
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn ceilf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.ceil` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::ceilf32(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: ceilf,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
let mut ui = x.to_bits();
|
||||
let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32;
|
||||
|
||||
|
|
|
|||
|
|
@ -5,14 +5,12 @@ use core::u64;
|
|||
/// by direct manipulation of the bit representation of `x`.
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fabs(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.abs` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::fabsf64(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: fabs,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
f64::from_bits(x.to_bits() & (u64::MAX / 2))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,14 +3,12 @@
|
|||
/// by direct manipulation of the bit representation of `x`.
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn fabsf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.abs` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::fabsf32(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: fabsf,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
f32::from_bits(x.to_bits() & 0x7fffffff)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,14 +8,12 @@ const TOINT: f64 = 1. / f64::EPSILON;
|
|||
/// Finds the nearest integer less than or equal to `x`.
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn floor(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.floor` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::floorf64(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: floor,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
|
||||
{
|
||||
//use an alternative implementation on x86, because the
|
||||
|
|
|
|||
|
|
@ -5,14 +5,12 @@ use core::f32;
|
|||
/// Finds the nearest integer less than or equal to `x`.
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn floorf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.floor` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::floorf32(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: floorf,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
let mut ui = x.to_bits();
|
||||
let e = (((ui >> 23) as i32) & 0xff) - 0x7f;
|
||||
|
||||
|
|
|
|||
|
|
@ -74,16 +74,37 @@ macro_rules! div {
|
|||
};
|
||||
}
|
||||
|
||||
macro_rules! llvm_intrinsically_optimized {
|
||||
(#[cfg($($clause:tt)*)] $e:expr) => {
|
||||
#[cfg(all(intrinsics_enabled, not(feature = "force-soft-floats"), $($clause)*))]
|
||||
{
|
||||
if true { // thwart the dead code lint
|
||||
$e
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
// Private modules
|
||||
#[macro_use]
|
||||
mod support;
|
||||
mod arch;
|
||||
mod expo2;
|
||||
mod fenv;
|
||||
mod k_cos;
|
||||
mod k_cosf;
|
||||
mod k_expo2;
|
||||
mod k_expo2f;
|
||||
mod k_sin;
|
||||
mod k_sinf;
|
||||
mod k_tan;
|
||||
mod k_tanf;
|
||||
mod rem_pio2;
|
||||
mod rem_pio2_large;
|
||||
mod rem_pio2f;
|
||||
|
||||
// Private re-imports
|
||||
use self::expo2::expo2;
|
||||
use self::k_cos::k_cos;
|
||||
use self::k_cosf::k_cosf;
|
||||
use self::k_expo2::k_expo2;
|
||||
use self::k_expo2f::k_expo2f;
|
||||
use self::k_sin::k_sin;
|
||||
use self::k_sinf::k_sinf;
|
||||
use self::k_tan::k_tan;
|
||||
use self::k_tanf::k_tanf;
|
||||
use self::rem_pio2::rem_pio2;
|
||||
use self::rem_pio2_large::rem_pio2_large;
|
||||
use self::rem_pio2f::rem_pio2f;
|
||||
|
||||
// Public modules
|
||||
mod acos;
|
||||
|
|
@ -301,35 +322,6 @@ pub use self::tgammaf::tgammaf;
|
|||
pub use self::trunc::trunc;
|
||||
pub use self::truncf::truncf;
|
||||
|
||||
// Private modules
|
||||
mod expo2;
|
||||
mod fenv;
|
||||
mod k_cos;
|
||||
mod k_cosf;
|
||||
mod k_expo2;
|
||||
mod k_expo2f;
|
||||
mod k_sin;
|
||||
mod k_sinf;
|
||||
mod k_tan;
|
||||
mod k_tanf;
|
||||
mod rem_pio2;
|
||||
mod rem_pio2_large;
|
||||
mod rem_pio2f;
|
||||
|
||||
// Private re-imports
|
||||
use self::expo2::expo2;
|
||||
use self::k_cos::k_cos;
|
||||
use self::k_cosf::k_cosf;
|
||||
use self::k_expo2::k_expo2;
|
||||
use self::k_expo2f::k_expo2f;
|
||||
use self::k_sin::k_sin;
|
||||
use self::k_sinf::k_sinf;
|
||||
use self::k_tan::k_tan;
|
||||
use self::k_tanf::k_tanf;
|
||||
use self::rem_pio2::rem_pio2;
|
||||
use self::rem_pio2_large::rem_pio2_large;
|
||||
use self::rem_pio2f::rem_pio2f;
|
||||
|
||||
#[inline]
|
||||
fn get_high_word(x: f64) -> u32 {
|
||||
(x.to_bits() >> 32) as u32
|
||||
|
|
|
|||
|
|
@ -81,18 +81,12 @@ use core::f64;
|
|||
/// The square root of `x` (f64).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn sqrt(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.sqrt` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return if x < 0.0 {
|
||||
f64::NAN
|
||||
} else {
|
||||
unsafe { ::core::intrinsics::sqrtf64(x) }
|
||||
}
|
||||
}
|
||||
select_implementation! {
|
||||
name: sqrt,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
#[cfg(all(target_feature = "sse2", not(feature = "force-soft-floats")))]
|
||||
{
|
||||
// Note: This path is unlikely since LLVM will usually have already
|
||||
|
|
|
|||
|
|
@ -16,18 +16,12 @@
|
|||
/// The square root of `x` (f32).
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn sqrtf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.sqrt` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return if x < 0.0 {
|
||||
::core::f32::NAN
|
||||
} else {
|
||||
unsafe { ::core::intrinsics::sqrtf32(x) }
|
||||
}
|
||||
}
|
||||
select_implementation! {
|
||||
name: sqrtf,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
#[cfg(all(target_feature = "sse", not(feature = "force-soft-floats")))]
|
||||
{
|
||||
// Note: This path is unlikely since LLVM will usually have already
|
||||
|
|
|
|||
34
library/compiler-builtins/libm/src/math/support/macros.rs
Normal file
34
library/compiler-builtins/libm/src/math/support/macros.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/// Choose among using an intrinsic (if available) and falling back to the default function body.
|
||||
/// Returns directly if the intrinsic version is used, otherwise continues to the rest of the
|
||||
/// function.
|
||||
///
|
||||
/// Use this if the intrinsic is likely to be more performant on the platform(s) specified
|
||||
/// in `use_intrinsic`.
|
||||
///
|
||||
/// The `cfg` used here is controlled by `build.rs` so the passed meta does not need to account
|
||||
/// for e.g. the `unstable-intrinsics` or `force-soft-floats` features.
|
||||
macro_rules! select_implementation {
|
||||
(
|
||||
name: $fname:ident,
|
||||
// Configuration meta for when to call intrinsics and let LLVM figure it out
|
||||
$( use_intrinsic: $use_intrinsic:meta, )?
|
||||
args: $($arg:ident),+ ,
|
||||
) => {
|
||||
// FIXME: these use paths that are pretty fragile (`super`). We should figure out
|
||||
// something better w.r.t. how this is vendored into compiler-builtins.
|
||||
|
||||
// Never use intrinsics if we are forcing soft floats, and only enable with the
|
||||
// `unstable-intrinsics` feature.
|
||||
#[cfg(intrinsics_enabled)]
|
||||
select_implementation! {
|
||||
@cfg $( $use_intrinsic )?;
|
||||
if true {
|
||||
return super::arch::intrinsics::$fname( $($arg),+ );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Coalesce helper to construct an expression only if a config is provided
|
||||
(@cfg ; $ex:expr) => { };
|
||||
(@cfg $provided:meta; $ex:expr) => { #[cfg($provided)] $ex };
|
||||
}
|
||||
2
library/compiler-builtins/libm/src/math/support/mod.rs
Normal file
2
library/compiler-builtins/libm/src/math/support/mod.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
#[macro_use]
|
||||
pub mod macros;
|
||||
|
|
@ -2,14 +2,12 @@ use core::f64;
|
|||
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn trunc(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.trunc` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::truncf64(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: trunc,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
|
||||
|
||||
let mut i: u64 = x.to_bits();
|
||||
|
|
|
|||
|
|
@ -2,14 +2,12 @@ use core::f32;
|
|||
|
||||
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
|
||||
pub fn truncf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.trunc` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::truncf32(x) }
|
||||
}
|
||||
select_implementation! {
|
||||
name: truncf,
|
||||
use_intrinsic: target_arch = "wasm32",
|
||||
args: x,
|
||||
}
|
||||
|
||||
let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
|
||||
|
||||
let mut i: u32 = x.to_bits();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue