Merge pull request rust-lang/libm#152 from alexcrichton/wasm-sqrt
Optimize intrinsics on wasm32
This commit is contained in:
commit
a9d5577f55
14 changed files with 112 additions and 0 deletions
|
|
@ -29,6 +29,13 @@ matrix:
|
|||
- env: TARGET=cargo-fmt
|
||||
rust: beta
|
||||
|
||||
- env: TARGET=wasm32-unknown-unknown
|
||||
rust: nightly
|
||||
install: rustup target add $TARGET
|
||||
script:
|
||||
- cargo build --target $TARGET
|
||||
- cargo build --no-default-features --target $TARGET
|
||||
|
||||
before_install: set -e
|
||||
|
||||
install:
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ version = "0.1.2"
|
|||
[features]
|
||||
# only used to run our test suite
|
||||
checked = []
|
||||
default = ['stable']
|
||||
stable = []
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@
|
|||
|
||||
#![deny(warnings)]
|
||||
#![no_std]
|
||||
#![cfg_attr(
|
||||
all(target_arch = "wasm32", not(feature = "stable")),
|
||||
feature(core_intrinsics)
|
||||
)]
|
||||
|
||||
mod math;
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON;
|
|||
|
||||
#[inline]
|
||||
pub fn ceil(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.ceil` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::ceilf64(x) }
|
||||
}
|
||||
}
|
||||
let u: u64 = x.to_bits();
|
||||
let e: i64 = (u >> 52 & 0x7ff) as i64;
|
||||
let y: f64;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,14 @@ use core::f32;
|
|||
|
||||
#[inline]
|
||||
pub fn ceilf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.ceil` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::ceilf32(x) }
|
||||
}
|
||||
}
|
||||
let mut ui = x.to_bits();
|
||||
let e = (((ui >> 23) & 0xff) - 0x7f) as i32;
|
||||
|
||||
|
|
|
|||
|
|
@ -2,5 +2,13 @@ use core::u64;
|
|||
|
||||
#[inline]
|
||||
pub fn fabs(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.abs` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::fabsf64(x) }
|
||||
}
|
||||
}
|
||||
f64::from_bits(x.to_bits() & (u64::MAX / 2))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,12 @@
|
|||
#[inline]
|
||||
pub fn fabsf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.abs` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::fabsf32(x) }
|
||||
}
|
||||
}
|
||||
f32::from_bits(x.to_bits() & 0x7fffffff)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,14 @@ const TOINT: f64 = 1. / f64::EPSILON;
|
|||
|
||||
#[inline]
|
||||
pub fn floor(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.floor` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::floorf64(x) }
|
||||
}
|
||||
}
|
||||
let ui = x.to_bits();
|
||||
let e = ((ui >> 52) & 0x7ff) as i32;
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,14 @@ use core::f32;
|
|||
|
||||
#[inline]
|
||||
pub fn floorf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.floor` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::floorf32(x) }
|
||||
}
|
||||
}
|
||||
let mut ui = x.to_bits();
|
||||
let e = (((ui >> 23) & 0xff) - 0x7f) as i32;
|
||||
|
||||
|
|
|
|||
|
|
@ -58,6 +58,17 @@ macro_rules! i {
|
|||
};
|
||||
}
|
||||
|
||||
/// Emits an early-`return` through an LLVM intrinsic when compiling for a
/// target where that intrinsic is known to lower to an optimized native
/// instruction (e.g. the wasm32 paths in `ceil`, `fabs`, `sqrt`, ...).
///
/// Usage: `llvm_intrinsically_optimized! { #[cfg(<clause>)] { return <expr> } }`.
///
/// The wrapped expression `$e` is only compiled in when BOTH the caller's cfg
/// clause holds AND the crate's "stable" feature is disabled — the intrinsic
/// path needs `feature(core_intrinsics)`, which this crate only enables off
/// stable (see the `cfg_attr` in lib.rs).
macro_rules! llvm_intrinsically_optimized {
    (#[cfg($($clause:tt)*)] $e:expr) => {
        #[cfg(all(not(feature = "stable"), $($clause)*))]
        {
            if true { // thwart the dead code lint
                $e
            }
        }
    };
}
|
||||
|
||||
// Public modules
|
||||
mod acos;
|
||||
mod acosf;
|
||||
|
|
|
|||
|
|
@ -82,6 +82,18 @@ const TINY: f64 = 1.0e-300;
|
|||
|
||||
#[inline]
|
||||
pub fn sqrt(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.sqrt` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return if x < 0.0 {
|
||||
f64::NAN
|
||||
} else {
|
||||
unsafe { ::core::intrinsics::sqrtf64(x) }
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut z: f64;
|
||||
let sign: u32 = 0x80000000;
|
||||
let mut ix0: i32;
|
||||
|
|
|
|||
|
|
@ -17,6 +17,18 @@ const TINY: f32 = 1.0e-30;
|
|||
|
||||
#[inline]
|
||||
pub fn sqrtf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.sqrt` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return if x < 0.0 {
|
||||
::core::f32::NAN
|
||||
} else {
|
||||
unsafe { ::core::intrinsics::sqrtf32(x) }
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut z: f32;
|
||||
let sign: i32 = 0x80000000u32 as i32;
|
||||
let mut ix: i32;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,14 @@ use core::f64;
|
|||
|
||||
#[inline]
|
||||
pub fn trunc(x: f64) -> f64 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f64.trunc` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::truncf64(x) }
|
||||
}
|
||||
}
|
||||
let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120
|
||||
|
||||
let mut i: u64 = x.to_bits();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,14 @@ use core::f32;
|
|||
|
||||
#[inline]
|
||||
pub fn truncf(x: f32) -> f32 {
|
||||
// On wasm32 we know that LLVM's intrinsic will compile to an optimized
|
||||
// `f32.trunc` native instruction, so we can leverage this for both code size
|
||||
// and speed.
|
||||
llvm_intrinsically_optimized! {
|
||||
#[cfg(target_arch = "wasm32")] {
|
||||
return unsafe { ::core::intrinsics::truncf32(x) }
|
||||
}
|
||||
}
|
||||
let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120
|
||||
|
||||
let mut i: u32 = x.to_bits();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue