librustc: use LLVM intrinsics for several floating point operations.

Achieves at least a 5x speedup for some functions!

Also, reorganise the delegation code so that the delegated function wrappers
have the #[inline(always)] annotation, and reduce the repetition of
delegate!(..) invocations.
This commit is contained in:
Huon Wilson 2013-04-04 03:08:53 +11:00
parent 93c0888b6c
commit d9c54f8387
3 changed files with 166 additions and 153 deletions

View file

@ -10,12 +10,9 @@
//! Operations and constants for `f32` //! Operations and constants for `f32`
use cmath;
use libc::{c_float, c_int};
use num::strconv; use num::strconv;
use num; use num;
use option::Option; use option::Option;
use unstable::intrinsics::floorf32;
use from_str; use from_str;
use to_str; use to_str;
@ -24,79 +21,93 @@ use to_str;
pub use cmath::c_float_targ_consts::*; pub use cmath::c_float_targ_consts::*;
// An inner module is required to get the #[inline(always)] attribute on the
// functions.
pub use self::delegated::*;
macro_rules! delegate( macro_rules! delegate(
( (
fn $name:ident( $(
$( fn $name:ident(
$arg:ident : $arg_ty:ty $(
),* $arg:ident : $arg_ty:ty
) -> $rv:ty = $bound_name:path ),*
) -> $rv:ty = $bound_name:path
),*
) => ( ) => (
pub fn $name($( $arg : $arg_ty ),*) -> $rv { mod delegated {
unsafe { use cmath::c_float_utils;
$bound_name($( $arg ),*) use libc::{c_float, c_int};
} use unstable::intrinsics;
$(
#[inline(always)]
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
)*
} }
) )
) )
delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos) delegate!(
delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin) // intrinsics
delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan) fn abs(n: f32) -> f32 = intrinsics::fabsf32,
delegate!(fn atan2(a: c_float, b: c_float) -> c_float = fn cos(n: f32) -> f32 = intrinsics::cosf32,
cmath::c_float_utils::atan2) fn exp(n: f32) -> f32 = intrinsics::expf32,
delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt) fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil) fn floor(x: f32) -> f32 = intrinsics::floorf32,
delegate!(fn copysign(x: c_float, y: c_float) -> c_float = fn ln(n: f32) -> f32 = intrinsics::logf32,
cmath::c_float_utils::copysign) fn log10(n: f32) -> f32 = intrinsics::log10f32,
delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos) fn log2(n: f32) -> f32 = intrinsics::log2f32,
delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh) fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf) fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc) fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp) fn sin(n: f32) -> f32 = intrinsics::sinf32,
delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1) fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,
delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs) // LLVM 3.3 required to use intrinsics for these four
delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float = fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
cmath::c_float_utils::abs_sub) fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float = /*
cmath::c_float_utils::mul_add) fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
delegate!(fn fmax(a: c_float, b: c_float) -> c_float = fn trunc(n: f32) -> f32 = intrinsics::truncf32,
cmath::c_float_utils::fmax) fn rint(n: f32) -> f32 = intrinsics::rintf32,
delegate!(fn fmin(a: c_float, b: c_float) -> c_float = fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
cmath::c_float_utils::fmin) */
delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
cmath::c_float_utils::nextafter) // cmath
delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float = fn acos(n: c_float) -> c_float = c_float_utils::acos,
cmath::c_float_utils::frexp) fn asin(n: c_float) -> c_float = c_float_utils::asin,
delegate!(fn hypot(x: c_float, y: c_float) -> c_float = fn atan(n: c_float) -> c_float = c_float_utils::atan,
cmath::c_float_utils::hypot) fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
delegate!(fn ldexp(x: c_float, n: c_int) -> c_float = fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
cmath::c_float_utils::ldexp) fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float = fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
cmath::c_float_utils::lgamma) fn erf(n: c_float) -> c_float = c_float_utils::erf,
delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln) fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
delegate!(fn log_radix(n: c_float) -> c_float = fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
cmath::c_float_utils::log_radix) fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p) fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10) fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2) fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
delegate!(fn ilog_radix(n: c_float) -> c_int = fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
cmath::c_float_utils::ilog_radix) fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float = fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
cmath::c_float_utils::modf) fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
delegate!(fn pow(n: c_float, e: c_float) -> c_float = fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
cmath::c_float_utils::pow) fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round) fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float = fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
cmath::c_float_utils::ldexp_radix) fn round(n: c_float) -> c_float = c_float_utils::round,
delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin) fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh) fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt) fn tan(n: c_float) -> c_float = c_float_utils::tan,
delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan) fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh) fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)
delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
// These are not defined inside consts:: for consistency with // These are not defined inside consts:: for consistency with
// the integer types // the integer types
@ -143,9 +154,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
#[inline(always)] #[inline(always)]
pub fn gt(x: f32, y: f32) -> bool { return x > y; } pub fn gt(x: f32, y: f32) -> bool { return x > y; }
/// Returns `x` rounded down
#[inline(always)]
pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }
// FIXME (#1999): replace the predicates below with llvm intrinsics or // FIXME (#1999): replace the predicates below with llvm intrinsics or
// calls to the libmath macros in the rust runtime for performance. // calls to the libmath macros in the rust runtime for performance.

View file

@ -10,12 +10,9 @@
//! Operations and constants for `f64` //! Operations and constants for `f64`
use cmath;
use libc::{c_double, c_int};
use num::strconv; use num::strconv;
use num; use num;
use option::Option; use option::Option;
use unstable::intrinsics::floorf64;
use to_str; use to_str;
use from_str; use from_str;
@ -25,87 +22,98 @@ use from_str;
pub use cmath::c_double_targ_consts::*; pub use cmath::c_double_targ_consts::*;
pub use cmp::{min, max}; pub use cmp::{min, max};
// An inner module is required to get the #[inline(always)] attribute on the
// functions.
pub use self::delegated::*;
macro_rules! delegate( macro_rules! delegate(
( (
fn $name:ident( $(
$( fn $name:ident(
$arg:ident : $arg_ty:ty $(
),* $arg:ident : $arg_ty:ty
) -> $rv:ty = $bound_name:path ),*
) -> $rv:ty = $bound_name:path
),*
) => ( ) => (
pub fn $name($( $arg : $arg_ty ),*) -> $rv { mod delegated {
unsafe { use cmath::c_double_utils;
$bound_name($( $arg ),*) use libc::{c_double, c_int};
} use unstable::intrinsics;
$(
#[inline(always)]
pub fn $name($( $arg : $arg_ty ),*) -> $rv {
unsafe {
$bound_name($( $arg ),*)
}
}
)*
} }
) )
) )
delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos) delegate!(
delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin) // intrinsics
delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan) fn abs(n: f64) -> f64 = intrinsics::fabsf64,
delegate!(fn atan2(a: c_double, b: c_double) -> c_double = fn cos(n: f64) -> f64 = intrinsics::cosf64,
cmath::c_double_utils::atan2) fn exp(n: f64) -> f64 = intrinsics::expf64,
delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt) fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil) fn floor(x: f64) -> f64 = intrinsics::floorf64,
delegate!(fn copysign(x: c_double, y: c_double) -> c_double = fn ln(n: f64) -> f64 = intrinsics::logf64,
cmath::c_double_utils::copysign) fn log10(n: f64) -> f64 = intrinsics::log10f64,
delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos) fn log2(n: f64) -> f64 = intrinsics::log2f64,
delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh) fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf) fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc) fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp) fn sin(n: f64) -> f64 = intrinsics::sinf64,
delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1) fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,
delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs) // LLVM 3.3 required to use intrinsics for these four
delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double = fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
cmath::c_double_utils::abs_sub) fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double = /*
cmath::c_double_utils::mul_add) fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
delegate!(fn fmax(a: c_double, b: c_double) -> c_double = fn trunc(n: f64) -> f64 = intrinsics::truncf64,
cmath::c_double_utils::fmax) fn rint(n: c_double) -> c_double = intrinsics::rintf64,
delegate!(fn fmin(a: c_double, b: c_double) -> c_double = fn nearbyint(n: c_double) -> c_double = intrinsics::nearbyintf64,
cmath::c_double_utils::fmin) */
delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
cmath::c_double_utils::nextafter) // cmath
delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double = fn acos(n: c_double) -> c_double = c_double_utils::acos,
cmath::c_double_utils::frexp) fn asin(n: c_double) -> c_double = c_double_utils::asin,
delegate!(fn hypot(x: c_double, y: c_double) -> c_double = fn atan(n: c_double) -> c_double = c_double_utils::atan,
cmath::c_double_utils::hypot) fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
delegate!(fn ldexp(x: c_double, n: c_int) -> c_double = fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
cmath::c_double_utils::ldexp) fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double = fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
cmath::c_double_utils::lgamma) fn erf(n: c_double) -> c_double = c_double_utils::erf,
delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln) fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
delegate!(fn log_radix(n: c_double) -> c_double = fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
cmath::c_double_utils::log_radix) fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p) fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10) fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2) fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
delegate!(fn ilog_radix(n: c_double) -> c_int = fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
cmath::c_double_utils::ilog_radix) fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double = fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
cmath::c_double_utils::modf) fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
delegate!(fn pow(n: c_double, e: c_double) -> c_double = fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
cmath::c_double_utils::pow) fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round) fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double = fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
cmath::c_double_utils::ldexp_radix) fn round(n: c_double) -> c_double = c_double_utils::round,
delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin) fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh) fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt) fn tan(n: c_double) -> c_double = c_double_utils::tan,
delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan) fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh) fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma) fn j0(n: c_double) -> c_double = c_double_utils::j0,
delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc) fn j1(n: c_double) -> c_double = c_double_utils::j1,
delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0) fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1) fn y0(n: c_double) -> c_double = c_double_utils::y0,
delegate!(fn jn(i: c_int, n: c_double) -> c_double = fn y1(n: c_double) -> c_double = c_double_utils::y1,
cmath::c_double_utils::jn) fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)
delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
delegate!(fn yn(i: c_int, n: c_double) -> c_double =
cmath::c_double_utils::yn)
// FIXME (#1433): obtain these in a different way // FIXME (#1433): obtain these in a different way
@ -218,9 +226,6 @@ pub fn is_finite(x: f64) -> bool {
return !(is_NaN(x) || is_infinite(x)); return !(is_NaN(x) || is_infinite(x));
} }
/// Returns `x` rounded down
#[inline(always)]
pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }
// FIXME (#1999): add is_normal, is_subnormal, and fpclassify // FIXME (#1999): add is_normal, is_subnormal, and fpclassify

View file

@ -36,7 +36,7 @@ pub use f64::{acos, asin, atan2, cbrt, ceil, copysign, cosh, floor};
pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub}; pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub};
pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp}; pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp};
pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix}; pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix};
pub use f64::{modf, pow, round, sinh, tanh, tgamma, trunc}; pub use f64::{modf, pow, powi, round, sinh, tanh, tgamma, trunc};
pub use f64::signbit; pub use f64::signbit;
pub use f64::{j0, j1, jn, y0, y1, yn}; pub use f64::{j0, j1, jn, y0, y1, yn};