librustc: use LLVM intrinsics for several floating point operations.

Achieves at least a 5x speedup for some functions! Also reorganises the delegation code so that the delegated function wrappers have the #[inline(always)] annotation, and reduces the repetition of delegate!(..) invocations.
2013-04-04 03:08:53 +11:00 · 2013-04-04 03:08:53 +11:00 · d9c54f8387
commit d9c54f8387
parent 93c0888b6c
3 changed files with 166 additions and 153 deletions
--- a/src/libcore/num/f32.rs
+++ b/src/libcore/num/f32.rs
@ -10,12 +10,9 @@
 //! Operations and constants for `f32`
 use cmath;
 use libc::{c_float, c_int};
 use num::strconv;
 use num;
 use option::Option;
 use unstable::intrinsics::floorf32;
 use from_str;
 use to_str;
@ -24,79 +21,93 @@ use to_str;
 pub use cmath::c_float_targ_consts::*;
 // An inner module is required to get the #[inline(always)] attribute on the
 // functions.
 pub use self::delegated::*;
 macro_rules! delegate(
    (
-        fn $name:ident(
+        $(
-            $(
+            fn $name:ident(
-                $arg:ident : $arg_ty:ty
+                $(
-            ),*
+                    $arg:ident : $arg_ty:ty
-        ) -> $rv:ty = $bound_name:path
+                ),*
            ) -> $rv:ty = $bound_name:path
        ),*
    ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+        mod delegated {
-            unsafe {
+            use cmath::c_float_utils;
-                $bound_name($( $arg ),*)
+            use libc::{c_float, c_int};
-            }
+            use unstable::intrinsics;
            $(
                #[inline(always)]
                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
                    unsafe {
                        $bound_name($( $arg ),*)
                    }
                }
            )*
        }
    )
 )
-delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
+delegate!(
-delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
+    // intrinsics
-delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
+    fn abs(n: f32) -> f32 = intrinsics::fabsf32,
-delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
+    fn cos(n: f32) -> f32 = intrinsics::cosf32,
-    cmath::c_float_utils::atan2)
+    fn exp(n: f32) -> f32 = intrinsics::expf32,
-delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
+    fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
-delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
+    fn floor(x: f32) -> f32 = intrinsics::floorf32,
-delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
+    fn ln(n: f32) -> f32 = intrinsics::logf32,
-    cmath::c_float_utils::copysign)
+    fn log10(n: f32) -> f32 = intrinsics::log10f32,
-delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
+    fn log2(n: f32) -> f32 = intrinsics::log2f32,
-delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
+    fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
-delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
+    fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
-delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
+    fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
-delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
+    fn sin(n: f32) -> f32 = intrinsics::sinf32,
-delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
+    fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,
-delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
+
-delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
+    // LLVM 3.3 required to use intrinsics for these four
-delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
+    fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
-    cmath::c_float_utils::abs_sub)
+    fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
-delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
+    /*
-    cmath::c_float_utils::mul_add)
+    fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
-delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
+    fn trunc(n: f32) -> f32 = intrinsics::truncf32,
-    cmath::c_float_utils::fmax)
+    fn rint(n: f32) -> f32 = intrinsics::rintf32,
-delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
+    fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
-    cmath::c_float_utils::fmin)
+    */
-delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
+
-    cmath::c_float_utils::nextafter)
+    // cmath
-delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
+    fn acos(n: c_float) -> c_float = c_float_utils::acos,
-    cmath::c_float_utils::frexp)
+    fn asin(n: c_float) -> c_float = c_float_utils::asin,
-delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
+    fn atan(n: c_float) -> c_float = c_float_utils::atan,
-    cmath::c_float_utils::hypot)
+    fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
-delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
+    fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
-    cmath::c_float_utils::ldexp)
+    fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
-delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
+    fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
-    cmath::c_float_utils::lgamma)
+    fn erf(n: c_float) -> c_float = c_float_utils::erf,
-delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
+    fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
-delegate!(fn log_radix(n: c_float) -> c_float =
+    fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
-    cmath::c_float_utils::log_radix)
+    fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
-delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
+    fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
-delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
+    fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
-delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
+    fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
-delegate!(fn ilog_radix(n: c_float) -> c_int =
+    fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
-    cmath::c_float_utils::ilog_radix)
+    fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
-delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
+    fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
-    cmath::c_float_utils::modf)
+    fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
-delegate!(fn pow(n: c_float, e: c_float) -> c_float =
+    fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
-    cmath::c_float_utils::pow)
+    fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
-delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
+    fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
-delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
+    fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
-    cmath::c_float_utils::ldexp_radix)
+    fn round(n: c_float) -> c_float = c_float_utils::round,
-delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
+    fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
-delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
+    fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
-delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
+    fn tan(n: c_float) -> c_float = c_float_utils::tan,
-delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
+    fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
-delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
+    fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)
-delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
+
 delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
 // These are not defined inside consts:: for consistency with
 // the integer types
@ -143,9 +154,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
 #[inline(always)]
 pub fn gt(x: f32, y: f32) -> bool { return x > y; }
 /// Returns `x` rounded down
 #[inline(always)]
 pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }
 // FIXME (#1999): replace the predicates below with llvm intrinsics or
 // calls to the libmath macros in the rust runtime for performance.
--- a/src/libcore/num/f64.rs
+++ b/src/libcore/num/f64.rs
@ -10,12 +10,9 @@
 //! Operations and constants for `f64`
 use cmath;
 use libc::{c_double, c_int};
 use num::strconv;
 use num;
 use option::Option;
 use unstable::intrinsics::floorf64;
 use to_str;
 use from_str;
@ -25,87 +22,98 @@ use from_str;
 pub use cmath::c_double_targ_consts::*;
 pub use cmp::{min, max};
 // An inner module is required to get the #[inline(always)] attribute on the
 // functions.
 pub use self::delegated::*;
 macro_rules! delegate(
    (
-        fn $name:ident(
+        $(
-            $(
+            fn $name:ident(
-                $arg:ident : $arg_ty:ty
+                $(
-            ),*
+                    $arg:ident : $arg_ty:ty
-        ) -> $rv:ty = $bound_name:path
+                ),*
            ) -> $rv:ty = $bound_name:path
        ),*
    ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+        mod delegated {
-            unsafe {
+            use cmath::c_double_utils;
-                $bound_name($( $arg ),*)
+            use libc::{c_double, c_int};
-            }
+            use unstable::intrinsics;
            $(
                #[inline(always)]
                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
                    unsafe {
                        $bound_name($( $arg ),*)
                    }
                }
            )*
        }
    )
 )
-delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos)
+delegate!(
-delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin)
+    // intrinsics
-delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan)
+    fn abs(n: f64) -> f64 = intrinsics::fabsf64,
-delegate!(fn atan2(a: c_double, b: c_double) -> c_double =
+    fn cos(n: f64) -> f64 = intrinsics::cosf64,
-    cmath::c_double_utils::atan2)
+    fn exp(n: f64) -> f64 = intrinsics::expf64,
-delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt)
+    fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
-delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil)
+    fn floor(x: f64) -> f64 = intrinsics::floorf64,
-delegate!(fn copysign(x: c_double, y: c_double) -> c_double =
+    fn ln(n: f64) -> f64 = intrinsics::logf64,
-    cmath::c_double_utils::copysign)
+    fn log10(n: f64) -> f64 = intrinsics::log10f64,
-delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos)
+    fn log2(n: f64) -> f64 = intrinsics::log2f64,
-delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh)
+    fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
-delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf)
+    fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
-delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc)
+    fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
-delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp)
+    fn sin(n: f64) -> f64 = intrinsics::sinf64,
-delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1)
+    fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,
-delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
+
-delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs)
+    // LLVM 3.3 required to use intrinsics for these four
-delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double =
+    fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
-    cmath::c_double_utils::abs_sub)
+    fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
-delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double =
+    /*
-    cmath::c_double_utils::mul_add)
+    fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
-delegate!(fn fmax(a: c_double, b: c_double) -> c_double =
+    fn trunc(n: f64) -> f64 = intrinsics::truncf64,
-    cmath::c_double_utils::fmax)
+    fn rint(n: c_double) -> c_double = intrinsics::rintf64,
-delegate!(fn fmin(a: c_double, b: c_double) -> c_double =
+    fn nearbyint(n: c_double) -> c_double = intrinsics::nearbyintf64,
-    cmath::c_double_utils::fmin)
+    */
-delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
+
-    cmath::c_double_utils::nextafter)
+    // cmath
-delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double =
+    fn acos(n: c_double) -> c_double = c_double_utils::acos,
-    cmath::c_double_utils::frexp)
+    fn asin(n: c_double) -> c_double = c_double_utils::asin,
-delegate!(fn hypot(x: c_double, y: c_double) -> c_double =
+    fn atan(n: c_double) -> c_double = c_double_utils::atan,
-    cmath::c_double_utils::hypot)
+    fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
-delegate!(fn ldexp(x: c_double, n: c_int) -> c_double =
+    fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
-    cmath::c_double_utils::ldexp)
+    fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
-delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double =
+    fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
-    cmath::c_double_utils::lgamma)
+    fn erf(n: c_double) -> c_double = c_double_utils::erf,
-delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln)
+    fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
-delegate!(fn log_radix(n: c_double) -> c_double =
+    fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
-    cmath::c_double_utils::log_radix)
+    fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
-delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p)
+    fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
-delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10)
+    fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
-delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2)
+    fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
-delegate!(fn ilog_radix(n: c_double) -> c_int =
+    fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
-    cmath::c_double_utils::ilog_radix)
+    fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
-delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double =
+    fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
-    cmath::c_double_utils::modf)
+    fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
-delegate!(fn pow(n: c_double, e: c_double) -> c_double =
+    fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
-    cmath::c_double_utils::pow)
+    fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
-delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round)
+    fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
-delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double =
+    fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
-    cmath::c_double_utils::ldexp_radix)
+    fn round(n: c_double) -> c_double = c_double_utils::round,
-delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin)
+    fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
-delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh)
+    fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
-delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt)
+    fn tan(n: c_double) -> c_double = c_double_utils::tan,
-delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan)
+    fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
-delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh)
+    fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
-delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma)
+    fn j0(n: c_double) -> c_double = c_double_utils::j0,
-delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc)
+    fn j1(n: c_double) -> c_double = c_double_utils::j1,
-delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0)
+    fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
-delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1)
+    fn y0(n: c_double) -> c_double = c_double_utils::y0,
-delegate!(fn jn(i: c_int, n: c_double) -> c_double =
+    fn y1(n: c_double) -> c_double = c_double_utils::y1,
-    cmath::c_double_utils::jn)
+    fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)
 delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
 delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
 delegate!(fn yn(i: c_int, n: c_double) -> c_double =
    cmath::c_double_utils::yn)
 // FIXME (#1433): obtain these in a different way
@ -218,9 +226,6 @@ pub fn is_finite(x: f64) -> bool {
    return !(is_NaN(x) || is_infinite(x));
 }
 /// Returns `x` rounded down
 #[inline(always)]
 pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }
 // FIXME (#1999): add is_normal, is_subnormal, and fpclassify
--- a/src/libcore/num/float.rs
+++ b/src/libcore/num/float.rs
@ -36,7 +36,7 @@ pub use f64::{acos, asin, atan2, cbrt, ceil, copysign, cosh, floor};
 pub use f64::{erf, erfc, exp, expm1, exp2, abs_sub};
 pub use f64::{mul_add, fmax, fmin, nextafter, frexp, hypot, ldexp};
 pub use f64::{lgamma, ln, log_radix, ln1p, log10, log2, ilog_radix};
-pub use f64::{modf, pow, round, sinh, tanh, tgamma, trunc};
+pub use f64::{modf, pow, powi, round, sinh, tanh, tgamma, trunc};
 pub use f64::signbit;
 pub use f64::{j0, j1, jn, y0, y1, yn};