Make a subset of libm symbols weakly available on all platforms

018616e78b ("Always have math functions but with `weak` linking
attribute if we can") made all math symbols available on platforms that
support weak linkage. This caused some unexpected regressions, however,
because our less accurate and sometimes slow routines were being
selected over the system `libm`, which also tends to be weak [1]. Thus,
0fab77e8d7 ("Don't include `math` for `unix` and `wasi` targets") was
applied to undo these changes on many platforms.

Now that some improvements have been made to `libm`, add back a subset
of these functions:

* cbrt
* ceil
* copysign
* fabs
* fdim
* floor
* fma
* fmax
* fmaximum
* fmin
* fminimum
* fmod
* rint
* round
* roundeven
* sqrt
* trunc

This list includes only functions that produce exact results (verified
with exhaustive / extensive tests, and also required by IEEE in most
cases), and for which benchmarks indicate performance similar to or
better than Musl's soft float math routines [^1]. All except `cbrt` also
have `f16` and `f128` implementations. Once more routines meet these
criteria, we can move them from platform-specific availability to always
available.

Once this change makes it to rust-lang/rust, we will also be able to
move the relevant functions from `std` to `core`.

[^1]: We still rely on the backend to provide optimized assmebly
      routines when available.

[1]: https://github.com/rust-lang/rust/issues/128386
This commit is contained in:
Trevor Gross 2025-02-13 03:27:00 +00:00 committed by Trevor Gross
parent 95a5ad5225
commit 55bbda8ff8
2 changed files with 179 additions and 121 deletions

View file

@ -41,40 +41,13 @@ mod macros;
pub mod float;
pub mod int;
// Disable for any of the following:
// - x86 without sse2 due to ABI issues
// - <https://github.com/rust-lang/rust/issues/114479>
// - but exclude UEFI since it is a soft-float target
// - <https://github.com/rust-lang/rust/issues/128533>
// - All unix targets (linux, macos, freebsd, android, etc)
// - wasm with known target_os
#[cfg(not(any(
all(
target_arch = "x86",
not(target_feature = "sse2"),
not(target_os = "uefi"),
),
unix,
all(target_family = "wasm", not(target_os = "unknown"))
)))]
pub mod math;
pub mod mem;
// `libm` expects its `support` module to be available in the crate root. This config can be
// cleaned up once `libm` is made always available.
#[cfg(not(any(
all(
target_arch = "x86",
not(target_feature = "sse2"),
not(target_os = "uefi"),
),
unix,
all(target_family = "wasm", not(target_os = "unknown"))
)))]
use math::libm::support;
pub mod mem;
#[cfg(target_arch = "arm")]
pub mod arm;

View file

@ -5,110 +5,195 @@
#[path = "../libm/src/math/mod.rs"]
pub(crate) mod libm;
#[allow(unused_macros)]
macro_rules! no_mangle {
macro_rules! libm_intrinsics {
($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => {
intrinsics! {
$(
pub extern "C" fn $fun($($iid: $ity),+) -> $oty {
self::libm::$fun($($iid),+)
$crate::math::libm::$fun($($iid),+)
}
)+
}
}
}
#[cfg(not(windows))]
no_mangle! {
fn acos(x: f64) -> f64;
fn asin(x: f64) -> f64;
fn cbrt(x: f64) -> f64;
fn expm1(x: f64) -> f64;
fn hypot(x: f64, y: f64) -> f64;
fn tan(x: f64) -> f64;
fn cos(x: f64) -> f64;
fn expf(x: f32) -> f32;
fn log2(x: f64) -> f64;
fn log2f(x: f32) -> f32;
fn log10(x: f64) -> f64;
fn log10f(x: f32) -> f32;
fn log(x: f64) -> f64;
fn logf(x: f32) -> f32;
fn round(x: f64) -> f64;
fn roundf(x: f32) -> f32;
fn rint(x: f64) -> f64;
fn rintf(x: f32) -> f32;
fn sin(x: f64) -> f64;
fn pow(x: f64, y: f64) -> f64;
fn powf(x: f32, y: f32) -> f32;
fn acosf(n: f32) -> f32;
fn atan2f(a: f32, b: f32) -> f32;
fn atanf(n: f32) -> f32;
fn coshf(n: f32) -> f32;
fn expm1f(n: f32) -> f32;
fn fdim(a: f64, b: f64) -> f64;
fn fdimf(a: f32, b: f32) -> f32;
fn log1pf(n: f32) -> f32;
fn sinhf(n: f32) -> f32;
fn tanhf(n: f32) -> f32;
fn ldexp(f: f64, n: i32) -> f64;
fn ldexpf(f: f32, n: i32) -> f32;
fn tgamma(x: f64) -> f64;
fn tgammaf(x: f32) -> f32;
fn atan(x: f64) -> f64;
fn atan2(x: f64, y: f64) -> f64;
fn cosh(x: f64) -> f64;
fn log1p(x: f64) -> f64;
fn sinh(x: f64) -> f64;
fn tanh(x: f64) -> f64;
fn cosf(x: f32) -> f32;
fn exp(x: f64) -> f64;
fn sinf(x: f32) -> f32;
fn exp2(x: f64) -> f64;
fn exp2f(x: f32) -> f32;
fn fma(x: f64, y: f64, z: f64) -> f64;
fn fmaf(x: f32, y: f32, z: f32) -> f32;
fn asinf(n: f32) -> f32;
fn cbrtf(n: f32) -> f32;
fn hypotf(x: f32, y: f32) -> f32;
fn tanf(n: f32) -> f32;
fn sqrtf(x: f32) -> f32;
fn sqrt(x: f64) -> f64;
fn ceil(x: f64) -> f64;
fn ceilf(x: f32) -> f32;
fn floor(x: f64) -> f64;
fn floorf(x: f32) -> f32;
fn trunc(x: f64) -> f64;
fn truncf(x: f32) -> f32;
fn fmin(x: f64, y: f64) -> f64;
fn fminf(x: f32, y: f32) -> f32;
fn fmax(x: f64, y: f64) -> f64;
fn fmaxf(x: f32, y: f32) -> f32;
// `f64 % f64`
fn fmod(x: f64, y: f64) -> f64;
// `f32 % f32`
fn fmodf(x: f32, y: f32) -> f32;
fn erf(x: f64) -> f64;
fn erff(x: f32) -> f32;
fn erfc(x: f64) -> f64;
fn erfcf(x: f32) -> f32;
}
// allow for windows (and other targets)
intrinsics! {
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
let r = self::libm::lgamma_r(x);
*s = r.1;
r.0
/// This set of functions is well tested in `libm` and known to provide similar performance to
/// system `libm`, as well as the same or better accuracy.
pub mod full_availability {
#[cfg(f16_enabled)]
libm_intrinsics! {
fn ceilf16(x: f16) -> f16;
fn copysignf16(x: f16, y: f16) -> f16;
fn fabsf16(x: f16) -> f16;
fn fdimf16(x: f16, y: f16) -> f16;
fn floorf16(x: f16) -> f16;
fn fmaxf16(x: f16, y: f16) -> f16;
fn fmaximumf16(x: f16, y: f16) -> f16;
fn fminf16(x: f16, y: f16) -> f16;
fn fminimumf16(x: f16, y: f16) -> f16;
fn fmodf16(x: f16, y: f16) -> f16;
fn rintf16(x: f16) -> f16;
fn roundevenf16(x: f16) -> f16;
fn roundf16(x: f16) -> f16;
fn sqrtf16(x: f16) -> f16;
fn truncf16(x: f16) -> f16;
}
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
let r = self::libm::lgammaf_r(x);
*s = r.1;
r.0
/* Weak linkage is unreliable on Windows and Apple, so we don't expose symbols that we know
* the system libc provides in order to avoid conflicts. */
#[cfg(all(not(windows), not(target_vendor = "apple")))]
libm_intrinsics! {
/* f32 */
fn cbrtf(n: f32) -> f32;
fn ceilf(x: f32) -> f32;
fn copysignf(x: f32, y: f32) -> f32;
fn fabsf(x: f32) -> f32;
fn fdimf(a: f32, b: f32) -> f32;
fn floorf(x: f32) -> f32;
fn fmaf(x: f32, y: f32, z: f32) -> f32;
fn fmaxf(x: f32, y: f32) -> f32;
fn fminf(x: f32, y: f32) -> f32;
fn fmodf(x: f32, y: f32) -> f32;
fn rintf(x: f32) -> f32;
fn roundf(x: f32) -> f32;
fn sqrtf(x: f32) -> f32;
fn truncf(x: f32) -> f32;
/* f64 */
fn cbrt(x: f64) -> f64;
fn ceil(x: f64) -> f64;
fn copysign(x: f64, y: f64) -> f64;
fn fabs(x: f64) -> f64;
fn fdim(a: f64, b: f64) -> f64;
fn floor(x: f64) -> f64;
fn fma(x: f64, y: f64, z: f64) -> f64;
fn fmax(x: f64, y: f64) -> f64;
fn fmin(x: f64, y: f64) -> f64;
fn fmod(x: f64, y: f64) -> f64;
fn rint(x: f64) -> f64;
fn round(x: f64) -> f64;
fn sqrt(x: f64) -> f64;
fn trunc(x: f64) -> f64;
}
// Windows and MacOS do not yet expose roundeven and IEEE 754-2019 `maximum` / `minimum`,
// however, so we still provide a fallback.
libm_intrinsics! {
fn fmaximum(x: f64, y: f64) -> f64;
fn fmaximumf(x: f32, y: f32) -> f32;
fn fminimum(x: f64, y: f64) -> f64;
fn fminimumf(x: f32, y: f32) -> f32;
fn roundeven(x: f64) -> f64;
fn roundevenf(x: f32) -> f32;
}
#[cfg(f128_enabled)]
libm_intrinsics! {
fn ceilf128(x: f128) -> f128;
fn copysignf128(x: f128, y: f128) -> f128;
fn fabsf128(x: f128) -> f128;
fn fdimf128(x: f128, y: f128) -> f128;
fn floorf128(x: f128) -> f128;
fn fmaf128(x: f128, y: f128, z: f128) -> f128;
fn fmaxf128(x: f128, y: f128) -> f128;
fn fmaximumf128(x: f128, y: f128) -> f128;
fn fminf128(x: f128, y: f128) -> f128;
fn fminimumf128(x: f128, y: f128) -> f128;
fn fmodf128(x: f128, y: f128) -> f128;
fn rintf128(x: f128) -> f128;
fn roundevenf128(x: f128) -> f128;
fn roundf128(x: f128) -> f128;
fn sqrtf128(x: f128) -> f128;
fn truncf128(x: f128) -> f128;
}
}
/// This group of functions has more performance or precision issues than system versions, or
/// are otherwise less well tested. Provide them only on platforms that have problems with the
/// system `libm`.
///
/// As `libm` improves, more functions will be moved from this group to the first group.
///
/// Do not supply for any of the following:
/// - x86 without sse2 due to ABI issues
/// - <https://github.com/rust-lang/rust/issues/114479>
/// - but exclude UEFI since it is a soft-float target
/// - <https://github.com/rust-lang/rust/issues/128533>
/// - All unix targets (linux, macos, freebsd, android, etc)
/// - wasm with known target_os
#[cfg(not(any(
all(
target_arch = "x86",
not(target_feature = "sse2"),
not(target_os = "uefi"),
),
unix,
all(target_family = "wasm", not(target_os = "unknown"))
)))]
pub mod partial_availability {
#[cfg(not(windows))]
libm_intrinsics! {
fn acos(x: f64) -> f64;
fn acosf(n: f32) -> f32;
fn asin(x: f64) -> f64;
fn asinf(n: f32) -> f32;
fn atan(x: f64) -> f64;
fn atan2(x: f64, y: f64) -> f64;
fn atan2f(a: f32, b: f32) -> f32;
fn atanf(n: f32) -> f32;
fn cos(x: f64) -> f64;
fn cosf(x: f32) -> f32;
fn cosh(x: f64) -> f64;
fn coshf(n: f32) -> f32;
fn erf(x: f64) -> f64;
fn erfc(x: f64) -> f64;
fn erfcf(x: f32) -> f32;
fn erff(x: f32) -> f32;
fn exp(x: f64) -> f64;
fn exp2(x: f64) -> f64;
fn exp2f(x: f32) -> f32;
fn expf(x: f32) -> f32;
fn expm1(x: f64) -> f64;
fn expm1f(n: f32) -> f32;
fn hypot(x: f64, y: f64) -> f64;
fn hypotf(x: f32, y: f32) -> f32;
fn ldexp(f: f64, n: i32) -> f64;
fn ldexpf(f: f32, n: i32) -> f32;
fn log(x: f64) -> f64;
fn log10(x: f64) -> f64;
fn log10f(x: f32) -> f32;
fn log1p(x: f64) -> f64;
fn log1pf(n: f32) -> f32;
fn log2(x: f64) -> f64;
fn log2f(x: f32) -> f32;
fn logf(x: f32) -> f32;
fn pow(x: f64, y: f64) -> f64;
fn powf(x: f32, y: f32) -> f32;
fn sin(x: f64) -> f64;
fn sinf(x: f32) -> f32;
fn sinh(x: f64) -> f64;
fn sinhf(n: f32) -> f32;
fn tan(x: f64) -> f64;
fn tanf(n: f32) -> f32;
fn tanh(x: f64) -> f64;
fn tanhf(n: f32) -> f32;
fn tgamma(x: f64) -> f64;
fn tgammaf(x: f32) -> f32;
}
// allow for windows (and other targets)
intrinsics! {
pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 {
let r = super::libm::lgamma_r(x);
*s = r.1;
r.0
}
pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 {
let r = super::libm::lgammaf_r(x);
*s = r.1;
r.0
}
}
}