Replace calls to core::arch intrinsics with assembly

Some backends may replace calls to `core::arch` intrinsics with multiple
calls to `sqrt` [1], which makes these `sqrt` implementations recursive.
Help mitigate this by replacing the intrinsic calls with inline assembly.

Results in the same assembly as the current implementation when built
with optimizations.

[1]: https://github.com/rust-lang/compiler-builtins/issues/649
This commit is contained in:
Trevor Gross 2025-04-09 01:56:14 +00:00 committed by Trevor Gross
parent 725759602a
commit 4e5cbbeda1

View file

@ -1,22 +1,27 @@
//! Architecture-specific support for x86-32 and x86-64 with SSE2
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
/// Computes the square root of `x` in single precision with the `sqrtss` instruction.
///
/// The operation is written as inline assembly rather than through `core::arch` intrinsics
/// so that the square root cannot be turned back into a call to a `sqrt` routine.
pub fn sqrtf(mut x: f32) -> f32 {
    // SAFETY: `sqrtss` is available with `sse2`, which this module is gated behind. It has no
    // memory access or side effects.
    unsafe {
        core::arch::asm!(
            // Source and destination are the same register: `x` is overwritten in place.
            "sqrtss {x}, {x}",
            x = inout(xmm_reg) x,
            // `pure` + `nomem`: the result depends only on the input operand, so the
            // compiler is free to deduplicate or eliminate the block.
            options(nostack, nomem, pure),
        )
    };
    x
}
/// Computes the square root of `x` in double precision with the `sqrtsd` instruction.
///
/// The operation is written as inline assembly rather than through `core::arch` intrinsics
/// so that the square root cannot be turned back into a call to a `sqrt` routine.
pub fn sqrt(mut x: f64) -> f64 {
    // SAFETY: `sqrtsd` is part of `sse2`, which this module is gated behind. It has no memory
    // access or side effects.
    unsafe {
        core::arch::asm!(
            // Source and destination are the same register: `x` is overwritten in place.
            "sqrtsd {x}, {x}",
            x = inout(xmm_reg) x,
            // `pure` + `nomem`: the result depends only on the input operand, so the
            // compiler is free to deduplicate or eliminate the block.
            options(nostack, nomem, pure),
        )
    };
    x
}