Implement remaining __clz*i2 intrinsics

This commit is contained in:
Andrey Turkin 2024-06-22 00:01:52 +03:00 committed by Amanieu d'Antras
parent e01e62aa16
commit a5c7a17d55
5 changed files with 123 additions and 67 deletions

View file

@ -157,6 +157,9 @@ rely on CI.
- [x] bswapdi2.c
- [x] bswapsi2.c
- [x] bswapti2.c
- [x] clzdi2.c
- [x] clzsi2.c
- [x] clzti2.c
- [x] comparedf2.c
- [x] comparesf2.c
- [x] divdf3.c
@ -325,9 +328,6 @@ These builtins are never called by LLVM.
- ~~arm/switch32.S~~
- ~~arm/switch8.S~~
- ~~arm/switchu8.S~~
- ~~clzdi2.c~~
- ~~clzsi2.c~~
- ~~clzti2.c~~
- ~~cmpdi2.c~~
- ~~cmpti2.c~~
- ~~ctzdi2.c~~

View file

@ -165,6 +165,7 @@ fn configure_check_cfg() {
"__bswapdi2",
"__bswapti2",
"__clzsi2",
"__clzdi2",
"__divdi3",
"__divsi3",
"__divmoddi4",
@ -382,7 +383,6 @@ mod c {
sources.extend(&[
("__absvti2", "absvti2.c"),
("__addvti3", "addvti3.c"),
("__clzti2", "clzti2.c"),
("__cmpti2", "cmpti2.c"),
("__ctzti2", "ctzti2.c"),
("__ffsti2", "ffsti2.c"),

View file

@ -3,10 +3,12 @@
// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`.
// Compilers will insert the check for zero in cases where it is needed.
use crate::int::{CastInto, Int};
public_test_dep! {
/// Returns the number of leading binary zeros in `x`.
#[allow(dead_code)]
pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
// The basic idea is to test if the higher bits of `x` are zero and bisect the number
// of leading zeros. It is possible for all branches of the bisection to use the same
// code path by conditionally shifting the higher parts down to let the next bisection
@ -16,46 +18,47 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
// because it simplifies the final bisection step.
let mut x = x;
// the number of potential leading zeros
let mut z = usize::MAX.count_ones() as usize;
let mut z = T::BITS as usize;
// a temporary
let mut t: usize;
#[cfg(target_pointer_width = "64")]
{
let mut t: T;
const { assert!(T::BITS <= 64) };
if T::BITS >= 64 {
t = x >> 32;
if t != 0 {
if t != T::ZERO {
z -= 32;
x = t;
}
}
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
{
if T::BITS >= 32 {
t = x >> 16;
if t != 0 {
if t != T::ZERO {
z -= 16;
x = t;
}
}
const { assert!(T::BITS >= 16) };
t = x >> 8;
if t != 0 {
if t != T::ZERO {
z -= 8;
x = t;
}
t = x >> 4;
if t != 0 {
if t != T::ZERO {
z -= 4;
x = t;
}
t = x >> 2;
if t != 0 {
if t != T::ZERO {
z -= 2;
x = t;
}
// the last two bisections are combined into one conditional
t = x >> 1;
if t != 0 {
if t != T::ZERO {
z - 2
} else {
z - x
z - x.cast()
}
// We could potentially save a few cycles by using the LUT trick from
@ -80,12 +83,12 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
public_test_dep! {
/// Returns the number of leading binary zeros in `x`.
#[allow(dead_code)]
pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
let mut x = x;
// the number of potential leading zeros
let mut z = usize::MAX.count_ones() as usize;
let mut z = T::BITS;
// a temporary
let mut t: usize;
let mut t: u32;
// RISC-V does not have a set-if-greater-than-or-equal instruction and
// `(x >= power-of-two) as usize` will get compiled into two instructions, but this is
@ -95,11 +98,11 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
// right). If we try to save an instruction by using `x < imm` for each bisection, we
// have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
// but the immediate will never fit into 12 bits and never save an instruction.
#[cfg(target_pointer_width = "64")]
{
const { assert!(T::BITS <= 64) };
if T::BITS >= 64 {
// If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
// `t` is set to 0.
t = ((x >= (1 << 32)) as usize) << 5;
t = ((x >= (T::ONE << 32)) as u32) << 5;
// If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
// next step to process.
x >>= t;
@ -107,43 +110,58 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
// leading zeros
z -= t;
}
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
{
t = ((x >= (1 << 16)) as usize) << 4;
if T::BITS >= 32 {
t = ((x >= (T::ONE << 16)) as u32) << 4;
x >>= t;
z -= t;
}
t = ((x >= (1 << 8)) as usize) << 3;
const { assert!(T::BITS >= 16) };
t = ((x >= (T::ONE << 8)) as u32) << 3;
x >>= t;
z -= t;
t = ((x >= (1 << 4)) as usize) << 2;
t = ((x >= (T::ONE << 4)) as u32) << 2;
x >>= t;
z -= t;
t = ((x >= (1 << 2)) as usize) << 1;
t = ((x >= (T::ONE << 2)) as u32) << 1;
x >>= t;
z -= t;
t = (x >= (1 << 1)) as usize;
t = (x >= (T::ONE << 1)) as u32;
x >>= t;
z -= t;
// All bits except the LSB are guaranteed to be zero for this final bisection step.
// If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
z - x
z as usize - x.cast()
}
}
intrinsics! {
#[maybe_use_optimized_c_shim]
#[cfg(any(
target_pointer_width = "16",
target_pointer_width = "32",
target_pointer_width = "64"
))]
/// Returns the number of leading binary zeros in `x`.
pub extern "C" fn __clzsi2(x: usize) -> usize {
/// Returns the number of leading binary zeros in `x`
pub extern "C" fn __clzsi2(x: u32) -> usize {
if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
usize_leading_zeros_riscv(x)
leading_zeros_riscv(x)
} else {
usize_leading_zeros_default(x)
leading_zeros_default(x)
}
}
#[maybe_use_optimized_c_shim]
/// Counts the leading zero bits of the 64-bit value `x`.
///
/// Uses the RISC-V specific bisection when compiling for `riscv32` or `riscv64`, and
/// the default bisection on every other architecture.
pub extern "C" fn __clzdi2(x: u64) -> usize {
    // `cfg!` expands to a boolean literal, so the choice is fixed at compile time.
    let target_is_riscv = cfg!(any(target_arch = "riscv32", target_arch = "riscv64"));
    if !target_is_riscv {
        return leading_zeros_default(x);
    }
    leading_zeros_riscv(x)
}
/// Counts the leading zero bits of the 128-bit value `x`.
///
/// The value is split into two 64-bit halves and the counting itself is delegated
/// to `__clzdi2`.
pub extern "C" fn __clzti2(x: u128) -> usize {
    let lower = x as u64;
    match (x >> 64) as u64 {
        // Empty upper half: its 64 bits are all leading zeros, and the lower half
        // contributes the rest.
        0 => 64 + __clzdi2(lower),
        // Otherwise the most significant set bit lives in the upper half.
        upper => __clzdi2(upper),
    }
}
}

View file

@ -12,7 +12,6 @@ pub mod shift;
pub mod udiv;
pub use big::{i256, u256};
pub use leading_zeros::__clzsi2;
public_test_dep! {
/// Minimal integer implementations needed on all integer types, including wide integers.

View file

@ -65,31 +65,70 @@ fn fuzz_values() {
#[test]
fn leading_zeros() {
use compiler_builtins::int::__clzsi2;
use compiler_builtins::int::leading_zeros::{
usize_leading_zeros_default, usize_leading_zeros_riscv,
};
fuzz(N, |x: usize| {
let lz = x.leading_zeros() as usize;
let lz0 = __clzsi2(x);
let lz1 = usize_leading_zeros_default(x);
let lz2 = usize_leading_zeros_riscv(x);
if lz0 != lz {
panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
}
if lz1 != lz {
panic!(
"usize_leading_zeros_default({}): std: {}, builtins: {}",
x, lz, lz1
);
}
if lz2 != lz {
panic!(
"usize_leading_zeros_riscv({}): std: {}, builtins: {}",
x, lz, lz2
);
}
})
use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv};
{
    use compiler_builtins::int::leading_zeros::__clzsi2;
    // 32-bit inputs: the intrinsic and both generic helpers must agree with
    // `u32::leading_zeros`.
    fuzz(N, |x: u32| {
        // Zero is skipped because the intrinsic's result is undefined for it.
        if x != 0 {
            let expected = x.leading_zeros() as usize;
            let from_intrinsic = __clzsi2(x);
            let from_default = leading_zeros_default(x);
            let from_riscv = leading_zeros_riscv(x);
            if from_intrinsic != expected {
                panic!(
                    "__clzsi2({}): std: {}, builtins: {}",
                    x, expected, from_intrinsic
                );
            }
            if from_default != expected {
                panic!(
                    "leading_zeros_default({}): std: {}, builtins: {}",
                    x, expected, from_default
                );
            }
            if from_riscv != expected {
                panic!(
                    "leading_zeros_riscv({}): std: {}, builtins: {}",
                    x, expected, from_riscv
                );
            }
        }
    });
}
{
    use compiler_builtins::int::leading_zeros::__clzdi2;
    // 64-bit inputs: the intrinsic and both generic helpers must agree with
    // `u64::leading_zeros`.
    fuzz(N, |x: u64| {
        // Zero is skipped because the intrinsic's result is undefined for it.
        if x != 0 {
            let expected = x.leading_zeros() as usize;
            let from_intrinsic = __clzdi2(x);
            let from_default = leading_zeros_default(x);
            let from_riscv = leading_zeros_riscv(x);
            if from_intrinsic != expected {
                panic!(
                    "__clzdi2({}): std: {}, builtins: {}",
                    x, expected, from_intrinsic
                );
            }
            if from_default != expected {
                panic!(
                    "leading_zeros_default({}): std: {}, builtins: {}",
                    x, expected, from_default
                );
            }
            if from_riscv != expected {
                panic!(
                    "leading_zeros_riscv({}): std: {}, builtins: {}",
                    x, expected, from_riscv
                );
            }
        }
    });
}
{
    use compiler_builtins::int::leading_zeros::__clzti2;
    // 128-bit inputs: only the intrinsic is compared with `u128::leading_zeros`.
    fuzz(N, |x: u128| {
        // Zero is skipped because the intrinsic's result is undefined for it.
        if x != 0 {
            let expected = x.leading_zeros() as usize;
            let from_intrinsic = __clzti2(x);
            if from_intrinsic != expected {
                panic!(
                    "__clzti2({}): std: {}, builtins: {}",
                    x, expected, from_intrinsic
                );
            }
        }
    });
}
}
#[test]