Implement remaining __clz*i2 intrinsics
This commit is contained in:
parent
e01e62aa16
commit
a5c7a17d55
5 changed files with 123 additions and 67 deletions
|
|
@ -157,6 +157,9 @@ rely on CI.
|
|||
- [x] bswapdi2.c
|
||||
- [x] bswapsi2.c
|
||||
- [x] bswapti2.c
|
||||
- [x] clzdi2.c
|
||||
- [x] clzsi2.c
|
||||
- [x] clzti2.c
|
||||
- [x] comparedf2.c
|
||||
- [x] comparesf2.c
|
||||
- [x] divdf3.c
|
||||
|
|
@ -325,9 +328,6 @@ These builtins are never called by LLVM.
|
|||
- ~~arm/switch32.S~~
|
||||
- ~~arm/switch8.S~~
|
||||
- ~~arm/switchu8.S~~
|
||||
- ~~clzdi2.c~~
|
||||
- ~~clzsi2.c~~
|
||||
- ~~clzti2.c~~
|
||||
- ~~cmpdi2.c~~
|
||||
- ~~cmpti2.c~~
|
||||
- ~~ctzdi2.c~~
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@ fn configure_check_cfg() {
|
|||
"__bswapdi2",
|
||||
"__bswapti2",
|
||||
"__clzsi2",
|
||||
"__clzdi2",
|
||||
"__divdi3",
|
||||
"__divsi3",
|
||||
"__divmoddi4",
|
||||
|
|
@ -382,7 +383,6 @@ mod c {
|
|||
sources.extend(&[
|
||||
("__absvti2", "absvti2.c"),
|
||||
("__addvti3", "addvti3.c"),
|
||||
("__clzti2", "clzti2.c"),
|
||||
("__cmpti2", "cmpti2.c"),
|
||||
("__ctzti2", "ctzti2.c"),
|
||||
("__ffsti2", "ffsti2.c"),
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@
|
|||
// adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`.
|
||||
// Compilers will insert the check for zero in cases where it is needed.
|
||||
|
||||
use crate::int::{CastInto, Int};
|
||||
|
||||
public_test_dep! {
|
||||
/// Returns the number of leading binary zeros in `x`.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
|
||||
pub(crate) fn leading_zeros_default<T: Int + CastInto<usize>>(x: T) -> usize {
|
||||
// The basic idea is to test if the higher bits of `x` are zero and bisect the number
|
||||
// of leading zeros. It is possible for all branches of the bisection to use the same
|
||||
// code path by conditionally shifting the higher parts down to let the next bisection
|
||||
|
|
@ -16,46 +18,47 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
|
|||
// because it simplifies the final bisection step.
|
||||
let mut x = x;
|
||||
// the number of potential leading zeros
|
||||
let mut z = usize::MAX.count_ones() as usize;
|
||||
let mut z = T::BITS as usize;
|
||||
// a temporary
|
||||
let mut t: usize;
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
{
|
||||
let mut t: T;
|
||||
|
||||
const { assert!(T::BITS <= 64) };
|
||||
if T::BITS >= 64 {
|
||||
t = x >> 32;
|
||||
if t != 0 {
|
||||
if t != T::ZERO {
|
||||
z -= 32;
|
||||
x = t;
|
||||
}
|
||||
}
|
||||
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
|
||||
{
|
||||
if T::BITS >= 32 {
|
||||
t = x >> 16;
|
||||
if t != 0 {
|
||||
if t != T::ZERO {
|
||||
z -= 16;
|
||||
x = t;
|
||||
}
|
||||
}
|
||||
const { assert!(T::BITS >= 16) };
|
||||
t = x >> 8;
|
||||
if t != 0 {
|
||||
if t != T::ZERO {
|
||||
z -= 8;
|
||||
x = t;
|
||||
}
|
||||
t = x >> 4;
|
||||
if t != 0 {
|
||||
if t != T::ZERO {
|
||||
z -= 4;
|
||||
x = t;
|
||||
}
|
||||
t = x >> 2;
|
||||
if t != 0 {
|
||||
if t != T::ZERO {
|
||||
z -= 2;
|
||||
x = t;
|
||||
}
|
||||
// the last two bisections are combined into one conditional
|
||||
t = x >> 1;
|
||||
if t != 0 {
|
||||
if t != T::ZERO {
|
||||
z - 2
|
||||
} else {
|
||||
z - x
|
||||
z - x.cast()
|
||||
}
|
||||
|
||||
// We could potentially save a few cycles by using the LUT trick from
|
||||
|
|
@ -80,12 +83,12 @@ pub(crate) fn usize_leading_zeros_default(x: usize) -> usize {
|
|||
public_test_dep! {
|
||||
/// Returns the number of leading binary zeros in `x`.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
|
||||
pub(crate) fn leading_zeros_riscv<T: Int + CastInto<usize>>(x: T) -> usize {
|
||||
let mut x = x;
|
||||
// the number of potential leading zeros
|
||||
let mut z = usize::MAX.count_ones() as usize;
|
||||
let mut z = T::BITS;
|
||||
// a temporary
|
||||
let mut t: usize;
|
||||
let mut t: u32;
|
||||
|
||||
// RISC-V does not have a set-if-greater-than-or-equal instruction and
|
||||
// `(x >= power-of-two) as usize` will get compiled into two instructions, but this is
|
||||
|
|
@ -95,11 +98,11 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
|
|||
// right). If we try to save an instruction by using `x < imm` for each bisection, we
|
||||
// have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`,
|
||||
// but the immediate will never fit into 12 bits and never save an instruction.
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
{
|
||||
const { assert!(T::BITS <= 64) };
|
||||
if T::BITS >= 64 {
|
||||
// If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise
|
||||
// `t` is set to 0.
|
||||
t = ((x >= (1 << 32)) as usize) << 5;
|
||||
t = ((x >= (T::ONE << 32)) as u32) << 5;
|
||||
// If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the
|
||||
// next step to process.
|
||||
x >>= t;
|
||||
|
|
@ -107,43 +110,58 @@ pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize {
|
|||
// leading zeros
|
||||
z -= t;
|
||||
}
|
||||
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
|
||||
{
|
||||
t = ((x >= (1 << 16)) as usize) << 4;
|
||||
if T::BITS >= 32 {
|
||||
t = ((x >= (T::ONE << 16)) as u32) << 4;
|
||||
x >>= t;
|
||||
z -= t;
|
||||
}
|
||||
t = ((x >= (1 << 8)) as usize) << 3;
|
||||
const { assert!(T::BITS >= 16) };
|
||||
t = ((x >= (T::ONE << 8)) as u32) << 3;
|
||||
x >>= t;
|
||||
z -= t;
|
||||
t = ((x >= (1 << 4)) as usize) << 2;
|
||||
t = ((x >= (T::ONE << 4)) as u32) << 2;
|
||||
x >>= t;
|
||||
z -= t;
|
||||
t = ((x >= (1 << 2)) as usize) << 1;
|
||||
t = ((x >= (T::ONE << 2)) as u32) << 1;
|
||||
x >>= t;
|
||||
z -= t;
|
||||
t = (x >= (1 << 1)) as usize;
|
||||
t = (x >= (T::ONE << 1)) as u32;
|
||||
x >>= t;
|
||||
z -= t;
|
||||
// All bits except the LSB are guaranteed to be zero for this final bisection step.
|
||||
// If `x != 0` then `x == 1` and subtracts one potential zero from `z`.
|
||||
z - x
|
||||
z as usize - x.cast()
|
||||
}
|
||||
}
|
||||
|
||||
intrinsics! {
|
||||
#[maybe_use_optimized_c_shim]
|
||||
#[cfg(any(
|
||||
target_pointer_width = "16",
|
||||
target_pointer_width = "32",
|
||||
target_pointer_width = "64"
|
||||
))]
|
||||
/// Returns the number of leading binary zeros in `x`.
|
||||
pub extern "C" fn __clzsi2(x: usize) -> usize {
|
||||
/// Returns the number of leading binary zeros in `x`.
|
||||
pub extern "C" fn __clzsi2(x: u32) -> usize {
|
||||
if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
|
||||
usize_leading_zeros_riscv(x)
|
||||
leading_zeros_riscv(x)
|
||||
} else {
|
||||
usize_leading_zeros_default(x)
|
||||
leading_zeros_default(x)
|
||||
}
|
||||
}
|
||||
|
||||
#[maybe_use_optimized_c_shim]
|
||||
/// Returns the number of leading binary zeros in `x`.
|
||||
pub extern "C" fn __clzdi2(x: u64) -> usize {
|
||||
if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
|
||||
leading_zeros_riscv(x)
|
||||
} else {
|
||||
leading_zeros_default(x)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of leading binary zeros in `x`.
|
||||
pub extern "C" fn __clzti2(x: u128) -> usize {
|
||||
let hi = (x >> 64) as u64;
|
||||
if hi == 0 {
|
||||
64 + __clzdi2(x as u64)
|
||||
} else {
|
||||
__clzdi2(hi)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ pub mod shift;
|
|||
pub mod udiv;
|
||||
|
||||
pub use big::{i256, u256};
|
||||
pub use leading_zeros::__clzsi2;
|
||||
|
||||
public_test_dep! {
|
||||
/// Minimal integer implementations needed on all integer types, including wide integers.
|
||||
|
|
|
|||
|
|
@ -65,31 +65,70 @@ fn fuzz_values() {
|
|||
|
||||
#[test]
|
||||
fn leading_zeros() {
|
||||
use compiler_builtins::int::__clzsi2;
|
||||
use compiler_builtins::int::leading_zeros::{
|
||||
usize_leading_zeros_default, usize_leading_zeros_riscv,
|
||||
};
|
||||
fuzz(N, |x: usize| {
|
||||
let lz = x.leading_zeros() as usize;
|
||||
let lz0 = __clzsi2(x);
|
||||
let lz1 = usize_leading_zeros_default(x);
|
||||
let lz2 = usize_leading_zeros_riscv(x);
|
||||
if lz0 != lz {
|
||||
panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
|
||||
}
|
||||
if lz1 != lz {
|
||||
panic!(
|
||||
"usize_leading_zeros_default({}): std: {}, builtins: {}",
|
||||
x, lz, lz1
|
||||
);
|
||||
}
|
||||
if lz2 != lz {
|
||||
panic!(
|
||||
"usize_leading_zeros_riscv({}): std: {}, builtins: {}",
|
||||
x, lz, lz2
|
||||
);
|
||||
}
|
||||
})
|
||||
use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv};
|
||||
{
|
||||
use compiler_builtins::int::leading_zeros::__clzsi2;
|
||||
fuzz(N, |x: u32| {
|
||||
if x == 0 {
|
||||
return; // undefined value for an intrinsic
|
||||
}
|
||||
let lz = x.leading_zeros() as usize;
|
||||
let lz0 = __clzsi2(x);
|
||||
let lz1 = leading_zeros_default(x);
|
||||
let lz2 = leading_zeros_riscv(x);
|
||||
if lz0 != lz {
|
||||
panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
|
||||
}
|
||||
if lz1 != lz {
|
||||
panic!(
|
||||
"leading_zeros_default({}): std: {}, builtins: {}",
|
||||
x, lz, lz1
|
||||
);
|
||||
}
|
||||
if lz2 != lz {
|
||||
panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
use compiler_builtins::int::leading_zeros::__clzdi2;
|
||||
fuzz(N, |x: u64| {
|
||||
if x == 0 {
|
||||
return; // undefined value for an intrinsic
|
||||
}
|
||||
let lz = x.leading_zeros() as usize;
|
||||
let lz0 = __clzdi2(x);
|
||||
let lz1 = leading_zeros_default(x);
|
||||
let lz2 = leading_zeros_riscv(x);
|
||||
if lz0 != lz {
|
||||
panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0);
|
||||
}
|
||||
if lz1 != lz {
|
||||
panic!(
|
||||
"leading_zeros_default({}): std: {}, builtins: {}",
|
||||
x, lz, lz1
|
||||
);
|
||||
}
|
||||
if lz2 != lz {
|
||||
panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
use compiler_builtins::int::leading_zeros::__clzti2;
|
||||
fuzz(N, |x: u128| {
|
||||
if x == 0 {
|
||||
return; // undefined value for an intrinsic
|
||||
}
|
||||
let lz = x.leading_zeros() as usize;
|
||||
let lz0 = __clzti2(x);
|
||||
if lz0 != lz {
|
||||
panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue