Merge pull request #606 from tgross35/f16-f128-intrinsics-min
Add addition, subtraction, multiplication, and compare operations for `f128`
This commit is contained in:
commit
f8b72ede7e
25 changed files with 929 additions and 232 deletions
|
|
@ -232,8 +232,8 @@ These builtins are needed to support 128-bit integers.
|
|||
|
||||
These builtins are needed to support `f16` and `f128`, which are in the process of being added to Rust.
|
||||
|
||||
- [ ] addtf3.c
|
||||
- [ ] comparetf2.c
|
||||
- [x] addtf3.c
|
||||
- [x] comparetf2.c
|
||||
- [ ] divtf3.c
|
||||
- [x] extenddftf2.c
|
||||
- [x] extendhfsf2.c
|
||||
|
|
@ -249,13 +249,13 @@ These builtins are needed to support `f16` and `f128`, which are in the process
|
|||
- [ ] floatsitf.c
|
||||
- [ ] floatunditf.c
|
||||
- [ ] floatunsitf.c
|
||||
- [ ] multf3.c
|
||||
- [x] multf3.c
|
||||
- [ ] powitf2.c
|
||||
- [ ] ppc/fixtfdi.c
|
||||
- [ ] ppc/fixunstfdi.c
|
||||
- [ ] ppc/floatditf.c
|
||||
- [ ] ppc/floatunditf.c
|
||||
- [ ] subtf3.c
|
||||
- [x] subtf3.c
|
||||
- [x] truncdfhf2.c
|
||||
- [x] truncsfhf2.c
|
||||
- [x] trunctfdf2.c
|
||||
|
|
|
|||
|
|
@ -543,9 +543,6 @@ mod c {
|
|||
("__floatsitf", "floatsitf.c"),
|
||||
("__floatunditf", "floatunditf.c"),
|
||||
("__floatunsitf", "floatunsitf.c"),
|
||||
("__addtf3", "addtf3.c"),
|
||||
("__multf3", "multf3.c"),
|
||||
("__subtf3", "subtf3.c"),
|
||||
("__divtf3", "divtf3.c"),
|
||||
("__powitf2", "powitf2.c"),
|
||||
("__fe_getround", "fp_mode.c"),
|
||||
|
|
@ -564,30 +561,22 @@ mod c {
|
|||
if target_arch == "mips64" {
|
||||
sources.extend(&[
|
||||
("__netf2", "comparetf2.c"),
|
||||
("__addtf3", "addtf3.c"),
|
||||
("__multf3", "multf3.c"),
|
||||
("__subtf3", "subtf3.c"),
|
||||
("__fixtfsi", "fixtfsi.c"),
|
||||
("__floatsitf", "floatsitf.c"),
|
||||
("__fixunstfsi", "fixunstfsi.c"),
|
||||
("__floatunsitf", "floatunsitf.c"),
|
||||
("__fe_getround", "fp_mode.c"),
|
||||
("__divtf3", "divtf3.c"),
|
||||
]);
|
||||
}
|
||||
|
||||
if target_arch == "loongarch64" {
|
||||
sources.extend(&[
|
||||
("__netf2", "comparetf2.c"),
|
||||
("__addtf3", "addtf3.c"),
|
||||
("__multf3", "multf3.c"),
|
||||
("__subtf3", "subtf3.c"),
|
||||
("__fixtfsi", "fixtfsi.c"),
|
||||
("__floatsitf", "floatsitf.c"),
|
||||
("__fixunstfsi", "fixunstfsi.c"),
|
||||
("__floatunsitf", "floatunsitf.c"),
|
||||
("__fe_getround", "fp_mode.c"),
|
||||
("__divtf3", "divtf3.c"),
|
||||
]);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ fi
|
|||
if [ "${NO_STD:-}" = "1" ]; then
|
||||
echo "nothing to do for no_std"
|
||||
else
|
||||
run="cargo test --manifest-path testcrate/Cargo.toml --target $target"
|
||||
run="cargo test --manifest-path testcrate/Cargo.toml --no-fail-fast --target $target"
|
||||
$run
|
||||
$run --release
|
||||
$run --features c
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, Int};
|
||||
use crate::int::{CastInto, Int, MinInt};
|
||||
|
||||
/// Returns `a + b`
|
||||
fn add<F: Float>(a: F, b: F) -> F
|
||||
|
|
@ -57,9 +57,9 @@ where
|
|||
}
|
||||
|
||||
// zero + anything = anything
|
||||
if a_abs == Int::ZERO {
|
||||
if a_abs == MinInt::ZERO {
|
||||
// but we need to get the sign right for zero + zero
|
||||
if b_abs == Int::ZERO {
|
||||
if b_abs == MinInt::ZERO {
|
||||
return F::from_repr(a.repr() & b.repr());
|
||||
} else {
|
||||
return b;
|
||||
|
|
@ -67,7 +67,7 @@ where
|
|||
}
|
||||
|
||||
// anything + zero = anything
|
||||
if b_abs == Int::ZERO {
|
||||
if b_abs == MinInt::ZERO {
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
|
@ -113,10 +113,10 @@ where
|
|||
// Shift the significand of b by the difference in exponents, with a sticky
|
||||
// bottom bit to get rounding correct.
|
||||
let align = a_exponent.wrapping_sub(b_exponent).cast();
|
||||
if align != Int::ZERO {
|
||||
if align != MinInt::ZERO {
|
||||
if align < bits {
|
||||
let sticky =
|
||||
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO);
|
||||
F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != MinInt::ZERO);
|
||||
b_significand = (b_significand >> align.cast()) | sticky;
|
||||
} else {
|
||||
b_significand = one; // sticky; b is known to be non-zero.
|
||||
|
|
@ -125,8 +125,8 @@ where
|
|||
if subtraction {
|
||||
a_significand = a_significand.wrapping_sub(b_significand);
|
||||
// If a == -b, return +zero.
|
||||
if a_significand == Int::ZERO {
|
||||
return F::from_repr(Int::ZERO);
|
||||
if a_significand == MinInt::ZERO {
|
||||
return F::from_repr(MinInt::ZERO);
|
||||
}
|
||||
|
||||
// If partial cancellation occured, we need to left-shift the result
|
||||
|
|
@ -143,8 +143,8 @@ where
|
|||
|
||||
// If the addition carried up, we need to right-shift the result and
|
||||
// adjust the exponent:
|
||||
if a_significand & implicit_bit << 4 != Int::ZERO {
|
||||
let sticky = F::Int::from_bool(a_significand & one != Int::ZERO);
|
||||
if a_significand & implicit_bit << 4 != MinInt::ZERO {
|
||||
let sticky = F::Int::from_bool(a_significand & one != MinInt::ZERO);
|
||||
a_significand = a_significand >> 1 | sticky;
|
||||
a_exponent += 1;
|
||||
}
|
||||
|
|
@ -160,7 +160,7 @@ where
|
|||
// need to shift the significand.
|
||||
let shift = (1 - a_exponent).cast();
|
||||
let sticky =
|
||||
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO);
|
||||
F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != MinInt::ZERO);
|
||||
a_significand = a_significand >> shift.cast() | sticky;
|
||||
a_exponent = 0;
|
||||
}
|
||||
|
|
@ -203,6 +203,16 @@ intrinsics! {
|
|||
add(a, b)
|
||||
}
|
||||
|
||||
#[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __addtf3(a: f128, b: f128) -> f128 {
|
||||
add(a, b)
|
||||
}
|
||||
|
||||
#[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __addkf3(a: f128, b: f128) -> f128 {
|
||||
add(a, b)
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
pub extern "C" fn __addsf3vfp(a: f32, b: f32) -> f32 {
|
||||
a + b
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#![allow(unreachable_code)]
|
||||
|
||||
use crate::float::Float;
|
||||
use crate::int::Int;
|
||||
use crate::int::MinInt;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum Result {
|
||||
|
|
@ -172,6 +172,89 @@ intrinsics! {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(not(any(
|
||||
feature = "no-f16-f128",
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64"
|
||||
)))]
|
||||
intrinsics! {
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __letf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __getf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_ge_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __unordtf2(a: f128, b: f128) -> i32 {
|
||||
unord(a, b) as i32
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __eqtf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __lttf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __netf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __gttf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_ge_abi()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
not(feature = "no-f16-f128"),
|
||||
any(target_arch = "powerpc", target_arch = "powerpc64")
|
||||
))]
|
||||
intrinsics! {
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __lekf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __gekf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_ge_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __unordkf2(a: f128, b: f128) -> i32 {
|
||||
unord(a, b) as i32
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __eqkf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __ltkf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __nekf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_le_abi()
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
pub extern "C" fn __gtkf2(a: f128, b: f128) -> i32 {
|
||||
cmp(a, b).to_ge_abi()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
intrinsics! {
|
||||
pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@
|
|||
#![allow(clippy::needless_return)]
|
||||
|
||||
use crate::float::Float;
|
||||
use crate::int::{CastInto, DInt, HInt, Int};
|
||||
use crate::int::{CastInto, DInt, HInt, Int, MinInt};
|
||||
|
||||
use super::HalfRep;
|
||||
|
||||
fn div32<F: Float>(a: F, b: F) -> F
|
||||
where
|
||||
|
|
@ -454,15 +456,20 @@ where
|
|||
|
||||
fn div64<F: Float>(a: F, b: F) -> F
|
||||
where
|
||||
u32: CastInto<F::Int>,
|
||||
F::Int: CastInto<u32>,
|
||||
i32: CastInto<F::Int>,
|
||||
F::Int: CastInto<i32>,
|
||||
u64: CastInto<F::Int>,
|
||||
F::Int: CastInto<HalfRep<F>>,
|
||||
F::Int: From<HalfRep<F>>,
|
||||
F::Int: From<u8>,
|
||||
F::Int: CastInto<u64>,
|
||||
i64: CastInto<F::Int>,
|
||||
F::Int: CastInto<i64>,
|
||||
F::Int: HInt,
|
||||
F::Int: HInt + DInt,
|
||||
u16: CastInto<F::Int>,
|
||||
i32: CastInto<F::Int>,
|
||||
i64: CastInto<F::Int>,
|
||||
u32: CastInto<F::Int>,
|
||||
u64: CastInto<F::Int>,
|
||||
u64: CastInto<HalfRep<F>>,
|
||||
{
|
||||
const NUMBER_OF_HALF_ITERATIONS: usize = 3;
|
||||
const NUMBER_OF_FULL_ITERATIONS: usize = 1;
|
||||
|
|
@ -471,7 +478,7 @@ where
|
|||
let one = F::Int::ONE;
|
||||
let zero = F::Int::ZERO;
|
||||
let hw = F::BITS / 2;
|
||||
let lo_mask = u64::MAX >> hw;
|
||||
let lo_mask = F::Int::MAX >> hw;
|
||||
|
||||
let significand_bits = F::SIGNIFICAND_BITS;
|
||||
let max_exponent = F::EXPONENT_MAX;
|
||||
|
|
@ -616,8 +623,9 @@ where
|
|||
|
||||
let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
|
||||
// Starting with (n-1) half-width iterations
|
||||
let b_uq1_hw: u32 =
|
||||
(CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
|
||||
let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
|
||||
CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
|
||||
);
|
||||
|
||||
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
|
||||
// with W0 being either 16 or 32 and W0 <= HW.
|
||||
|
|
@ -625,12 +633,13 @@ where
|
|||
// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
|
||||
|
||||
// HW is at least 32. Shifting into the highest bits if needed.
|
||||
let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
|
||||
let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));
|
||||
|
||||
// b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
|
||||
// so x0 fits to UQ0.HW without wrapping.
|
||||
let x_uq0_hw: u32 = {
|
||||
let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
|
||||
let x_uq0_hw: HalfRep<F> = {
|
||||
let mut x_uq0_hw: HalfRep<F> =
|
||||
c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
|
||||
// dbg!(x_uq0_hw);
|
||||
// An e_0 error is comprised of errors due to
|
||||
// * x0 being an inherently imprecise first approximation of 1/b_hw
|
||||
|
|
@ -661,8 +670,9 @@ where
|
|||
// no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
|
||||
// expected to be strictly positive because b_UQ1_hw has its highest bit set
|
||||
// and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
|
||||
let corr_uq1_hw: u32 =
|
||||
0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
|
||||
let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
|
||||
((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
|
||||
));
|
||||
// dbg!(corr_uq1_hw);
|
||||
|
||||
// Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
|
||||
|
|
@ -677,7 +687,9 @@ where
|
|||
// The fact corr_UQ1_hw was virtually round up (due to result of
|
||||
// multiplication being **first** truncated, then negated - to improve
|
||||
// error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
|
||||
x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
|
||||
x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
|
||||
>> (hw - 1))
|
||||
.cast();
|
||||
// dbg!(x_uq0_hw);
|
||||
// Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
|
||||
// representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
|
||||
|
|
@ -707,7 +719,7 @@ where
|
|||
// be not below that value (see g(x) above), so it is safe to decrement just
|
||||
// once after the final iteration. On the other hand, an effective value of
|
||||
// divisor changes after this point (from b_hw to b), so adjust here.
|
||||
x_uq0_hw.wrapping_sub(1_u32)
|
||||
x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
|
||||
};
|
||||
|
||||
// Error estimations for full-precision iterations are calculated just
|
||||
|
|
@ -717,7 +729,7 @@ where
|
|||
// Simulating operations on a twice_rep_t to perform a single final full-width
|
||||
// iteration. Using ad-hoc multiplication implementations to take advantage
|
||||
// of particular structure of operands.
|
||||
let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
|
||||
let blo: F::Int = b_uq1 & lo_mask;
|
||||
// x_UQ0 = x_UQ0_hw * 2^HW - 1
|
||||
// x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
|
||||
//
|
||||
|
|
@ -726,19 +738,20 @@ where
|
|||
// + [ x_UQ0_hw * blo ]
|
||||
// - [ b_UQ1 ]
|
||||
// = [ result ][.... discarded ...]
|
||||
let corr_uq1 = negate_u64(
|
||||
(x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
|
||||
); // account for *possible* carry
|
||||
let lo_corr = corr_uq1 & lo_mask;
|
||||
let hi_corr = corr_uq1 >> hw;
|
||||
let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
|
||||
+ ((F::Int::from(x_uq0_hw) * blo) >> hw))
|
||||
.wrapping_sub(one)
|
||||
.wrapping_neg(); // account for *possible* carry
|
||||
let lo_corr: F::Int = corr_uq1 & lo_mask;
|
||||
let hi_corr: F::Int = corr_uq1 >> hw;
|
||||
// x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
|
||||
let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
|
||||
.wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
|
||||
.wrapping_sub(2))
|
||||
.cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
|
||||
// 1 to account for possible carry
|
||||
// Just like the case of half-width iterations but with possibility
|
||||
// of overflowing by one extra Ulp of x_UQ0.
|
||||
let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
|
||||
.wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
|
||||
.wrapping_sub(F::Int::from(2u8));
|
||||
// 1 to account for the highest bit of corr_UQ1 can be 1
|
||||
// 1 to account for possible carry
|
||||
// Just like the case of half-width iterations but with possibility
|
||||
// of overflowing by one extra Ulp of x_UQ0.
|
||||
x_uq0 -= one;
|
||||
// ... and then traditional fixup by 2 should work
|
||||
|
||||
|
|
@ -755,8 +768,8 @@ where
|
|||
x_uq0
|
||||
} else {
|
||||
// C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
|
||||
let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
|
||||
let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
|
||||
let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
|
||||
let x_uq0: F::Int = c.wrapping_sub(b_uq1);
|
||||
// E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
|
||||
x_uq0
|
||||
};
|
||||
|
|
@ -806,7 +819,7 @@ where
|
|||
// Now 1/b - (2*P) * 2^-W < x < 1/b
|
||||
// FIXME Is x_UQ0 still >= 0.5?
|
||||
|
||||
let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
|
||||
let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
|
||||
// Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
|
||||
|
||||
// quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
|
||||
|
|
@ -868,7 +881,7 @@ where
|
|||
// r = a - b * q
|
||||
let abs_result = if written_exponent > 0 {
|
||||
let mut ret = quotient & significand_mask;
|
||||
ret |= ((written_exponent as u64) << significand_bits).cast();
|
||||
ret |= written_exponent.cast() << significand_bits;
|
||||
residual <<= 1;
|
||||
ret
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, Int};
|
||||
use crate::int::{CastInto, Int, MinInt};
|
||||
|
||||
/// Generic conversion from a narrower to a wider IEEE-754 floating-point type
|
||||
fn extend<F: Float, R: Float>(a: F) -> R
|
||||
|
|
@ -100,19 +100,37 @@ intrinsics! {
|
|||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __extendhftf2(a: f16) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
pub extern "C" fn __extendsftf2(a: f32) -> f128 {
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
pub extern "C" fn __extendhfkf2(a: f16) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __extendsftf2(a: f32) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
pub extern "C" fn __extendsfkf2(a: f32) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __extenddftf2(a: f64) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
pub extern "C" fn __extenddfkf2(a: f64) -> f128 {
|
||||
extend(a)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use core::ops;
|
||||
|
||||
use super::int::Int;
|
||||
use crate::int::{DInt, Int, MinInt};
|
||||
|
||||
pub mod add;
|
||||
pub mod cmp;
|
||||
|
|
@ -12,6 +12,9 @@ pub mod pow;
|
|||
pub mod sub;
|
||||
pub mod trunc;
|
||||
|
||||
/// Wrapper to extract the integer type half of the float's size
|
||||
pub(crate) type HalfRep<F> = <<F as Float>::Int as DInt>::H;
|
||||
|
||||
public_test_dep! {
|
||||
/// Trait for some basic operations on floats
|
||||
#[allow(dead_code)]
|
||||
|
|
@ -60,7 +63,7 @@ pub(crate) trait Float:
|
|||
/// A mask for the significand
|
||||
const SIGNIFICAND_MASK: Self::Int;
|
||||
|
||||
// The implicit bit of the float format
|
||||
/// The implicit bit of the float format
|
||||
const IMPLICIT_BIT: Self::Int;
|
||||
|
||||
/// A mask for the exponent
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, DInt, HInt, Int};
|
||||
use crate::int::{CastInto, DInt, HInt, Int, MinInt};
|
||||
|
||||
fn mul<F: Float>(a: F, b: F) -> F
|
||||
where
|
||||
|
|
@ -199,6 +199,17 @@ intrinsics! {
|
|||
mul(a, b)
|
||||
}
|
||||
|
||||
#[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __multf3(a: f128, b: f128) -> f128 {
|
||||
mul(a, b)
|
||||
}
|
||||
|
||||
|
||||
#[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __mulkf3(a: f128, b: f128) -> f128 {
|
||||
mul(a, b)
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 {
|
||||
a * b
|
||||
|
|
|
|||
|
|
@ -15,6 +15,18 @@ intrinsics! {
|
|||
__adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK))
|
||||
}
|
||||
|
||||
#[cfg(not(any(feature = "no-f16-f128", target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __subtf3(a: f128, b: f128) -> f128 {
|
||||
use crate::float::add::__addtf3;
|
||||
__addtf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK))
|
||||
}
|
||||
|
||||
#[cfg(all(not(feature = "no-f16-f128"), any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __subkf3(a: f128, b: f128) -> f128 {
|
||||
use crate::float::add::__addkf3;
|
||||
__addkf3(a, f128::from_repr(b.repr() ^ f128::SIGN_MASK))
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
pub extern "C" fn __subsf3vfp(a: f32, b: f32) -> f32 {
|
||||
a - b
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::float::Float;
|
||||
use crate::int::{CastInto, Int};
|
||||
use crate::int::{CastInto, Int, MinInt};
|
||||
|
||||
fn trunc<F: Float, R: Float>(a: F) -> R
|
||||
where
|
||||
|
|
@ -155,19 +155,37 @@ intrinsics! {
|
|||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __trunctfhf2(a: f128) -> f16 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
pub extern "C" fn __trunctfsf2(a: f128) -> f32 {
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
pub extern "C" fn __trunckfhf2(a: f128) -> f16 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __trunctfsf2(a: f128) -> f32 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
pub extern "C" fn __trunckfsf2(a: f128) -> f32 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
pub extern "C" fn __trunctfdf2(a: f128) -> f64 {
|
||||
trunc(a)
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
pub extern "C" fn __trunckfdf2(a: f128) -> f64 {
|
||||
trunc(a)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::int::{DInt, Int};
|
||||
use crate::int::{DInt, Int, MinInt};
|
||||
|
||||
trait UAddSub: DInt {
|
||||
trait UAddSub: DInt + Int {
|
||||
fn uadd(self, other: Self) -> Self {
|
||||
let (lo, carry) = self.lo().overflowing_add(other.lo());
|
||||
let hi = self.hi().wrapping_add(other.hi());
|
||||
|
|
@ -22,7 +22,7 @@ impl UAddSub for u128 {}
|
|||
|
||||
trait AddSub: Int
|
||||
where
|
||||
<Self as Int>::UnsignedInt: UAddSub,
|
||||
<Self as MinInt>::UnsignedInt: UAddSub,
|
||||
{
|
||||
fn add(self, other: Self) -> Self {
|
||||
Self::from_unsigned(self.unsigned().uadd(other.unsigned()))
|
||||
|
|
@ -37,7 +37,7 @@ impl AddSub for i128 {}
|
|||
|
||||
trait Addo: AddSub
|
||||
where
|
||||
<Self as Int>::UnsignedInt: UAddSub,
|
||||
<Self as MinInt>::UnsignedInt: UAddSub,
|
||||
{
|
||||
fn addo(self, other: Self) -> (Self, bool) {
|
||||
let sum = AddSub::add(self, other);
|
||||
|
|
@ -50,7 +50,7 @@ impl Addo for u128 {}
|
|||
|
||||
trait Subo: AddSub
|
||||
where
|
||||
<Self as Int>::UnsignedInt: UAddSub,
|
||||
<Self as MinInt>::UnsignedInt: UAddSub,
|
||||
{
|
||||
fn subo(self, other: Self) -> (Self, bool) {
|
||||
let sum = AddSub::sub(self, other);
|
||||
|
|
|
|||
251
library/compiler-builtins/src/int/big.rs
Normal file
251
library/compiler-builtins/src/int/big.rs
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
//! Integers used for wide operations, larger than `u128`.
|
||||
|
||||
#![allow(unused)]
|
||||
|
||||
use crate::int::{DInt, HInt, Int, MinInt};
|
||||
use core::{fmt, ops};
|
||||
|
||||
const WORD_LO_MASK: u64 = 0x00000000ffffffff;
|
||||
const WORD_HI_MASK: u64 = 0xffffffff00000000;
|
||||
const WORD_FULL_MASK: u64 = 0xffffffffffffffff;
|
||||
const U128_LO_MASK: u128 = u64::MAX as u128;
|
||||
const U128_HI_MASK: u128 = (u64::MAX as u128) << 64;
|
||||
|
||||
/// A 256-bit unsigned integer represented as 4 64-bit limbs.
|
||||
///
|
||||
/// Each limb is a native-endian number, but the array is little-limb-endian.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
|
||||
pub struct u256(pub [u64; 4]);
|
||||
|
||||
impl u256 {
|
||||
pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
|
||||
|
||||
/// Reinterpret as a signed integer
|
||||
pub fn signed(self) -> i256 {
|
||||
i256(self.0)
|
||||
}
|
||||
}
|
||||
|
||||
/// A 256-bit signed integer represented as 4 64-bit limbs.
|
||||
///
|
||||
/// Each limb is a native-endian number, but the array is little-limb-endian.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
|
||||
pub struct i256(pub [u64; 4]);
|
||||
|
||||
impl i256 {
|
||||
/// Reinterpret as an unsigned integer
|
||||
pub fn unsigned(self) -> u256 {
|
||||
u256(self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl MinInt for u256 {
|
||||
type OtherSign = i256;
|
||||
|
||||
type UnsignedInt = u256;
|
||||
|
||||
const SIGNED: bool = false;
|
||||
const BITS: u32 = 256;
|
||||
const ZERO: Self = Self([0u64; 4]);
|
||||
const ONE: Self = Self([1, 0, 0, 0]);
|
||||
const MIN: Self = Self([0u64; 4]);
|
||||
const MAX: Self = Self([u64::MAX; 4]);
|
||||
}
|
||||
|
||||
impl MinInt for i256 {
|
||||
type OtherSign = u256;
|
||||
|
||||
type UnsignedInt = u256;
|
||||
|
||||
const SIGNED: bool = false;
|
||||
const BITS: u32 = 256;
|
||||
const ZERO: Self = Self([0u64; 4]);
|
||||
const ONE: Self = Self([1, 0, 0, 0]);
|
||||
const MIN: Self = Self([0, 0, 0, 1 << 63]);
|
||||
const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]);
|
||||
}
|
||||
|
||||
macro_rules! impl_common {
|
||||
($ty:ty) => {
|
||||
impl ops::BitOr for $ty {
|
||||
type Output = Self;
|
||||
|
||||
fn bitor(mut self, rhs: Self) -> Self::Output {
|
||||
self.0[0] |= rhs.0[0];
|
||||
self.0[1] |= rhs.0[1];
|
||||
self.0[2] |= rhs.0[2];
|
||||
self.0[3] |= rhs.0[3];
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Not for $ty {
|
||||
type Output = Self;
|
||||
|
||||
fn not(self) -> Self::Output {
|
||||
Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Shl<u32> for $ty {
|
||||
type Output = Self;
|
||||
|
||||
fn shl(self, rhs: u32) -> Self::Output {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_common!(i256);
|
||||
impl_common!(u256);
|
||||
|
||||
macro_rules! word {
|
||||
(1, $val:expr) => {
|
||||
(($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
(2, $val:expr) => {
|
||||
(($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
(3, $val:expr) => {
|
||||
(($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
(4, $val:expr) => {
|
||||
(($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
}
|
||||
|
||||
impl HInt for u128 {
|
||||
type D = u256;
|
||||
|
||||
fn widen(self) -> Self::D {
|
||||
let w0 = self & u128::from(u64::MAX);
|
||||
let w1 = (self >> u64::BITS) & u128::from(u64::MAX);
|
||||
u256([w0 as u64, w1 as u64, 0, 0])
|
||||
}
|
||||
|
||||
fn zero_widen(self) -> Self::D {
|
||||
self.widen()
|
||||
}
|
||||
|
||||
fn zero_widen_mul(self, rhs: Self) -> Self::D {
|
||||
let product11: u64 = word!(1, self) * word!(1, rhs);
|
||||
let product12: u64 = word!(1, self) * word!(2, rhs);
|
||||
let product13: u64 = word!(1, self) * word!(3, rhs);
|
||||
let product14: u64 = word!(1, self) * word!(4, rhs);
|
||||
let product21: u64 = word!(2, self) * word!(1, rhs);
|
||||
let product22: u64 = word!(2, self) * word!(2, rhs);
|
||||
let product23: u64 = word!(2, self) * word!(3, rhs);
|
||||
let product24: u64 = word!(2, self) * word!(4, rhs);
|
||||
let product31: u64 = word!(3, self) * word!(1, rhs);
|
||||
let product32: u64 = word!(3, self) * word!(2, rhs);
|
||||
let product33: u64 = word!(3, self) * word!(3, rhs);
|
||||
let product34: u64 = word!(3, self) * word!(4, rhs);
|
||||
let product41: u64 = word!(4, self) * word!(1, rhs);
|
||||
let product42: u64 = word!(4, self) * word!(2, rhs);
|
||||
let product43: u64 = word!(4, self) * word!(3, rhs);
|
||||
let product44: u64 = word!(4, self) * word!(4, rhs);
|
||||
|
||||
let sum0: u128 = u128::from(product44);
|
||||
let sum1: u128 = u128::from(product34) + u128::from(product43);
|
||||
let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42);
|
||||
let sum3: u128 = u128::from(product14)
|
||||
+ u128::from(product23)
|
||||
+ u128::from(product32)
|
||||
+ u128::from(product41);
|
||||
let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31);
|
||||
let sum5: u128 = u128::from(product12) + u128::from(product21);
|
||||
let sum6: u128 = u128::from(product11);
|
||||
|
||||
let r0: u128 =
|
||||
(sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32);
|
||||
let r1: u128 = (sum0 >> 64)
|
||||
+ ((sum1 >> 32) & u128::from(WORD_FULL_MASK))
|
||||
+ (sum2 & u128::from(WORD_FULL_MASK))
|
||||
+ ((sum3 << 32) & u128::from(WORD_HI_MASK));
|
||||
|
||||
let (lo, carry) = r0.overflowing_add(r1 << 64);
|
||||
let hi = (r1 >> 64)
|
||||
+ (sum1 >> 96)
|
||||
+ (sum2 >> 64)
|
||||
+ (sum3 >> 32)
|
||||
+ sum4
|
||||
+ (sum5 << 32)
|
||||
+ (sum6 << 64)
|
||||
+ u128::from(carry);
|
||||
|
||||
u256([
|
||||
(lo & U128_LO_MASK) as u64,
|
||||
((lo >> 64) & U128_LO_MASK) as u64,
|
||||
(hi & U128_LO_MASK) as u64,
|
||||
((hi >> 64) & U128_LO_MASK) as u64,
|
||||
])
|
||||
}
|
||||
|
||||
fn widen_mul(self, rhs: Self) -> Self::D {
|
||||
self.zero_widen_mul(rhs)
|
||||
}
|
||||
}
|
||||
|
||||
impl HInt for i128 {
|
||||
type D = i256;
|
||||
|
||||
fn widen(self) -> Self::D {
|
||||
let mut ret = self.unsigned().zero_widen().signed();
|
||||
if self.is_negative() {
|
||||
ret.0[2] = u64::MAX;
|
||||
ret.0[3] = u64::MAX;
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
fn zero_widen(self) -> Self::D {
|
||||
self.unsigned().zero_widen().signed()
|
||||
}
|
||||
|
||||
fn zero_widen_mul(self, rhs: Self) -> Self::D {
|
||||
self.unsigned().zero_widen_mul(rhs.unsigned()).signed()
|
||||
}
|
||||
|
||||
fn widen_mul(self, rhs: Self) -> Self::D {
|
||||
unimplemented!("signed i128 widening multiply is not used")
|
||||
}
|
||||
}
|
||||
|
||||
impl DInt for u256 {
|
||||
type H = u128;
|
||||
|
||||
fn lo(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
|
||||
u128::from_le_bytes(tmp)
|
||||
}
|
||||
|
||||
fn hi(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
|
||||
u128::from_le_bytes(tmp)
|
||||
}
|
||||
}
|
||||
|
||||
impl DInt for i256 {
|
||||
type H = i128;
|
||||
|
||||
fn lo(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
|
||||
i128::from_le_bytes(tmp)
|
||||
}
|
||||
|
||||
fn hi(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
|
||||
i128::from_le_bytes(tmp)
|
||||
}
|
||||
}
|
||||
|
|
@ -3,43 +3,30 @@ use core::ops;
|
|||
mod specialized_div_rem;
|
||||
|
||||
pub mod addsub;
|
||||
mod big;
|
||||
pub mod leading_zeros;
|
||||
pub mod mul;
|
||||
pub mod sdiv;
|
||||
pub mod shift;
|
||||
pub mod udiv;
|
||||
|
||||
pub use self::leading_zeros::__clzsi2;
|
||||
pub use big::{i256, u256};
|
||||
pub use leading_zeros::__clzsi2;
|
||||
|
||||
public_test_dep! {
|
||||
/// Trait for some basic operations on integers
|
||||
/// Minimal integer implementations needed on all integer types, including wide integers.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) trait Int:
|
||||
Copy
|
||||
pub(crate) trait MinInt: Copy
|
||||
+ core::fmt::Debug
|
||||
+ PartialEq
|
||||
+ PartialOrd
|
||||
+ ops::AddAssign
|
||||
+ ops::SubAssign
|
||||
+ ops::BitAndAssign
|
||||
+ ops::BitOrAssign
|
||||
+ ops::BitXorAssign
|
||||
+ ops::ShlAssign<i32>
|
||||
+ ops::ShrAssign<u32>
|
||||
+ ops::Add<Output = Self>
|
||||
+ ops::Sub<Output = Self>
|
||||
+ ops::Div<Output = Self>
|
||||
+ ops::Shl<u32, Output = Self>
|
||||
+ ops::Shr<u32, Output = Self>
|
||||
+ ops::BitOr<Output = Self>
|
||||
+ ops::BitXor<Output = Self>
|
||||
+ ops::BitAnd<Output = Self>
|
||||
+ ops::Not<Output = Self>
|
||||
+ ops::Shl<u32, Output = Self>
|
||||
{
|
||||
|
||||
/// Type with the same width but other signedness
|
||||
type OtherSign: Int;
|
||||
type OtherSign: MinInt;
|
||||
/// Unsigned version of Self
|
||||
type UnsignedInt: Int;
|
||||
type UnsignedInt: MinInt;
|
||||
|
||||
/// If `Self` is a signed integer
|
||||
const SIGNED: bool;
|
||||
|
|
@ -51,13 +38,47 @@ pub(crate) trait Int:
|
|||
const ONE: Self;
|
||||
const MIN: Self;
|
||||
const MAX: Self;
|
||||
}
|
||||
}
|
||||
|
||||
public_test_dep! {
|
||||
/// Trait for some basic operations on integers
|
||||
#[allow(dead_code)]
|
||||
pub(crate) trait Int: MinInt
|
||||
+ PartialEq
|
||||
+ PartialOrd
|
||||
+ ops::AddAssign
|
||||
+ ops::SubAssign
|
||||
+ ops::BitAndAssign
|
||||
+ ops::BitOrAssign
|
||||
+ ops::BitXorAssign
|
||||
+ ops::ShlAssign<i32>
|
||||
+ ops::ShrAssign<u32>
|
||||
+ ops::Add<Output = Self>
|
||||
+ ops::Sub<Output = Self>
|
||||
+ ops::Mul<Output = Self>
|
||||
+ ops::Div<Output = Self>
|
||||
+ ops::Shr<u32, Output = Self>
|
||||
+ ops::BitXor<Output = Self>
|
||||
+ ops::BitAnd<Output = Self>
|
||||
{
|
||||
/// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing
|
||||
/// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111,
|
||||
/// 112,119,120,125,126,127].
|
||||
const FUZZ_LENGTHS: [u8; 20];
|
||||
const FUZZ_LENGTHS: [u8; 20] = make_fuzz_lengths(<Self as MinInt>::BITS);
|
||||
|
||||
/// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128.
|
||||
const FUZZ_NUM: usize;
|
||||
const FUZZ_NUM: usize = {
|
||||
let log2 = (<Self as MinInt>::BITS - 1).count_ones() as usize;
|
||||
if log2 == 3 {
|
||||
// case for u8
|
||||
6
|
||||
} else {
|
||||
// 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
|
||||
// boundaries.
|
||||
8 + (4 * (log2 - 4))
|
||||
}
|
||||
};
|
||||
|
||||
fn unsigned(self) -> Self::UnsignedInt;
|
||||
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
|
||||
|
|
@ -84,74 +105,54 @@ pub(crate) trait Int:
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) const fn make_fuzz_lengths(bits: u32) -> [u8; 20] {
|
||||
let mut v = [0u8; 20];
|
||||
v[0] = 0;
|
||||
v[1] = 1;
|
||||
v[2] = 2; // important for parity and the iX::MIN case when reversed
|
||||
let mut i = 3;
|
||||
|
||||
// No need for any more until the byte boundary, because there should be no algorithms
|
||||
// that are sensitive to anything not next to byte boundaries after 2. We also scale
|
||||
// in powers of two, which is important to prevent u128 corner tests from getting too
|
||||
// big.
|
||||
let mut l = 8;
|
||||
loop {
|
||||
if l >= ((bits / 2) as u8) {
|
||||
break;
|
||||
}
|
||||
// get both sides of the byte boundary
|
||||
v[i] = l - 1;
|
||||
i += 1;
|
||||
v[i] = l;
|
||||
i += 1;
|
||||
l *= 2;
|
||||
}
|
||||
|
||||
if bits != 8 {
|
||||
// add the lower side of the middle boundary
|
||||
v[i] = ((bits / 2) - 1) as u8;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
|
||||
// boundary because of algorithms that split the high part up. We reverse the scaling
|
||||
// as we go to Self::BITS.
|
||||
let mid = i;
|
||||
let mut j = 1;
|
||||
loop {
|
||||
v[i] = (bits as u8) - (v[mid - j]) - 1;
|
||||
if j == mid {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
v
|
||||
}
|
||||
|
||||
macro_rules! int_impl_common {
|
||||
($ty:ty) => {
|
||||
const BITS: u32 = <Self as Int>::ZERO.count_zeros();
|
||||
const SIGNED: bool = Self::MIN != Self::ZERO;
|
||||
|
||||
const ZERO: Self = 0;
|
||||
const ONE: Self = 1;
|
||||
const MIN: Self = <Self>::MIN;
|
||||
const MAX: Self = <Self>::MAX;
|
||||
|
||||
const FUZZ_LENGTHS: [u8; 20] = {
|
||||
let bits = <Self as Int>::BITS;
|
||||
let mut v = [0u8; 20];
|
||||
v[0] = 0;
|
||||
v[1] = 1;
|
||||
v[2] = 2; // important for parity and the iX::MIN case when reversed
|
||||
let mut i = 3;
|
||||
// No need for any more until the byte boundary, because there should be no algorithms
|
||||
// that are sensitive to anything not next to byte boundaries after 2. We also scale
|
||||
// in powers of two, which is important to prevent u128 corner tests from getting too
|
||||
// big.
|
||||
let mut l = 8;
|
||||
loop {
|
||||
if l >= ((bits / 2) as u8) {
|
||||
break;
|
||||
}
|
||||
// get both sides of the byte boundary
|
||||
v[i] = l - 1;
|
||||
i += 1;
|
||||
v[i] = l;
|
||||
i += 1;
|
||||
l *= 2;
|
||||
}
|
||||
|
||||
if bits != 8 {
|
||||
// add the lower side of the middle boundary
|
||||
v[i] = ((bits / 2) - 1) as u8;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS
|
||||
// boundary because of algorithms that split the high part up. We reverse the scaling
|
||||
// as we go to Self::BITS.
|
||||
let mid = i;
|
||||
let mut j = 1;
|
||||
loop {
|
||||
v[i] = (bits as u8) - (v[mid - j]) - 1;
|
||||
if j == mid {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
j += 1;
|
||||
}
|
||||
v
|
||||
};
|
||||
|
||||
const FUZZ_NUM: usize = {
|
||||
let log2 = (<Self as Int>::BITS - 1).count_ones() as usize;
|
||||
if log2 == 3 {
|
||||
// case for u8
|
||||
6
|
||||
} else {
|
||||
// 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate
|
||||
// boundaries.
|
||||
8 + (4 * (log2 - 4))
|
||||
}
|
||||
};
|
||||
|
||||
fn from_bool(b: bool) -> Self {
|
||||
b as $ty
|
||||
}
|
||||
|
|
@ -204,10 +205,20 @@ macro_rules! int_impl_common {
|
|||
|
||||
macro_rules! int_impl {
|
||||
($ity:ty, $uty:ty) => {
|
||||
impl Int for $uty {
|
||||
impl MinInt for $uty {
|
||||
type OtherSign = $ity;
|
||||
type UnsignedInt = $uty;
|
||||
|
||||
const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
|
||||
const SIGNED: bool = Self::MIN != Self::ZERO;
|
||||
|
||||
const ZERO: Self = 0;
|
||||
const ONE: Self = 1;
|
||||
const MIN: Self = <Self>::MIN;
|
||||
const MAX: Self = <Self>::MAX;
|
||||
}
|
||||
|
||||
impl Int for $uty {
|
||||
fn unsigned(self) -> $uty {
|
||||
self
|
||||
}
|
||||
|
|
@ -229,10 +240,20 @@ macro_rules! int_impl {
|
|||
int_impl_common!($uty);
|
||||
}
|
||||
|
||||
impl Int for $ity {
|
||||
impl MinInt for $ity {
|
||||
type OtherSign = $uty;
|
||||
type UnsignedInt = $uty;
|
||||
|
||||
const BITS: u32 = <Self as MinInt>::ZERO.count_zeros();
|
||||
const SIGNED: bool = Self::MIN != Self::ZERO;
|
||||
|
||||
const ZERO: Self = 0;
|
||||
const ONE: Self = 1;
|
||||
const MIN: Self = <Self>::MIN;
|
||||
const MAX: Self = <Self>::MAX;
|
||||
}
|
||||
|
||||
impl Int for $ity {
|
||||
fn unsigned(self) -> $uty {
|
||||
self as $uty
|
||||
}
|
||||
|
|
@ -260,18 +281,22 @@ int_impl!(i128, u128);
|
|||
public_test_dep! {
|
||||
/// Trait for integers twice the bit width of another integer. This is implemented for all
|
||||
/// primitives except for `u8`, because there is not a smaller primitive.
|
||||
pub(crate) trait DInt: Int {
|
||||
pub(crate) trait DInt: MinInt {
|
||||
/// Integer that is half the bit width of the integer this trait is implemented for
|
||||
type H: HInt<D = Self> + Int;
|
||||
type H: HInt<D = Self>;
|
||||
|
||||
/// Returns the low half of `self`
|
||||
fn lo(self) -> Self::H;
|
||||
/// Returns the high half of `self`
|
||||
fn hi(self) -> Self::H;
|
||||
/// Returns the low and high halves of `self` as a tuple
|
||||
fn lo_hi(self) -> (Self::H, Self::H);
|
||||
fn lo_hi(self) -> (Self::H, Self::H) {
|
||||
(self.lo(), self.hi())
|
||||
}
|
||||
/// Constructs an integer using lower and higher half parts
|
||||
fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self;
|
||||
fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
|
||||
lo.zero_widen() | hi.widen_hi()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -280,7 +305,7 @@ public_test_dep! {
|
|||
/// primitives except for `u128`, because it there is not a larger primitive.
|
||||
pub(crate) trait HInt: Int {
|
||||
/// Integer that is double the bit width of the integer this trait is implemented for
|
||||
type D: DInt<H = Self> + Int;
|
||||
type D: DInt<H = Self> + MinInt;
|
||||
|
||||
/// Widens (using default extension) the integer to have double bit width
|
||||
fn widen(self) -> Self::D;
|
||||
|
|
@ -288,7 +313,9 @@ pub(crate) trait HInt: Int {
|
|||
/// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable
|
||||
fn zero_widen(self) -> Self::D;
|
||||
/// Widens the integer to have double bit width and shifts the integer into the higher bits
|
||||
fn widen_hi(self) -> Self::D;
|
||||
fn widen_hi(self) -> Self::D {
|
||||
self.widen() << <Self as MinInt>::BITS
|
||||
}
|
||||
/// Widening multiplication with zero widening. This cannot overflow.
|
||||
fn zero_widen_mul(self, rhs: Self) -> Self::D;
|
||||
/// Widening multiplication. This cannot overflow.
|
||||
|
|
@ -306,13 +333,7 @@ macro_rules! impl_d_int {
|
|||
self as $X
|
||||
}
|
||||
fn hi(self) -> Self::H {
|
||||
(self >> <$X as Int>::BITS) as $X
|
||||
}
|
||||
fn lo_hi(self) -> (Self::H, Self::H) {
|
||||
(self.lo(), self.hi())
|
||||
}
|
||||
fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
|
||||
lo.zero_widen() | hi.widen_hi()
|
||||
(self >> <$X as MinInt>::BITS) as $X
|
||||
}
|
||||
}
|
||||
)*
|
||||
|
|
@ -331,9 +352,6 @@ macro_rules! impl_h_int {
|
|||
fn zero_widen(self) -> Self::D {
|
||||
(self as $uH) as $X
|
||||
}
|
||||
fn widen_hi(self) -> Self::D {
|
||||
(self as $X) << <$H as Int>::BITS
|
||||
}
|
||||
fn zero_widen_mul(self, rhs: Self) -> Self::D {
|
||||
self.zero_widen().wrapping_mul(rhs.zero_widen())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
use crate::int::{DInt, HInt, Int};
|
||||
|
||||
trait Mul: DInt
|
||||
trait Mul: DInt + Int
|
||||
where
|
||||
Self::H: DInt,
|
||||
{
|
||||
|
|
@ -30,7 +30,7 @@ where
|
|||
impl Mul for u64 {}
|
||||
impl Mul for i128 {}
|
||||
|
||||
pub(crate) trait UMulo: Int + DInt {
|
||||
pub(crate) trait UMulo: DInt + Int {
|
||||
fn mulo(self, rhs: Self) -> (Self, bool) {
|
||||
match (self.hi().is_zero(), rhs.hi().is_zero()) {
|
||||
// overflow is guaranteed
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::int::{DInt, HInt, Int};
|
||||
use crate::int::{DInt, HInt, Int, MinInt};
|
||||
|
||||
trait Ashl: DInt {
|
||||
/// Returns `a << b`, requires `b < Self::BITS`
|
||||
|
|
|
|||
|
|
@ -33,3 +33,5 @@ no-asm = ["compiler_builtins/no-asm"]
|
|||
no-f16-f128 = ["compiler_builtins/no-f16-f128"]
|
||||
mem = ["compiler_builtins/mem"]
|
||||
mangled-names = ["compiler_builtins/mangled-names"]
|
||||
# Skip tests that rely on f128 symbols being available on the system
|
||||
no-sys-f128 = []
|
||||
|
|
|
|||
27
library/compiler-builtins/testcrate/build.rs
Normal file
27
library/compiler-builtins/testcrate/build.rs
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
use std::env;
|
||||
|
||||
fn main() {
|
||||
let target = env::var("TARGET").unwrap();
|
||||
|
||||
// These platforms do not have f128 symbols available in their system libraries, so
|
||||
// skip related tests.
|
||||
if target.starts_with("arm-")
|
||||
|| target.contains("apple-darwin")
|
||||
|| target.contains("windows-msvc")
|
||||
// GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See
|
||||
// <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054>.
|
||||
|| target.contains("windows-gnu")
|
||||
// FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86.
|
||||
// See <https://github.com/llvm/llvm-project/issues/77401>.
|
||||
|| target.starts_with("i686")
|
||||
// 32-bit PowerPC gets code generated that Qemu cannot handle. See
|
||||
// <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105635926>.
|
||||
|| target.starts_with("powerpc-")
|
||||
// FIXME: We get different results from the builtin functions. See
|
||||
// <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105657287>.
|
||||
|| target.starts_with("powerpc64-")
|
||||
{
|
||||
println!("cargo:warning=using apfloat fallback for f128");
|
||||
println!("cargo:rustc-cfg=feature=\"no-sys-f128\"");
|
||||
}
|
||||
}
|
||||
|
|
@ -15,7 +15,7 @@
|
|||
#![no_std]
|
||||
|
||||
use compiler_builtins::float::Float;
|
||||
use compiler_builtins::int::Int;
|
||||
use compiler_builtins::int::{Int, MinInt};
|
||||
|
||||
use rand_xoshiro::rand_core::{RngCore, SeedableRng};
|
||||
use rand_xoshiro::Xoshiro128StarStar;
|
||||
|
|
@ -101,7 +101,10 @@ macro_rules! edge_cases {
|
|||
|
||||
/// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find
|
||||
/// edge cases, followed by a more random fuzzer that runs `n` times.
|
||||
pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F) {
|
||||
pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F)
|
||||
where
|
||||
<I as MinInt>::UnsignedInt: Int,
|
||||
{
|
||||
// edge case tester. Calls `f` 210 times for u128.
|
||||
// zero gets skipped by the loop
|
||||
f(I::ZERO);
|
||||
|
|
@ -111,7 +114,7 @@ pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F) {
|
|||
|
||||
// random fuzzer
|
||||
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
|
||||
let mut x: I = Int::ZERO;
|
||||
let mut x: I = MinInt::ZERO;
|
||||
for _ in 0..n {
|
||||
fuzz_step(&mut rng, &mut x);
|
||||
f(x)
|
||||
|
|
@ -119,7 +122,10 @@ pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F) {
|
|||
}
|
||||
|
||||
/// The same as `fuzz`, except `f` has two inputs.
|
||||
pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F) {
|
||||
pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F)
|
||||
where
|
||||
<I as MinInt>::UnsignedInt: Int,
|
||||
{
|
||||
// Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`.
|
||||
edge_cases!(I, case, {
|
||||
f(I::ZERO, case);
|
||||
|
|
@ -150,10 +156,10 @@ pub fn fuzz_shift<I: Int, F: Fn(I, u32)>(f: F) {
|
|||
// Shift functions are very simple and do not need anything other than shifting a small
|
||||
// set of random patterns for every fuzz length.
|
||||
let mut rng = Xoshiro128StarStar::seed_from_u64(0);
|
||||
let mut x: I = Int::ZERO;
|
||||
let mut x: I = MinInt::ZERO;
|
||||
for i in 0..I::FUZZ_NUM {
|
||||
fuzz_step(&mut rng, &mut x);
|
||||
f(x, Int::ZERO);
|
||||
f(x, MinInt::ZERO);
|
||||
f(x, I::FUZZ_LENGTHS[i] as u32);
|
||||
}
|
||||
}
|
||||
|
|
@ -257,3 +263,55 @@ pub fn fuzz_float_2<F: Float, E: Fn(F, F)>(n: u32, f: E) {
|
|||
f(x, y)
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform an operation using builtin types if available, falling back to apfloat if not.
|
||||
#[macro_export]
|
||||
macro_rules! apfloat_fallback {
|
||||
(
|
||||
$float_ty:ty,
|
||||
// Type name in `rustc_apfloat::ieee`. Not a full path, it automatically gets the prefix.
|
||||
$apfloat_ty:ident,
|
||||
// Cfg expression for when builtin system operations should be used
|
||||
$sys_available:meta,
|
||||
// The expression to run. This expression may use `FloatTy` for its signature.
|
||||
// Optionally, the final conversion back to a float can be suppressed using
|
||||
// `=> no_convert` (for e.g. operations that return a bool).
|
||||
$op:expr $(=> $convert:ident)?,
|
||||
// Arguments that get passed to `$op` after converting to a float
|
||||
$($arg:expr),+
|
||||
$(,)?
|
||||
) => {{
|
||||
#[cfg($sys_available)]
|
||||
let ret = {
|
||||
type FloatTy = $float_ty;
|
||||
$op( $($arg),+ )
|
||||
};
|
||||
|
||||
#[cfg(not($sys_available))]
|
||||
let ret = {
|
||||
use rustc_apfloat::Float;
|
||||
type FloatTy = rustc_apfloat::ieee::$apfloat_ty;
|
||||
|
||||
let op_res = $op( $(FloatTy::from_bits($arg.to_bits().into())),+ );
|
||||
|
||||
apfloat_fallback!(@convert $float_ty, op_res $(,$convert)?)
|
||||
};
|
||||
|
||||
ret
|
||||
}};
|
||||
|
||||
// Operations that do not need converting back to a float
|
||||
(@convert $float_ty:ty, $val:expr, no_convert) => {
|
||||
$val
|
||||
};
|
||||
|
||||
// Some apfloat operations return a `StatusAnd` that we need to extract the value from. This
|
||||
// is the default.
|
||||
(@convert $float_ty:ty, $val:expr) => {{
|
||||
// ignore the status, just get the value
|
||||
let unwrapped = $val.value;
|
||||
|
||||
<$float_ty>::from_bits(FloatTy::to_bits(unwrapped).try_into().unwrap())
|
||||
}};
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#![allow(unused_macros)]
|
||||
#![feature(f128)]
|
||||
#![feature(f16)]
|
||||
|
||||
use testcrate::*;
|
||||
|
||||
|
|
@ -71,28 +73,28 @@ fn addsub() {
|
|||
}
|
||||
|
||||
macro_rules! float_sum {
|
||||
($($f:ty, $fn_add:ident, $fn_sub:ident);*;) => {
|
||||
($($f:ty, $fn_add:ident, $fn_sub:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
|
||||
$(
|
||||
fuzz_float_2(N, |x: $f, y: $f| {
|
||||
let add0 = x + y;
|
||||
let sub0 = x - y;
|
||||
let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y);
|
||||
let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y);
|
||||
let add1: $f = $fn_add(x, y);
|
||||
let sub1: $f = $fn_sub(x, y);
|
||||
if !Float::eq_repr(add0, add1) {
|
||||
panic!(
|
||||
"{}({}, {}): std: {}, builtins: {}",
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn_add), x, y, add0, add1
|
||||
);
|
||||
}
|
||||
if !Float::eq_repr(sub0, sub1) {
|
||||
panic!(
|
||||
"{}({}, {}): std: {}, builtins: {}",
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn_sub), x, y, sub0, sub1
|
||||
);
|
||||
}
|
||||
});
|
||||
)*
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
|
||||
|
|
@ -103,11 +105,24 @@ fn float_addsub() {
|
|||
sub::{__subdf3, __subsf3},
|
||||
Float,
|
||||
};
|
||||
use core::ops::{Add, Sub};
|
||||
|
||||
float_sum!(
|
||||
f32, __addsf3, __subsf3;
|
||||
f64, __adddf3, __subdf3;
|
||||
f32, __addsf3, __subsf3, Single, all();
|
||||
f64, __adddf3, __subdf3, Double, all();
|
||||
);
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
{
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
use compiler_builtins::float::{add::__addkf3 as __addtf3, sub::__subkf3 as __subtf3};
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
use compiler_builtins::float::{add::__addtf3, sub::__subtf3};
|
||||
|
||||
float_sum!(
|
||||
f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128");
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
|
|
@ -118,9 +133,10 @@ fn float_addsub_arm() {
|
|||
sub::{__subdf3vfp, __subsf3vfp},
|
||||
Float,
|
||||
};
|
||||
use core::ops::{Add, Sub};
|
||||
|
||||
float_sum!(
|
||||
f32, __addsf3vfp, __subsf3vfp;
|
||||
f64, __adddf3vfp, __subdf3vfp;
|
||||
f32, __addsf3vfp, __subsf3vfp, Single, all();
|
||||
f64, __adddf3vfp, __subdf3vfp, Double, all();
|
||||
);
|
||||
}
|
||||
|
|
|
|||
61
library/compiler-builtins/testcrate/tests/big.rs
Normal file
61
library/compiler-builtins/testcrate/tests/big.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
use compiler_builtins::int::{i256, u256, HInt, MinInt};
|
||||
|
||||
const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
|
||||
|
||||
/// Print a `u256` as hex since we can't add format implementations
|
||||
fn hexu(v: u256) -> String {
|
||||
format!(
|
||||
"0x{:016x}{:016x}{:016x}{:016x}",
|
||||
v.0[3], v.0[2], v.0[1], v.0[0]
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn widen_u128() {
|
||||
assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
|
||||
assert_eq!(
|
||||
LOHI_SPLIT.widen(),
|
||||
u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn widen_i128() {
|
||||
assert_eq!((-1i128).widen(), u256::MAX.signed());
|
||||
assert_eq!(
|
||||
(LOHI_SPLIT as i128).widen(),
|
||||
i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
|
||||
);
|
||||
assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn widen_mul_u128() {
|
||||
let tests = [
|
||||
(u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
|
||||
(u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
|
||||
(u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
|
||||
(u128::MIN, u128::MIN, u256::ZERO),
|
||||
(1234, 0, u256::ZERO),
|
||||
(0, 1234, u256::ZERO),
|
||||
];
|
||||
|
||||
let mut errors = Vec::new();
|
||||
for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
|
||||
let res = a.widen_mul(b);
|
||||
let res_z = a.zero_widen_mul(b);
|
||||
assert_eq!(res, res_z);
|
||||
if res != exp {
|
||||
errors.push((i, a, b, exp, res));
|
||||
}
|
||||
}
|
||||
|
||||
for (i, a, b, exp, res) in &errors {
|
||||
eprintln!(
|
||||
"FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}",
|
||||
hexu(*exp),
|
||||
hexu(*res)
|
||||
);
|
||||
}
|
||||
assert!(errors.is_empty());
|
||||
}
|
||||
|
|
@ -1,23 +1,50 @@
|
|||
#![allow(unused_macros)]
|
||||
#![allow(unreachable_code)]
|
||||
#![feature(f128)]
|
||||
#![feature(f16)]
|
||||
|
||||
#[cfg(not(target_arch = "powerpc64"))]
|
||||
use testcrate::*;
|
||||
|
||||
macro_rules! cmp {
|
||||
($x:ident, $y:ident, $($unordered_val:expr, $fn:ident);*;) => {
|
||||
(
|
||||
$f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta,
|
||||
$($unordered_val:expr, $fn:ident);*;
|
||||
) => {
|
||||
$(
|
||||
let cmp0 = if $x.is_nan() || $y.is_nan() {
|
||||
let cmp0 = if apfloat_fallback!(
|
||||
$f, $apfloat_ty, $sys_available,
|
||||
|x: FloatTy| x.is_nan() => no_convert,
|
||||
$x
|
||||
) || apfloat_fallback!(
|
||||
$f, $apfloat_ty, $sys_available,
|
||||
|y: FloatTy| y.is_nan() => no_convert,
|
||||
$y
|
||||
)
|
||||
{
|
||||
$unordered_val
|
||||
} else if $x < $y {
|
||||
} else if apfloat_fallback!(
|
||||
$f, $apfloat_ty, $sys_available,
|
||||
|x, y| x < y => no_convert,
|
||||
$x, $y
|
||||
) {
|
||||
-1
|
||||
} else if $x == $y {
|
||||
} else if apfloat_fallback!(
|
||||
$f, $apfloat_ty, $sys_available,
|
||||
|x, y| x == y => no_convert,
|
||||
$x, $y
|
||||
) {
|
||||
0
|
||||
} else {
|
||||
1
|
||||
};
|
||||
|
||||
let cmp1 = $fn($x, $y);
|
||||
if cmp0 != cmp1 {
|
||||
panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1);
|
||||
panic!(
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn), $x, $y, cmp0, cmp1
|
||||
);
|
||||
}
|
||||
)*
|
||||
};
|
||||
|
|
@ -34,7 +61,7 @@ fn float_comparisons() {
|
|||
|
||||
fuzz_float_2(N, |x: f32, y: f32| {
|
||||
assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan());
|
||||
cmp!(x, y,
|
||||
cmp!(f32, x, y, Single, all(),
|
||||
1, __ltsf2;
|
||||
1, __lesf2;
|
||||
1, __eqsf2;
|
||||
|
|
@ -45,7 +72,7 @@ fn float_comparisons() {
|
|||
});
|
||||
fuzz_float_2(N, |x: f64, y: f64| {
|
||||
assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan());
|
||||
cmp!(x, y,
|
||||
cmp!(f64, x, y, Double, all(),
|
||||
1, __ltdf2;
|
||||
1, __ledf2;
|
||||
1, __eqdf2;
|
||||
|
|
@ -54,6 +81,44 @@ fn float_comparisons() {
|
|||
1, __nedf2;
|
||||
);
|
||||
});
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
{
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
use compiler_builtins::float::cmp::{
|
||||
__eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2,
|
||||
__ltkf2 as __lttf2, __nekf2 as __netf2, __unordkf2 as __unordtf2,
|
||||
};
|
||||
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
use compiler_builtins::float::cmp::{
|
||||
__eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2,
|
||||
};
|
||||
|
||||
fuzz_float_2(N, |x: f128, y: f128| {
|
||||
let x_is_nan = apfloat_fallback!(
|
||||
f128, Quad, not(feature = "no-sys-f128"),
|
||||
|x: FloatTy| x.is_nan() => no_convert,
|
||||
x
|
||||
);
|
||||
let y_is_nan = apfloat_fallback!(
|
||||
f128, Quad, not(feature = "no-sys-f128"),
|
||||
|x: FloatTy| x.is_nan() => no_convert,
|
||||
y
|
||||
);
|
||||
|
||||
assert_eq!(__unordtf2(x, y) != 0, x_is_nan || y_is_nan);
|
||||
|
||||
cmp!(f128, x, y, Quad, not(feature = "no-sys-f128"),
|
||||
1, __lttf2;
|
||||
1, __letf2;
|
||||
1, __eqtf2;
|
||||
-1, __getf2;
|
||||
-1, __gttf2;
|
||||
1, __netf2;
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! cmp2 {
|
||||
|
|
|
|||
|
|
@ -187,9 +187,15 @@ fn float_extend() {
|
|||
conv!(f32, f64, __extendsfdf2, Single, Double);
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
{
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
use compiler_builtins::float::extend::{
|
||||
__extenddftf2, __extendhfsf2, __extendhftf2, __extendsftf2, __gnu_h2f_ieee,
|
||||
__extenddfkf2 as __extenddftf2, __extendhfkf2 as __extendhftf2,
|
||||
__extendsfkf2 as __extendsftf2,
|
||||
};
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
use compiler_builtins::float::extend::{__extenddftf2, __extendhftf2, __extendsftf2};
|
||||
use compiler_builtins::float::extend::{__extendhfsf2, __gnu_h2f_ieee};
|
||||
|
||||
// FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly
|
||||
conv!(f16, f32, __extendhfsf2, Half, Single);
|
||||
conv!(f16, f32, __gnu_h2f_ieee, Half, Single);
|
||||
|
|
@ -234,9 +240,15 @@ fn float_trunc() {
|
|||
conv!(f64, f32, __truncdfsf2, Double, Single);
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
{
|
||||
use compiler_builtins::float::trunc::{__gnu_f2h_ieee, __truncdfhf2, __truncsfhf2};
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
use compiler_builtins::float::trunc::{
|
||||
__gnu_f2h_ieee, __truncdfhf2, __truncsfhf2, __trunctfdf2, __trunctfhf2, __trunctfsf2,
|
||||
__trunckfdf2 as __trunctfdf2, __trunckfhf2 as __trunctfhf2,
|
||||
__trunckfsf2 as __trunctfsf2,
|
||||
};
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
use compiler_builtins::float::trunc::{__trunctfdf2, __trunctfhf2, __trunctfsf2};
|
||||
|
||||
// FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly
|
||||
conv!(f32, f16, __truncsfhf2, Single, Half);
|
||||
conv!(f32, f16, __gnu_f2h_ieee, Single, Half);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
|
||||
use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc};
|
||||
|
||||
use testcrate::*;
|
||||
|
||||
// Division algorithms have by far the nastiest and largest number of edge cases, and experience shows
|
||||
|
|
@ -104,16 +105,20 @@ fn divide_sparc() {
|
|||
}
|
||||
|
||||
macro_rules! float {
|
||||
($($i:ty, $fn:ident);*;) => {
|
||||
($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
|
||||
$(
|
||||
fuzz_float_2(N, |x: $i, y: $i| {
|
||||
let quo0 = x / y;
|
||||
let quo1: $i = $fn(x, y);
|
||||
fuzz_float_2(N, |x: $f, y: $f| {
|
||||
let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
|
||||
let quo1: $f = $fn(x, y);
|
||||
#[cfg(not(target_arch = "arm"))]
|
||||
if !Float::eq_repr(quo0, quo1) {
|
||||
panic!(
|
||||
"{}({}, {}): std: {}, builtins: {}",
|
||||
stringify!($fn), x, y, quo0, quo1
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn),
|
||||
x,
|
||||
y,
|
||||
quo0,
|
||||
quo1
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -122,8 +127,12 @@ macro_rules! float {
|
|||
if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) {
|
||||
if !Float::eq_repr(quo0, quo1) {
|
||||
panic!(
|
||||
"{}({}, {}): std: {}, builtins: {}",
|
||||
stringify!($fn), x, y, quo0, quo1
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn),
|
||||
x,
|
||||
y,
|
||||
quo0,
|
||||
quo1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -139,10 +148,11 @@ fn float_div() {
|
|||
div::{__divdf3, __divsf3},
|
||||
Float,
|
||||
};
|
||||
use core::ops::Div;
|
||||
|
||||
float!(
|
||||
f32, __divsf3;
|
||||
f64, __divdf3;
|
||||
f32, __divsf3, Single, all();
|
||||
f64, __divdf3, Double, all();
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -153,9 +163,10 @@ fn float_div_arm() {
|
|||
div::{__divdf3vfp, __divsf3vfp},
|
||||
Float,
|
||||
};
|
||||
use core::ops::Div;
|
||||
|
||||
float!(
|
||||
f32, __divsf3vfp;
|
||||
f64, __divdf3vfp;
|
||||
f32, __divsf3vfp, Single, all();
|
||||
f64, __divdf3vfp, Double, all();
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
#![allow(unused_macros)]
|
||||
#![feature(f128)]
|
||||
#![feature(f16)]
|
||||
|
||||
use testcrate::*;
|
||||
|
||||
|
|
@ -82,16 +84,16 @@ fn overflowing_mul() {
|
|||
}
|
||||
|
||||
macro_rules! float_mul {
|
||||
($($f:ty, $fn:ident);*;) => {
|
||||
($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
|
||||
$(
|
||||
fuzz_float_2(N, |x: $f, y: $f| {
|
||||
let mul0 = x * y;
|
||||
let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y);
|
||||
let mul1: $f = $fn(x, y);
|
||||
// multiplication of subnormals is not currently handled
|
||||
if !(Float::is_subnormal(mul0) || Float::is_subnormal(mul1)) {
|
||||
if !Float::eq_repr(mul0, mul1) {
|
||||
panic!(
|
||||
"{}({}, {}): std: {}, builtins: {}",
|
||||
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn), x, y, mul0, mul1
|
||||
);
|
||||
}
|
||||
|
|
@ -108,11 +110,27 @@ fn float_mul() {
|
|||
mul::{__muldf3, __mulsf3},
|
||||
Float,
|
||||
};
|
||||
use core::ops::Mul;
|
||||
|
||||
float_mul!(
|
||||
f32, __mulsf3;
|
||||
f64, __muldf3;
|
||||
f32, __mulsf3, Single, all();
|
||||
f64, __muldf3, Double, all();
|
||||
);
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
{
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
use compiler_builtins::float::mul::__mulkf3 as __multf3;
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
use compiler_builtins::float::mul::__multf3;
|
||||
|
||||
float_mul!(
|
||||
f128, __multf3, Quad,
|
||||
// FIXME(llvm): there is a bug in LLVM rt.
|
||||
// See <https://github.com/llvm/llvm-project/issues/91840>.
|
||||
not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
|
|
@ -122,9 +140,10 @@ fn float_mul_arm() {
|
|||
mul::{__muldf3vfp, __mulsf3vfp},
|
||||
Float,
|
||||
};
|
||||
use core::ops::Mul;
|
||||
|
||||
float_mul!(
|
||||
f32, __mulsf3vfp;
|
||||
f64, __muldf3vfp;
|
||||
f32, __mulsf3vfp, Single, all();
|
||||
f64, __muldf3vfp, Double, all();
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue