Merge pull request #622 from tgross35/f128-div

Add `__divtf3`
This commit is contained in:
Trevor Gross 2024-09-24 18:40:16 +02:00 committed by GitHub
commit 608fd00051
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 548 additions and 695 deletions

View file

@ -222,7 +222,7 @@ of being added to Rust.
- [x] addtf3.c
- [x] comparetf2.c
- [ ] divtf3.c
- [x] divtf3.c
- [x] extenddftf2.c
- [x] extendhfsf2.c
- [x] extendhftf2.c

View file

@ -526,7 +526,6 @@ mod c {
("__floatsitf", "floatsitf.c"),
("__floatunditf", "floatunditf.c"),
("__floatunsitf", "floatunsitf.c"),
("__divtf3", "divtf3.c"),
("__powitf2", "powitf2.c"),
("__fe_getround", "fp_mode.c"),
("__fe_raise_inexact", "fp_mode.c"),

View file

@ -256,6 +256,10 @@ mod intrinsics {
a * b
}
/// Divides `a` by `b` using the built-in `/` operator on `f128`.
// NOTE(review): presumably present so the `f128` division builtin (`__divtf3`) is referenced
// by this example and must link — confirm against the surrounding file.
pub fn divtf(a: f128, b: f128) -> f128 {
a / b
}
pub fn subtf(a: f128, b: f128) -> f128 {
a - b
}
@ -440,6 +444,7 @@ fn run() {
bb(aeabi_uldivmod(bb(2), bb(3)));
bb(ashlti3(bb(2), bb(2)));
bb(ashrti3(bb(2), bb(2)));
bb(divtf(bb(2.), bb(2.)));
bb(divti3(bb(2), bb(2)));
bb(eqtf(bb(2.), bb(2.)));
bb(extendhfdf(bb(2.)));

File diff suppressed because it is too large Load diff

View file

@ -31,10 +31,10 @@ pub(crate) trait Float:
+ ops::Rem<Output = Self>
{
/// A uint of the same width as the float
type Int: Int;
type Int: Int<OtherSign = Self::SignedInt, UnsignedInt = Self::Int>;
/// An int of the same width as the float
type SignedInt: Int;
type SignedInt: Int + MinInt<OtherSign = Self::Int, UnsignedInt = Self::Int>;
/// An int capable of containing the exponent bits plus a sign bit. This is signed.
type ExpInt: Int;
@ -51,7 +51,7 @@ pub(crate) trait Float:
/// The bitwidth of the exponent
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
/// The maximum value of the exponent
/// The saturated value of the exponent (infinite representation), in the rightmost position.
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
/// The exponent bias value
@ -83,7 +83,7 @@ pub(crate) trait Float:
/// Returns true if the sign is negative
fn is_sign_negative(self) -> bool;
/// Returns the exponent with bias
/// Returns the exponent, not adjusting for bias.
fn exp(self) -> Self::ExpInt;
/// Returns the significand with no implicit bit (or the "fractional" part)
@ -175,7 +175,7 @@ macro_rules! float_impl {
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
let shift = significand
.leading_zeros()
.wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros());
.wrapping_sub(Self::EXPONENT_BITS);
(
1i32.wrapping_sub(shift as i32),
significand << shift as Self::Int,

View file

@ -93,7 +93,7 @@ macro_rules! impl_common {
type Output = Self;
fn shl(self, rhs: u32) -> Self::Output {
todo!()
unimplemented!("only used to meet trait bounds")
}
}
};
@ -102,6 +102,41 @@ macro_rules! impl_common {
impl_common!(i256);
impl_common!(u256);
/// Logical right shift for `u256`, whose four `u64` words are stored least-significant first.
impl ops::Shr<u32> for u256 {
    type Output = Self;

    /// Shifts `self` right by `rhs` bits, filling the vacated high bits with zeros.
    ///
    /// # Panics
    ///
    /// Panics if `rhs >= Self::BITS`.
    fn shr(self, rhs: u32) -> Self::Output {
        assert!(rhs < Self::BITS, "attempted to shift right with overflow");

        if rhs == 0 {
            return self;
        }

        let mut ret = self;
        // Split the shift into a whole-word move and a sub-word bit shift.
        let word_shift = (rhs / 64) as usize;
        let bit_shift = rhs % 64;

        // Walk from the least significant word upward. Every read is at an index >= `idx`,
        // so updating `ret` in place never consumes a word that was already overwritten.
        for idx in 0..4 {
            let base_idx = idx + word_shift;

            let Some(base) = ret.0.get(base_idx).copied() else {
                // The source word lies past the top of the integer: shift in zeros.
                ret.0[idx] = 0;
                continue;
            };

            let mut new_val = base >> bit_shift;

            // Carry the low bits of the next-higher word into the top of this one. When the
            // shift is a whole number of words there is nothing to carry; a left shift by
            // `64 - 0` would wrap to a shift by zero and OR in the entire neighbouring word.
            if bit_shift != 0 {
                if let Some(next) = ret.0.get(base_idx + 1).copied() {
                    new_val |= next << (64 - bit_shift);
                }
            }

            ret.0[idx] = new_val;
        }

        ret
    }
}
macro_rules! word {
(1, $val:expr) => {
(($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64

View file

@ -1,5 +1,7 @@
#![cfg_attr(f128_enabled, feature(f128))]
use compiler_builtins::float::div;
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
@ -64,5 +66,28 @@ float_bench! {
],
}
criterion_group!(float_div, div_f32, div_f64);
#[cfg(f128_enabled)]
float_bench! {
name: div_f128,
sig: (a: f128, b: f128) -> f128,
crate_fn: div::__divtf3,
crate_fn_ppc: div::__divkf3,
sys_fn: __divtf3,
sys_fn_ppc: __divkf3,
sys_available: not(feature = "no-sys-f128"),
asm: []
}
/// Benchmark group entry point: builds a `Criterion` configured from the command-line
/// arguments and runs each division benchmark against it.
pub fn float_div() {
    let mut c = Criterion::default().configure_from_args();

    div_f32(&mut c);
    div_f64(&mut c);

    // The `f128` benchmark only exists when the toolchain/target enables `f128`.
    #[cfg(f128_enabled)]
    div_f128(&mut c);
}
criterion_main!(float_div);

View file

@ -30,13 +30,14 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
// FIXME(f16_f128): system symbols have incorrect results
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
const X86_NO_SSE_SKIPPED: &[&str] =
&["add_f128", "sub_f128", "mul_f128", "powi_f32", "powi_f64"];
const X86_NO_SSE_SKIPPED: &[&str] = &[
"add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64",
];
// FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer
// uses `compiler-rt` version.
// <https://github.com/llvm/llvm-project/issues/91840>
const AARCH64_SKIPPED: &[&str] = &["mul_f128"];
const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"];
// FIXME(llvm): system symbols have incorrect results on Windows
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>

View file

@ -59,3 +59,76 @@ fn widen_mul_u128() {
}
assert!(errors.is_empty());
}
#[test]
fn not_u128() {
    // Flipping every bit of zero must produce the all-ones value.
    let flipped = !u256::ZERO;
    assert_eq!(flipped, u256::MAX);
}
/// Checks `u256 >> u32` in two ways: against the native `u128` shift for values that fit in
/// 128 bits, and against hand-computed vectors for shifts of `u256::MAX`.
#[test]
fn shr_u128() {
    // Seeds that fit in a `u128`, so the native shift can act as the reference result.
    let only_low = [
        1,
        u16::MAX.into(),
        u32::MAX.into(),
        u64::MAX.into(),
        u128::MAX,
    ];

    let mut errors = Vec::new();

    for a in only_low {
        for perturb in 0..10 {
            // Nudge each seed to cover neighbouring bit patterns; saturates at `u128::MAX`.
            let a = a.saturating_add(perturb);
            for shift in 0..128 {
                let res = a.widen() >> shift;
                let expected = (a >> shift).widen();
                if res != expected {
                    errors.push((a.widen(), shift, res, expected));
                }
            }
        }
    }

    // (input, shift, expected) with words listed least-significant first. The shifts sit
    // just below, at, and just above each 64-bit word boundary.
    let check = [
        (
            u256::MAX,
            1,
            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]),
        ),
        (
            u256::MAX,
            5,
            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]),
        ),
        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
        (u256::MAX, 255, u256([1, 0, 0, 0])),
    ];

    for (input, shift, expected) in check {
        let res = input >> shift;
        if res != expected {
            errors.push((input, shift, res, expected));
        }
    }

    // Report every mismatch before failing so one run shows the full picture.
    for (a, b, res, expected) in &errors {
        eprintln!(
            "FAILURE: {} >> {b} = {} got {}",
            hexu(*a),
            hexu(*expected),
            hexu(*res),
        );
    }
    assert!(errors.is_empty());
}

View file

@ -1,3 +1,4 @@
#![feature(f128)]
#![allow(unused_macros)]
use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
@ -115,7 +116,13 @@ macro_rules! float {
fuzz_float_2(N, |x: $f, y: $f| {
let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
let quo1: $f = $fn(x, y);
#[cfg(not(target_arch = "arm"))]
// ARM SIMD instructions always flush subnormals to zero
if cfg!(target_arch = "arm") &&
((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) {
return;
}
if !Float::eq_repr(quo0, quo1) {
panic!(
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
@ -126,21 +133,6 @@ macro_rules! float {
quo1
);
}
// ARM SIMD instructions always flush subnormals to zero
#[cfg(target_arch = "arm")]
if !(Float::is_subnormal(quo0) || Float::is_subnormal(quo1)) {
if !Float::eq_repr(quo0, quo1) {
panic!(
"{}({:?}, {:?}): std: {:?}, builtins: {:?}",
stringify!($fn),
x,
y,
quo0,
quo1
);
}
}
});
}
)*
@ -155,4 +147,19 @@ mod float_div {
f32, __divsf3, Single, all();
f64, __divdf3, Double, all();
}
#[cfg(not(feature = "no-f16-f128"))]
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
float! {
f128, __divtf3, Quad,
// FIXME(llvm): there is a bug in LLVM rt.
// See <https://github.com/llvm/llvm-project/issues/91840>.
not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
}
#[cfg(not(feature = "no-f16-f128"))]
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
float! {
f128, __divkf3, Quad, not(feature = "no-sys-f128");
}
}