Merge pull request #624 from tgross35/f128-int-to-float
Add f128 int to float conversions
This commit is contained in:
commit
717128af3c
8 changed files with 561 additions and 167 deletions
|
|
@ -233,12 +233,12 @@ of being added to Rust.
|
|||
- [x] fixunstfdi.c
|
||||
- [x] fixunstfsi.c
|
||||
- [x] fixunstfti.c
|
||||
- [ ] floatditf.c
|
||||
- [ ] floatsitf.c
|
||||
- [ ] floattitf.c
|
||||
- [ ] floatunditf.c
|
||||
- [ ] floatunsitf.c
|
||||
- [ ] floatuntitf.c
|
||||
- [x] floatditf.c
|
||||
- [x] floatsitf.c
|
||||
- [x] floattitf.c
|
||||
- [x] floatunditf.c
|
||||
- [x] floatunsitf.c
|
||||
- [x] floatuntitf.c
|
||||
- [x] multf3.c
|
||||
- [x] powitf2.c
|
||||
- [x] subtf3.c
|
||||
|
|
|
|||
|
|
@ -532,10 +532,6 @@ mod c {
|
|||
if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics {
|
||||
sources.extend(&[
|
||||
("__comparetf2", "comparetf2.c"),
|
||||
("__floatditf", "floatditf.c"),
|
||||
("__floatsitf", "floatsitf.c"),
|
||||
("__floatunditf", "floatunditf.c"),
|
||||
("__floatunsitf", "floatunsitf.c"),
|
||||
("__fe_getround", "fp_mode.c"),
|
||||
("__fe_raise_inexact", "fp_mode.c"),
|
||||
]);
|
||||
|
|
@ -550,21 +546,11 @@ mod c {
|
|||
}
|
||||
|
||||
if target.arch == "mips64" {
|
||||
sources.extend(&[
|
||||
("__netf2", "comparetf2.c"),
|
||||
("__floatsitf", "floatsitf.c"),
|
||||
("__floatunsitf", "floatunsitf.c"),
|
||||
("__fe_getround", "fp_mode.c"),
|
||||
]);
|
||||
sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
|
||||
}
|
||||
|
||||
if target.arch == "loongarch64" {
|
||||
sources.extend(&[
|
||||
("__netf2", "comparetf2.c"),
|
||||
("__floatsitf", "floatsitf.c"),
|
||||
("__floatunsitf", "floatunsitf.c"),
|
||||
("__fe_getround", "fp_mode.c"),
|
||||
]);
|
||||
sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]);
|
||||
}
|
||||
|
||||
// Remove the assembly implementations that won't compile for the target
|
||||
|
|
|
|||
|
|
@ -264,14 +264,18 @@ mod intrinsics {
|
|||
|
||||
/* i32 operations */
|
||||
|
||||
// floatsisf
|
||||
pub fn aeabi_i2f(x: i32) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
// floatsidf
|
||||
pub fn aeabi_i2d(x: i32) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
// floatsisf
|
||||
pub fn aeabi_i2f(x: i32) -> f32 {
|
||||
x as f32
|
||||
pub fn floatsitf(x: i32) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn aeabi_idiv(a: i32, b: i32) -> i32 {
|
||||
|
|
@ -294,6 +298,10 @@ mod intrinsics {
|
|||
x as f64
|
||||
}
|
||||
|
||||
pub fn floatditf(x: i64) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn mulodi4(a: i64, b: i64) -> i64 {
|
||||
a * b
|
||||
}
|
||||
|
|
@ -314,6 +322,18 @@ mod intrinsics {
|
|||
|
||||
/* i128 operations */
|
||||
|
||||
pub fn floattisf(x: i128) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
pub fn floattidf(x: i128) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
pub fn floattitf(x: i128) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn lshrti3(a: i128, b: usize) -> i128 {
|
||||
a >> b
|
||||
}
|
||||
|
|
@ -328,14 +348,18 @@ mod intrinsics {
|
|||
|
||||
/* u32 operations */
|
||||
|
||||
// floatunsisf
|
||||
pub fn aeabi_ui2f(x: u32) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
// floatunsidf
|
||||
pub fn aeabi_ui2d(x: u32) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
// floatunsisf
|
||||
pub fn aeabi_ui2f(x: u32) -> f32 {
|
||||
x as f32
|
||||
pub fn floatunsitf(x: u32) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn aeabi_uidiv(a: u32, b: u32) -> u32 {
|
||||
|
|
@ -358,6 +382,10 @@ mod intrinsics {
|
|||
x as f64
|
||||
}
|
||||
|
||||
pub fn floatunditf(x: u64) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
// udivdi3
|
||||
pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 {
|
||||
a * b
|
||||
|
|
@ -369,6 +397,18 @@ mod intrinsics {
|
|||
|
||||
/* u128 operations */
|
||||
|
||||
pub fn floatuntisf(x: u128) -> f32 {
|
||||
x as f32
|
||||
}
|
||||
|
||||
pub fn floatuntidf(x: u128) -> f64 {
|
||||
x as f64
|
||||
}
|
||||
|
||||
pub fn floatuntitf(x: u128) -> f128 {
|
||||
x as f128
|
||||
}
|
||||
|
||||
pub fn muloti4(a: u128, b: u128) -> Option<u128> {
|
||||
a.checked_mul(b)
|
||||
}
|
||||
|
|
@ -466,6 +506,16 @@ fn run() {
|
|||
bb(fixunstfsi(bb(2.)));
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
bb(fixunstfti(bb(2.)));
|
||||
bb(floatditf(bb(2)));
|
||||
bb(floatsitf(bb(2)));
|
||||
bb(floattidf(bb(2)));
|
||||
bb(floattisf(bb(2)));
|
||||
bb(floattitf(bb(2)));
|
||||
bb(floatunditf(bb(2)));
|
||||
bb(floatunsitf(bb(2)));
|
||||
bb(floatuntidf(bb(2)));
|
||||
bb(floatuntisf(bb(2)));
|
||||
bb(floatuntitf(bb(2)));
|
||||
bb(gttf(bb(2.), bb(2.)));
|
||||
bb(lshrti3(bb(2), bb(2)));
|
||||
bb(lttf(bb(2.), bb(2.)));
|
||||
|
|
|
|||
|
|
@ -6,21 +6,91 @@ use super::Float;
|
|||
|
||||
/// Conversions from integers to floats.
|
||||
///
|
||||
/// These are hand-optimized bit twiddling code,
|
||||
/// which unfortunately isn't the easiest kind of code to read.
|
||||
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>. It roughly does the following:
|
||||
/// - Calculate a base mantissa by shifting the integer into mantissa position. This gives us a
|
||||
/// mantissa _with the implicit bit set_!
|
||||
/// - Figure out if rounding needs to occur by classifying the bits that are to be truncated. Some
|
||||
/// patterns are used to simplify this. Adjust the mantissa with the result if needed.
|
||||
/// - Calculate the exponent based on the base-2 logarithm of `i` (leading zeros). Subtract one.
|
||||
/// - Shift the exponent and add the mantissa to create the final representation. Subtracting one
|
||||
/// from the exponent (above) accounts for the implicit bit being explicitly set in the mantissa.
|
||||
///
|
||||
/// The algorithm is explained here: <https://blog.m-ou.se/floats/>
|
||||
/// # Terminology
|
||||
///
|
||||
/// - `i`: the original integer
|
||||
/// - `i_m`: the integer, shifted fully left (no leading zeros)
|
||||
/// - `n`: number of leading zeros in `i`
|
||||
/// - `e`: the resulting exponent. Usually 1 is subtracted to offset the mantissa implicit bit.
|
||||
/// - `m_base`: the mantissa before adjusting for truncated bits. Implicit bit is usually set.
|
||||
/// - `adj`: the bits that will be truncated, possibly compressed in some way.
|
||||
/// - `m`: the resulting mantissa. Implicit bit is usually set.
|
||||
mod int_to_float {
|
||||
use super::*;
|
||||
|
||||
/// Calculate the exponent from the number of leading zeros.
|
||||
///
|
||||
/// Usually 1 is subtracted from this function's result, so that a mantissa with the implicit
|
||||
/// bit set can be added back later.
|
||||
fn exp<I: Int, F: Float<Int: CastFrom<u32>>>(n: u32) -> F::Int {
|
||||
F::Int::cast_from(F::EXPONENT_BIAS - 1 + I::BITS - n)
|
||||
}
|
||||
|
||||
/// Adjust a mantissa with dropped bits to perform correct rounding.
|
||||
///
|
||||
/// The dropped bits should be exactly the bits that get truncated (left-aligned), but they
|
||||
/// can be combined or compressed in some way that simplifies operations.
|
||||
fn m_adj<F: Float>(m_base: F::Int, dropped_bits: F::Int) -> F::Int {
|
||||
// Branchlessly extract a `1` if rounding up should happen, 0 otherwise
|
||||
// This accounts for rounding to even.
|
||||
let adj = (dropped_bits - (dropped_bits >> (F::BITS - 1) & !m_base)) >> (F::BITS - 1);
|
||||
|
||||
// Add one when we need to round up. Break ties to even.
|
||||
m_base + adj
|
||||
}
|
||||
|
||||
/// Shift the exponent to its position and add the mantissa.
|
||||
///
|
||||
/// If the mantissa has the implicit bit set, the exponent should be one less than its actual
|
||||
/// value to cancel it out.
|
||||
fn repr<F: Float>(e: F::Int, m: F::Int) -> F::Int {
|
||||
// + rather than | so the mantissa can overflow into the exponent
|
||||
(e << F::SIGNIFICAND_BITS) + m
|
||||
}
|
||||
|
||||
/// Shift distance from a left-aligned integer to a smaller float.
|
||||
fn shift_f_lt_i<I: Int, F: Float>() -> u32 {
|
||||
(I::BITS - F::BITS) + F::EXPONENT_BITS
|
||||
}
|
||||
|
||||
/// Shift distance from an integer with `n` leading zeros to a larger float.
|
||||
fn shift_f_gt_i<I: Int, F: Float>(n: u32) -> u32 {
|
||||
F::SIGNIFICAND_BITS - I::BITS + 1 + n
|
||||
}
|
||||
|
||||
/// Perform a signed operation as unsigned, then add the sign back.
|
||||
pub fn signed<I, F, Conv>(i: I, conv: Conv) -> F
|
||||
where
|
||||
F: Float,
|
||||
I: Int,
|
||||
F::Int: CastFrom<I>,
|
||||
Conv: Fn(I::UnsignedInt) -> F::Int,
|
||||
{
|
||||
let sign_bit = F::Int::cast_from(i >> (I::BITS - 1)) << (F::BITS - 1);
|
||||
F::from_bits(conv(i.unsigned_abs()) | sign_bit)
|
||||
}
|
||||
|
||||
pub fn u32_to_f32_bits(i: u32) -> u32 {
|
||||
if i == 0 {
|
||||
return 0;
|
||||
}
|
||||
let n = i.leading_zeros();
|
||||
let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact.
|
||||
let b = (i << n) << 24; // Insignificant bits, only relevant for rounding.
|
||||
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
|
||||
let e = 157 - n; // Exponent plus 127, minus one.
|
||||
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
|
||||
// Mantissa with implicit bit set (significant bits)
|
||||
let m_base = (i << n) >> f32::EXPONENT_BITS;
|
||||
// Bits that will be dropped (insignificant bits)
|
||||
let adj = (i << n) << (f32::SIGNIFICAND_BITS + 1);
|
||||
let m = m_adj::<f32>(m_base, adj);
|
||||
let e = exp::<u32, f32>(n) - 1;
|
||||
repr::<f32>(e, m)
|
||||
}
|
||||
|
||||
pub fn u32_to_f64_bits(i: u32) -> u64 {
|
||||
|
|
@ -28,19 +98,41 @@ mod int_to_float {
|
|||
return 0;
|
||||
}
|
||||
let n = i.leading_zeros();
|
||||
let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact.
|
||||
let e = 1053 - n as u64; // Exponent plus 1023, minus one.
|
||||
(e << 52) + m // Bit 53 of m will overflow into e.
|
||||
// Mantissa with implicit bit set
|
||||
let m = (i as u64) << shift_f_gt_i::<u32, f64>(n);
|
||||
let e = exp::<u32, f64>(n) - 1;
|
||||
repr::<f64>(e, m)
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn u32_to_f128_bits(i: u32) -> u128 {
|
||||
if i == 0 {
|
||||
return 0;
|
||||
}
|
||||
let n = i.leading_zeros();
|
||||
|
||||
// Shift into mantissa position that is correct for the type, but shifted into the lower
|
||||
// 64 bits so that we can avoid 128-bit math.
|
||||
let m = (i as u64) << (shift_f_gt_i::<u32, f128>(n) - 64);
|
||||
let e = exp::<u32, f128>(n) as u64 - 1;
|
||||
// High 64 bits of f128 representation.
|
||||
let h = (e << (f128::SIGNIFICAND_BITS - 64)) + m;
|
||||
|
||||
// Shift back to the high bits, the rest of the mantissa will always be 0.
|
||||
(h as u128) << 64
|
||||
}
|
||||
|
||||
pub fn u64_to_f32_bits(i: u64) -> u32 {
|
||||
let n = i.leading_zeros();
|
||||
let y = i.wrapping_shl(n);
|
||||
let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact.
|
||||
let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding.
|
||||
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
|
||||
let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero.
|
||||
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
|
||||
let i_m = i.wrapping_shl(n);
|
||||
// Mantissa with implicit bit set
|
||||
let m_base: u32 = (i_m >> shift_f_lt_i::<u64, f32>()) as u32;
|
||||
// The entire lower half of `i` will be truncated (masked portion), plus the
|
||||
// next `EXPONENT_BITS` bits.
|
||||
let adj = (i_m >> f32::EXPONENT_BITS | i_m & 0xFFFF) as u32;
|
||||
let m = m_adj::<f32>(m_base, adj);
|
||||
let e = if i == 0 { 0 } else { exp::<u64, f32>(n) - 1 };
|
||||
repr::<f32>(e, m)
|
||||
}
|
||||
|
||||
pub fn u64_to_f64_bits(i: u64) -> u64 {
|
||||
|
|
@ -48,31 +140,71 @@ mod int_to_float {
|
|||
return 0;
|
||||
}
|
||||
let n = i.leading_zeros();
|
||||
let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact.
|
||||
let b = (i << n) << 53; // Insignificant bits, only relevant for rounding.
|
||||
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
|
||||
let e = 1085 - n as u64; // Exponent plus 1023, minus one.
|
||||
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
|
||||
// Mantissa with implicit bit set
|
||||
let m_base = (i << n) >> f64::EXPONENT_BITS;
|
||||
let adj = (i << n) << (f64::SIGNIFICAND_BITS + 1);
|
||||
let m = m_adj::<f64>(m_base, adj);
|
||||
let e = exp::<u64, f64>(n) - 1;
|
||||
repr::<f64>(e, m)
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn u64_to_f128_bits(i: u64) -> u128 {
|
||||
if i == 0 {
|
||||
return 0;
|
||||
}
|
||||
let n = i.leading_zeros();
|
||||
// Mantissa with implicit bit set
|
||||
let m = (i as u128) << shift_f_gt_i::<u64, f128>(n);
|
||||
let e = exp::<u64, f128>(n) - 1;
|
||||
repr::<f128>(e, m)
|
||||
}
|
||||
|
||||
pub fn u128_to_f32_bits(i: u128) -> u32 {
|
||||
let n = i.leading_zeros();
|
||||
let y = i.wrapping_shl(n);
|
||||
let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact.
|
||||
let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding.
|
||||
let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even.
|
||||
let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero.
|
||||
(e << 23) + m // + not |, so the mantissa can overflow into the exponent.
|
||||
let i_m = i.wrapping_shl(n); // Mantissa, shifted so the first bit is nonzero
|
||||
let m_base: u32 = (i_m >> shift_f_lt_i::<u128, f32>()) as u32;
|
||||
|
||||
// Within the upper `F::BITS`, everything except for the significand
|
||||
// gets truncated
|
||||
let d1: u32 = (i_m >> (u128::BITS - f32::BITS - f32::SIGNIFICAND_BITS - 1)).cast();
|
||||
|
||||
// The entire rest of `i_m` gets truncated. Zero the upper `F::BITS` then just
|
||||
// check if it is nonzero.
|
||||
let d2: u32 = (i_m << f32::BITS >> f32::BITS != 0).into();
|
||||
let adj = d1 | d2;
|
||||
|
||||
// Mantissa with implicit bit set
|
||||
let m = m_adj::<f32>(m_base, adj);
|
||||
let e = if i == 0 { 0 } else { exp::<u128, f32>(n) - 1 };
|
||||
repr::<f32>(e, m)
|
||||
}
|
||||
|
||||
pub fn u128_to_f64_bits(i: u128) -> u64 {
|
||||
let n = i.leading_zeros();
|
||||
let y = i.wrapping_shl(n);
|
||||
let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact.
|
||||
let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding.
|
||||
let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even.
|
||||
let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero.
|
||||
(e << 52) + m // + not |, so the mantissa can overflow into the exponent.
|
||||
let i_m = i.wrapping_shl(n);
|
||||
// Mantissa with implicit bit set
|
||||
let m_base: u64 = (i_m >> shift_f_lt_i::<u128, f64>()) as u64;
|
||||
// The entire lower half of `i` will be truncated (masked portion), plus the
|
||||
// next `EXPONENT_BITS` bits.
|
||||
let adj = (i_m >> f64::EXPONENT_BITS | i_m & 0xFFFF_FFFF) as u64;
|
||||
let m = m_adj::<f64>(m_base, adj);
|
||||
let e = if i == 0 { 0 } else { exp::<u128, f64>(n) - 1 };
|
||||
repr::<f64>(e, m)
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
pub fn u128_to_f128_bits(i: u128) -> u128 {
|
||||
if i == 0 {
|
||||
return 0;
|
||||
}
|
||||
let n = i.leading_zeros();
|
||||
// Mantissa with implicit bit set
|
||||
let m_base = (i << n) >> f128::EXPONENT_BITS;
|
||||
let adj = (i << n) << (f128::SIGNIFICAND_BITS + 1);
|
||||
let m = m_adj::<f128>(m_base, adj);
|
||||
let e = exp::<u128, f128>(n) - 1;
|
||||
repr::<f128>(e, m)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -107,44 +239,74 @@ intrinsics! {
|
|||
pub extern "C" fn __floatuntidf(i: u128) -> f64 {
|
||||
f64::from_bits(int_to_float::u128_to_f64_bits(i))
|
||||
}
|
||||
|
||||
#[ppc_alias = __floatunsikf]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __floatunsitf(i: u32) -> f128 {
|
||||
f128::from_bits(int_to_float::u32_to_f128_bits(i))
|
||||
}
|
||||
|
||||
#[ppc_alias = __floatundikf]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __floatunditf(i: u64) -> f128 {
|
||||
f128::from_bits(int_to_float::u64_to_f128_bits(i))
|
||||
}
|
||||
|
||||
#[ppc_alias = __floatuntikf]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __floatuntitf(i: u128) -> f128 {
|
||||
f128::from_bits(int_to_float::u128_to_f128_bits(i))
|
||||
}
|
||||
}
|
||||
|
||||
// Conversions from signed integers to floats.
|
||||
intrinsics! {
|
||||
#[arm_aeabi_alias = __aeabi_i2f]
|
||||
pub extern "C" fn __floatsisf(i: i32) -> f32 {
|
||||
let sign_bit = ((i >> 31) as u32) << 31;
|
||||
f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit)
|
||||
int_to_float::signed(i, int_to_float::u32_to_f32_bits)
|
||||
}
|
||||
|
||||
#[arm_aeabi_alias = __aeabi_i2d]
|
||||
pub extern "C" fn __floatsidf(i: i32) -> f64 {
|
||||
let sign_bit = ((i >> 31) as u64) << 63;
|
||||
f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit)
|
||||
int_to_float::signed(i, int_to_float::u32_to_f64_bits)
|
||||
}
|
||||
|
||||
#[arm_aeabi_alias = __aeabi_l2f]
|
||||
pub extern "C" fn __floatdisf(i: i64) -> f32 {
|
||||
let sign_bit = ((i >> 63) as u32) << 31;
|
||||
f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit)
|
||||
int_to_float::signed(i, int_to_float::u64_to_f32_bits)
|
||||
}
|
||||
|
||||
#[arm_aeabi_alias = __aeabi_l2d]
|
||||
pub extern "C" fn __floatdidf(i: i64) -> f64 {
|
||||
let sign_bit = ((i >> 63) as u64) << 63;
|
||||
f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit)
|
||||
int_to_float::signed(i, int_to_float::u64_to_f64_bits)
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
|
||||
pub extern "C" fn __floattisf(i: i128) -> f32 {
|
||||
let sign_bit = ((i >> 127) as u32) << 31;
|
||||
f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit)
|
||||
int_to_float::signed(i, int_to_float::u128_to_f32_bits)
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "uefi", unadjusted_on_win64)]
|
||||
pub extern "C" fn __floattidf(i: i128) -> f64 {
|
||||
let sign_bit = ((i >> 127) as u64) << 63;
|
||||
f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit)
|
||||
int_to_float::signed(i, int_to_float::u128_to_f64_bits)
|
||||
}
|
||||
|
||||
#[ppc_alias = __floatsikf]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __floatsitf(i: i32) -> f128 {
|
||||
int_to_float::signed(i, int_to_float::u32_to_f128_bits)
|
||||
}
|
||||
|
||||
#[ppc_alias = __floatdikf]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __floatditf(i: i64) -> f128 {
|
||||
int_to_float::signed(i, int_to_float::u64_to_f128_bits)
|
||||
}
|
||||
|
||||
#[ppc_alias = __floattikf]
|
||||
#[cfg(f128_enabled)]
|
||||
pub extern "C" fn __floattitf(i: i128) -> f128 {
|
||||
int_to_float::signed(i, int_to_float::u128_to_f128_bits)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ pub(crate) trait Int: MinInt
|
|||
|
||||
fn unsigned(self) -> Self::UnsignedInt;
|
||||
fn from_unsigned(unsigned: Self::UnsignedInt) -> Self;
|
||||
fn unsigned_abs(self) -> Self::UnsignedInt;
|
||||
|
||||
fn from_bool(b: bool) -> Self;
|
||||
|
||||
|
|
@ -178,7 +179,6 @@ macro_rules! int_impl_common {
|
|||
fn wrapping_mul(self, other: Self) -> Self {
|
||||
<Self>::wrapping_mul(self, other)
|
||||
}
|
||||
|
||||
fn wrapping_sub(self, other: Self) -> Self {
|
||||
<Self>::wrapping_sub(self, other)
|
||||
}
|
||||
|
|
@ -235,6 +235,10 @@ macro_rules! int_impl {
|
|||
me
|
||||
}
|
||||
|
||||
fn unsigned_abs(self) -> Self {
|
||||
self
|
||||
}
|
||||
|
||||
fn abs_diff(self, other: Self) -> Self {
|
||||
if self < other {
|
||||
other.wrapping_sub(self)
|
||||
|
|
@ -268,6 +272,10 @@ macro_rules! int_impl {
|
|||
me as $ity
|
||||
}
|
||||
|
||||
fn unsigned_abs(self) -> Self::UnsignedInt {
|
||||
self.unsigned_abs()
|
||||
}
|
||||
|
||||
fn abs_diff(self, other: Self) -> $uty {
|
||||
self.wrapping_sub(other).wrapping_abs() as $uty
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
#![allow(improper_ctypes)]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
|
||||
use compiler_builtins::float::conv;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use criterion::{criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
/* unsigned int -> float */
|
||||
|
|
@ -76,6 +77,18 @@ float_bench! {
|
|||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_u32_f128,
|
||||
sig: (a: u32) -> f128,
|
||||
crate_fn: conv::__floatunsitf,
|
||||
crate_fn_ppc: conv::__floatunsikf,
|
||||
sys_fn: __floatunsitf,
|
||||
sys_fn_ppc: __floatunsikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_u64_f32,
|
||||
sig: (a: u64) -> f32,
|
||||
|
|
@ -118,6 +131,18 @@ float_bench! {
|
|||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_u64_f128,
|
||||
sig: (a: u64) -> f128,
|
||||
crate_fn: conv::__floatunditf,
|
||||
crate_fn_ppc: conv::__floatundikf,
|
||||
sys_fn: __floatunditf,
|
||||
sys_fn_ppc: __floatundikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_u128_f32,
|
||||
sig: (a: u128) -> f32,
|
||||
|
|
@ -136,6 +161,18 @@ float_bench! {
|
|||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_u128_f128,
|
||||
sig: (a: u128) -> f128,
|
||||
crate_fn: conv::__floatuntitf,
|
||||
crate_fn_ppc: conv::__floatuntikf,
|
||||
sys_fn: __floatuntitf,
|
||||
sys_fn_ppc: __floatuntikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
/* signed int -> float */
|
||||
|
||||
float_bench! {
|
||||
|
|
@ -205,6 +242,18 @@ float_bench! {
|
|||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_i32_f128,
|
||||
sig: (a: i32) -> f128,
|
||||
crate_fn: conv::__floatsitf,
|
||||
crate_fn_ppc: conv::__floatsikf,
|
||||
sys_fn: __floatsitf,
|
||||
sys_fn_ppc: __floatsikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_i64_f32,
|
||||
sig: (a: i64) -> f32,
|
||||
|
|
@ -272,6 +321,18 @@ float_bench! {
|
|||
],
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_i64_f128,
|
||||
sig: (a: i64) -> f128,
|
||||
crate_fn: conv::__floatditf,
|
||||
crate_fn_ppc: conv::__floatdikf,
|
||||
sys_fn: __floatditf,
|
||||
sys_fn_ppc: __floatdikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_i128_f32,
|
||||
sig: (a: i128) -> f32,
|
||||
|
|
@ -290,6 +351,18 @@ float_bench! {
|
|||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_i128_f128,
|
||||
sig: (a: i128) -> f128,
|
||||
crate_fn: conv::__floattitf,
|
||||
crate_fn_ppc: conv::__floattikf,
|
||||
sys_fn: __floattitf,
|
||||
sys_fn_ppc: __floattikf,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
/* float -> unsigned int */
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
|
|
@ -397,6 +470,39 @@ float_bench! {
|
|||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_u32,
|
||||
sig: (a: f128) -> u32,
|
||||
crate_fn: conv::__fixunstfsi,
|
||||
crate_fn_ppc: conv::__fixunskfsi,
|
||||
sys_fn: __fixunstfsi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_u64,
|
||||
sig: (a: f128) -> u64,
|
||||
crate_fn: conv::__fixunstfdi,
|
||||
crate_fn_ppc: conv::__fixunskfdi,
|
||||
sys_fn: __fixunstfdi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_u128,
|
||||
sig: (a: f128) -> u128,
|
||||
crate_fn: conv::__fixunstfti,
|
||||
crate_fn_ppc: conv::__fixunskfti,
|
||||
sys_fn: __fixunstfti,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
/* float -> signed int */
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
|
|
@ -504,43 +610,79 @@ float_bench! {
|
|||
asm: []
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
float_conv,
|
||||
conv_u32_f32,
|
||||
conv_u32_f64,
|
||||
conv_u64_f32,
|
||||
conv_u64_f64,
|
||||
conv_u128_f32,
|
||||
conv_u128_f64,
|
||||
conv_i32_f32,
|
||||
conv_i32_f64,
|
||||
conv_i64_f32,
|
||||
conv_i64_f64,
|
||||
conv_i128_f32,
|
||||
conv_i128_f64,
|
||||
conv_f64_u32,
|
||||
conv_f64_u64,
|
||||
conv_f64_u128,
|
||||
conv_f64_i32,
|
||||
conv_f64_i64,
|
||||
conv_f64_i128,
|
||||
);
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_i32,
|
||||
sig: (a: f128) -> i32,
|
||||
crate_fn: conv::__fixtfsi,
|
||||
crate_fn_ppc: conv::__fixkfsi,
|
||||
sys_fn: __fixtfsi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
// FIXME: ppc64le has a sporadic overflow panic in the crate functions
|
||||
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
criterion_group!(
|
||||
float_conv_not_ppc64le,
|
||||
conv_f32_u32,
|
||||
conv_f32_u64,
|
||||
conv_f32_u128,
|
||||
conv_f32_i32,
|
||||
conv_f32_i64,
|
||||
conv_f32_i128,
|
||||
);
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_i64,
|
||||
sig: (a: f128) -> i64,
|
||||
crate_fn: conv::__fixtfdi,
|
||||
crate_fn_ppc: conv::__fixkfdi,
|
||||
sys_fn: __fixtfdi,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
float_bench! {
|
||||
name: conv_f128_i128,
|
||||
sig: (a: f128) -> i128,
|
||||
crate_fn: conv::__fixtfti,
|
||||
crate_fn_ppc: conv::__fixkfti,
|
||||
sys_fn: __fixtfti,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
pub fn float_conv() {
|
||||
let mut criterion = Criterion::default().configure_from_args();
|
||||
|
||||
conv_u32_f32(&mut criterion);
|
||||
conv_u32_f64(&mut criterion);
|
||||
conv_u64_f32(&mut criterion);
|
||||
conv_u64_f64(&mut criterion);
|
||||
conv_u128_f32(&mut criterion);
|
||||
conv_u128_f64(&mut criterion);
|
||||
conv_i32_f32(&mut criterion);
|
||||
conv_i32_f64(&mut criterion);
|
||||
conv_i64_f32(&mut criterion);
|
||||
conv_i64_f64(&mut criterion);
|
||||
conv_i128_f32(&mut criterion);
|
||||
conv_i128_f64(&mut criterion);
|
||||
conv_f64_u32(&mut criterion);
|
||||
conv_f64_u64(&mut criterion);
|
||||
conv_f64_u128(&mut criterion);
|
||||
conv_f64_i32(&mut criterion);
|
||||
conv_f64_i64(&mut criterion);
|
||||
conv_f64_i128(&mut criterion);
|
||||
|
||||
#[cfg(all(f128_enabled))]
|
||||
// FIXME: ppc64le has a sporadic overflow panic in the crate functions
|
||||
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
{
|
||||
conv_u32_f128(&mut criterion);
|
||||
conv_u64_f128(&mut criterion);
|
||||
conv_u128_f128(&mut criterion);
|
||||
conv_i32_f128(&mut criterion);
|
||||
conv_i64_f128(&mut criterion);
|
||||
conv_i128_f128(&mut criterion);
|
||||
conv_f128_u32(&mut criterion);
|
||||
conv_f128_u64(&mut criterion);
|
||||
conv_f128_u128(&mut criterion);
|
||||
conv_f128_i32(&mut criterion);
|
||||
conv_f128_i64(&mut criterion);
|
||||
conv_f128_i128(&mut criterion);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "powerpc64", target_endian = "little"))]
|
||||
criterion_main!(float_conv);
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
criterion_main!(float_conv, float_conv_not_ppc64le);
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@
|
|||
#![no_std]
|
||||
#![cfg_attr(f128_enabled, feature(f128))]
|
||||
#![cfg_attr(f16_enabled, feature(f16))]
|
||||
#![feature(isqrt)]
|
||||
|
||||
pub mod bench;
|
||||
extern crate alloc;
|
||||
|
|
|
|||
|
|
@ -8,64 +8,86 @@ use compiler_builtins::float::Float;
|
|||
use rustc_apfloat::{Float as _, FloatConvert as _};
|
||||
use testcrate::*;
|
||||
|
||||
mod int_to_float {
|
||||
mod i_to_f {
|
||||
use super::*;
|
||||
|
||||
macro_rules! i_to_f {
|
||||
($($from:ty, $into:ty, $fn:ident);*;) => {
|
||||
($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => {
|
||||
$(
|
||||
#[test]
|
||||
fn $fn() {
|
||||
use compiler_builtins::float::conv::$fn;
|
||||
use compiler_builtins::int::Int;
|
||||
|
||||
fuzz(N, |x: $from| {
|
||||
let f0 = x as $into;
|
||||
let f1: $into = $fn(x);
|
||||
// This makes sure that the conversion produced the best rounding possible, and does
|
||||
// this independent of `x as $into` rounding correctly.
|
||||
// This assumes that float to integer conversion is correct.
|
||||
let y_minus_ulp = <$into>::from_bits(f1.to_bits().wrapping_sub(1)) as $from;
|
||||
let y = f1 as $from;
|
||||
let y_plus_ulp = <$into>::from_bits(f1.to_bits().wrapping_add(1)) as $from;
|
||||
let error_minus = <$from as Int>::abs_diff(y_minus_ulp, x);
|
||||
let error = <$from as Int>::abs_diff(y, x);
|
||||
let error_plus = <$from as Int>::abs_diff(y_plus_ulp, x);
|
||||
// The first two conditions check that none of the two closest float values are
|
||||
// strictly closer in representation to `x`. The second makes sure that rounding is
|
||||
// towards even significand if two float values are equally close to the integer.
|
||||
if error_minus < error
|
||||
|| error_plus < error
|
||||
|| ((error_minus == error || error_plus == error)
|
||||
&& ((f0.to_bits() & 1) != 0))
|
||||
{
|
||||
if !cfg!(any(
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64"
|
||||
)) {
|
||||
panic!(
|
||||
"incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
|
||||
stringify!($fn),
|
||||
x,
|
||||
f1.to_bits(),
|
||||
y_minus_ulp,
|
||||
y,
|
||||
y_plus_ulp,
|
||||
error_minus,
|
||||
error,
|
||||
error_plus,
|
||||
);
|
||||
fuzz(N, |x: $i_ty| {
|
||||
let f0 = apfloat_fallback!(
|
||||
$f_ty, $apfloat_ty, $sys_available,
|
||||
|x| x as $f_ty;
|
||||
// When the builtin is not available, we need to use a different conversion
|
||||
// method (since apfloat doesn't support `as` casting).
|
||||
|x: $i_ty| {
|
||||
use compiler_builtins::int::MinInt;
|
||||
|
||||
let apf = if <$i_ty>::SIGNED {
|
||||
FloatTy::from_i128(x.try_into().unwrap()).value
|
||||
} else {
|
||||
FloatTy::from_u128(x.try_into().unwrap()).value
|
||||
};
|
||||
|
||||
<$f_ty>::from_bits(apf.to_bits())
|
||||
},
|
||||
x
|
||||
);
|
||||
let f1: $f_ty = $fn(x);
|
||||
|
||||
#[cfg($sys_available)] {
|
||||
// This makes sure that the conversion produced the best rounding possible, and does
|
||||
// this independent of `x as $f_ty` rounding correctly.
|
||||
// This assumes that float to integer conversion is correct.
|
||||
let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty;
|
||||
let y = f1 as $i_ty;
|
||||
let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty;
|
||||
let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x);
|
||||
let error = <$i_ty as Int>::abs_diff(y, x);
|
||||
let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x);
|
||||
|
||||
// The first two conditions check that none of the two closest float values are
|
||||
// strictly closer in representation to `x`. The second makes sure that rounding is
|
||||
// towards even significand if two float values are equally close to the integer.
|
||||
if error_minus < error
|
||||
|| error_plus < error
|
||||
|| ((error_minus == error || error_plus == error)
|
||||
&& ((f0.to_bits() & 1) != 0))
|
||||
{
|
||||
if !cfg!(any(
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64"
|
||||
)) {
|
||||
panic!(
|
||||
"incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})",
|
||||
stringify!($fn),
|
||||
x,
|
||||
f1.to_bits(),
|
||||
y_minus_ulp,
|
||||
y,
|
||||
y_plus_ulp,
|
||||
error_minus,
|
||||
error,
|
||||
error_plus,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test against native conversion. We disable testing on all `x86` because of
|
||||
// rounding bugs with `i686`. `powerpc` also has the same rounding bug.
|
||||
if f0 != f1 && !cfg!(any(
|
||||
if !Float::eq_repr(f0, f1) && !cfg!(any(
|
||||
target_arch = "x86",
|
||||
target_arch = "powerpc",
|
||||
target_arch = "powerpc64"
|
||||
)) {
|
||||
panic!(
|
||||
"{}({}): std: {}, builtins: {}",
|
||||
"{}({}): std: {:?}, builtins: {:?}",
|
||||
stringify!($fn),
|
||||
x,
|
||||
f0,
|
||||
|
|
@ -78,19 +100,44 @@ mod int_to_float {
|
|||
};
|
||||
}
|
||||
|
||||
i_to_f! {
|
||||
u32, f32, __floatunsisf;
|
||||
u32, f64, __floatunsidf;
|
||||
i32, f32, __floatsisf;
|
||||
i32, f64, __floatsidf;
|
||||
u64, f32, __floatundisf;
|
||||
u64, f64, __floatundidf;
|
||||
i64, f32, __floatdisf;
|
||||
i64, f64, __floatdidf;
|
||||
u128, f32, __floatuntisf;
|
||||
u128, f64, __floatuntidf;
|
||||
i128, f32, __floattisf;
|
||||
i128, f64, __floattidf;
|
||||
i_to_f! { f32, Single, all(),
|
||||
u32, __floatunsisf;
|
||||
i32, __floatsisf;
|
||||
u64, __floatundisf;
|
||||
i64, __floatdisf;
|
||||
u128, __floatuntisf;
|
||||
i128, __floattisf;
|
||||
}
|
||||
|
||||
i_to_f! { f64, Double, all(),
|
||||
u32, __floatunsidf;
|
||||
i32, __floatsidf;
|
||||
u64, __floatundidf;
|
||||
i64, __floatdidf;
|
||||
u128, __floatuntidf;
|
||||
i128, __floattidf;
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
|
||||
u32, __floatunsitf;
|
||||
i32, __floatsitf;
|
||||
u64, __floatunditf;
|
||||
i64, __floatditf;
|
||||
u128, __floatuntitf;
|
||||
i128, __floattitf;
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
|
||||
u32, __floatunsikf;
|
||||
i32, __floatsikf;
|
||||
u64, __floatundikf;
|
||||
i64, __floatdikf;
|
||||
u128, __floatuntikf;
|
||||
i128, __floattikf;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue