Implement u256 with two u128s rather than four u64s
This produces better assembly, e.g. on aarch64:
.globl libm::u128_wmul
.p2align 2
libm::u128_wmul:
Lfunc_begin124:
.cfi_startproc
mul x9, x2, x0
umulh x10, x2, x0
umulh x11, x3, x0
mul x12, x3, x0
umulh x13, x2, x1
mul x14, x2, x1
umulh x15, x3, x1
mul x16, x3, x1
adds x10, x10, x14
cinc x13, x13, hs
adds x13, x13, x16
cinc x14, x15, hs
adds x10, x10, x12
cinc x11, x11, hs
adds x11, x13, x11
stp x9, x10, [x8]
cinc x9, x14, hs
stp x11, x9, [x8, #16]
ret
The original was ~70 instructions, so the improvement is significant.
With these changes, the result is reasonably close to what LLVM
generates using `u256` operands [1].
[1]: https://llvm.godbolt.org/z/re1aGdaqY
This commit is contained in:
parent
b7fdce0505
commit
2f0685a9a2
7 changed files with 295 additions and 185 deletions
|
|
@ -77,7 +77,6 @@ fn setup_u128_mul() -> Vec<(u128, u128)> {
|
|||
v
|
||||
}
|
||||
|
||||
/*
|
||||
fn setup_u256_add() -> Vec<(u256, u256)> {
|
||||
let mut v = Vec::new();
|
||||
for (x, y) in setup_u128_mul() {
|
||||
|
|
@ -88,7 +87,6 @@ fn setup_u256_add() -> Vec<(u256, u256)> {
|
|||
v.push((u256::MAX, u256::MAX));
|
||||
v
|
||||
}
|
||||
*/
|
||||
|
||||
fn setup_u256_shift() -> Vec<(u256, u32)> {
|
||||
let mut v = Vec::new();
|
||||
|
|
@ -116,7 +114,6 @@ library_benchmark_group!(
|
|||
benchmarks = icount_bench_u128_widen_mul
|
||||
);
|
||||
|
||||
/* Not yet implemented
|
||||
#[library_benchmark]
|
||||
#[bench::linspace(setup_u256_add())]
|
||||
fn icount_bench_u256_add(cases: Vec<(u256, u256)>) {
|
||||
|
|
@ -129,7 +126,6 @@ library_benchmark_group!(
|
|||
name = icount_bench_u256_add_group;
|
||||
benchmarks = icount_bench_u256_add
|
||||
);
|
||||
*/
|
||||
|
||||
#[library_benchmark]
|
||||
#[bench::linspace(setup_u256_shift())]
|
||||
|
|
@ -148,7 +144,7 @@ main!(
|
|||
library_benchmark_groups =
|
||||
// u256-related benchmarks
|
||||
icount_bench_u128_widen_mul_group,
|
||||
// icount_bench_u256_add_group,
|
||||
icount_bench_u256_add_group,
|
||||
icount_bench_u256_shr_group,
|
||||
// verify-apilist-start
|
||||
// verify-sorted-start
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ use crate::run_cfg::{int_range, iteration_count};
|
|||
|
||||
pub(crate) const SEED_ENV: &str = "LIBM_SEED";
|
||||
|
||||
pub(crate) static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
|
||||
pub static SEED: LazyLock<[u8; 32]> = LazyLock::new(|| {
|
||||
let s = env::var(SEED_ENV).unwrap_or_else(|_| {
|
||||
let mut rng = rand::thread_rng();
|
||||
(0..32).map(|_| rng.sample(Alphanumeric) as char).collect()
|
||||
|
|
|
|||
|
|
@ -29,7 +29,10 @@ pub use op::{
|
|||
};
|
||||
pub use precision::{MaybeOverride, SpecialCase, default_ulp};
|
||||
use run_cfg::extensive_max_iterations;
|
||||
pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
|
||||
pub use run_cfg::{
|
||||
CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count,
|
||||
skip_extensive_test,
|
||||
};
|
||||
pub use test_traits::{CheckOutput, Hex, TupleCall};
|
||||
|
||||
/// Result type for tests is usually from `anyhow`. Most times there is no success value to
|
||||
|
|
|
|||
|
|
@ -158,14 +158,6 @@ impl TestEnv {
|
|||
let op = id.math_op();
|
||||
|
||||
let will_run_mp = cfg!(feature = "build-mpfr");
|
||||
|
||||
// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
|
||||
// with a reduced number on these platforms.
|
||||
let slow_on_ci = crate::emulated()
|
||||
|| usize::BITS < 64
|
||||
|| cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
|
||||
let slow_platform = slow_on_ci && crate::ci();
|
||||
|
||||
let large_float_ty = match op.float_ty {
|
||||
FloatTy::F16 | FloatTy::F32 => false,
|
||||
FloatTy::F64 | FloatTy::F128 => true,
|
||||
|
|
@ -176,7 +168,7 @@ impl TestEnv {
|
|||
let input_count = op.rust_sig.args.len();
|
||||
|
||||
Self {
|
||||
slow_platform,
|
||||
slow_platform: slow_platform(),
|
||||
large_float_ty,
|
||||
should_run_extensive: will_run_extensive,
|
||||
mp_tests_enabled: will_run_mp,
|
||||
|
|
@ -185,6 +177,17 @@ impl TestEnv {
|
|||
}
|
||||
}
|
||||
|
||||
/// Tests are pretty slow on non-64-bit targets, x86 MacOS, and targets that run in QEMU. Start
|
||||
/// with a reduced number of iterations on these platforms.
|
||||
fn slow_platform() -> bool {
|
||||
let slow_on_ci = crate::emulated()
|
||||
|| usize::BITS < 64
|
||||
|| cfg!(all(target_arch = "x86_64", target_vendor = "apple"));
|
||||
|
||||
// If not running in CI, there is no need to reduce iteration count.
|
||||
slow_on_ci && crate::ci()
|
||||
}
|
||||
|
||||
/// The number of iterations to run for a given test.
|
||||
pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
|
||||
let t_env = TestEnv::from_env(ctx);
|
||||
|
|
@ -351,3 +354,12 @@ pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
|
|||
let t_env = TestEnv::from_env(ctx);
|
||||
!t_env.should_run_extensive
|
||||
}
|
||||
|
||||
/// The number of iterations to run for `u256` fuzz tests.
|
||||
pub fn bigint_fuzz_iteration_count() -> u64 {
|
||||
if !cfg!(optimizations_enabled) {
|
||||
return 1000;
|
||||
}
|
||||
|
||||
if slow_platform() { 100_000 } else { 5_000_000 }
|
||||
}
|
||||
|
|
|
|||
147
library/compiler-builtins/libm/crates/libm-test/tests/u256.rs
Normal file
147
library/compiler-builtins/libm/crates/libm-test/tests/u256.rs
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
//! Test the u256 implementation. The ops already get exercised reasonably well through the `f128`
|
||||
//! routines, so this only does a few million fuzz iterations against GMP.
|
||||
|
||||
#![cfg(feature = "build-mpfr")]
|
||||
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use libm::support::{HInt, u256};
|
||||
type BigInt = rug::Integer;
|
||||
|
||||
use libm_test::bigint_fuzz_iteration_count;
|
||||
use libm_test::gen::random::SEED;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use rand_chacha::ChaCha8Rng;
|
||||
use rug::Assign;
|
||||
use rug::integer::Order;
|
||||
use rug::ops::NotAssign;
|
||||
|
||||
static BIGINT_U256_MAX: LazyLock<BigInt> =
|
||||
LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf));
|
||||
|
||||
/// Copied from the test module.
|
||||
fn hexu(v: u256) -> String {
|
||||
format!("0x{:032x}{:032x}", v.hi, v.lo)
|
||||
}
|
||||
|
||||
fn random_u256(rng: &mut ChaCha8Rng) -> u256 {
|
||||
let lo: u128 = rng.gen();
|
||||
let hi: u128 = rng.gen();
|
||||
u256 { lo, hi }
|
||||
}
|
||||
|
||||
fn assign_bigint(bx: &mut BigInt, x: u256) {
|
||||
bx.assign_digits(&[x.lo, x.hi], Order::Lsf);
|
||||
}
|
||||
|
||||
fn from_bigint(bx: &mut BigInt) -> u256 {
|
||||
// Truncate so the result fits into `[u128; 2]`. This gives every op wrapping (mod 2^256) semantics.
|
||||
*bx &= &*BIGINT_U256_MAX;
|
||||
let mut bres = [0u128, 0];
|
||||
bx.write_digits(&mut bres, Order::Lsf);
|
||||
bx.assign(0);
|
||||
u256 { lo: bres[0], hi: bres[1] }
|
||||
}
|
||||
|
||||
fn check_one(
|
||||
x: impl FnOnce() -> String,
|
||||
y: impl FnOnce() -> Option<String>,
|
||||
actual: u256,
|
||||
expected: &mut BigInt,
|
||||
) {
|
||||
let expected = from_bigint(expected);
|
||||
if actual != expected {
|
||||
let xmsg = x();
|
||||
let ymsg = y().map(|y| format!("y: {y}\n")).unwrap_or_default();
|
||||
panic!(
|
||||
"Results do not match\n\
|
||||
input: {xmsg}\n\
|
||||
{ymsg}\
|
||||
actual: {}\n\
|
||||
expected: {}\
|
||||
",
|
||||
hexu(actual),
|
||||
hexu(expected),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mp_u256_bitor() {
|
||||
let mut rng = ChaCha8Rng::from_seed(*SEED);
|
||||
let mut bx = BigInt::new();
|
||||
let mut by = BigInt::new();
|
||||
|
||||
for _ in 0..bigint_fuzz_iteration_count() {
|
||||
let x = random_u256(&mut rng);
|
||||
let y = random_u256(&mut rng);
|
||||
assign_bigint(&mut bx, x);
|
||||
assign_bigint(&mut by, y);
|
||||
let actual = x | y;
|
||||
bx |= &by;
|
||||
check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mp_u256_not() {
|
||||
let mut rng = ChaCha8Rng::from_seed(*SEED);
|
||||
let mut bx = BigInt::new();
|
||||
|
||||
for _ in 0..bigint_fuzz_iteration_count() {
|
||||
let x = random_u256(&mut rng);
|
||||
assign_bigint(&mut bx, x);
|
||||
let actual = !x;
|
||||
bx.not_assign();
|
||||
check_one(|| hexu(x), || None, actual, &mut bx);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mp_u256_add() {
|
||||
let mut rng = ChaCha8Rng::from_seed(*SEED);
|
||||
let mut bx = BigInt::new();
|
||||
let mut by = BigInt::new();
|
||||
|
||||
for _ in 0..bigint_fuzz_iteration_count() {
|
||||
let x = random_u256(&mut rng);
|
||||
let y = random_u256(&mut rng);
|
||||
assign_bigint(&mut bx, x);
|
||||
assign_bigint(&mut by, y);
|
||||
let actual = x + y;
|
||||
bx += &by;
|
||||
check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mp_u256_shr() {
|
||||
let mut rng = ChaCha8Rng::from_seed(*SEED);
|
||||
let mut bx = BigInt::new();
|
||||
|
||||
for _ in 0..bigint_fuzz_iteration_count() {
|
||||
let x = random_u256(&mut rng);
|
||||
let shift: u32 = rng.gen_range(0..255);
|
||||
assign_bigint(&mut bx, x);
|
||||
let actual = x >> shift;
|
||||
bx >>= shift;
|
||||
check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mp_u256_widen_mul() {
|
||||
let mut rng = ChaCha8Rng::from_seed(*SEED);
|
||||
let mut bx = BigInt::new();
|
||||
let mut by = BigInt::new();
|
||||
|
||||
for _ in 0..bigint_fuzz_iteration_count() {
|
||||
let x: u128 = rng.gen();
|
||||
let y: u128 = rng.gen();
|
||||
bx.assign(x);
|
||||
by.assign(y);
|
||||
let actual = x.widen_mul(y);
|
||||
bx *= &by;
|
||||
check_one(|| format!("{x:#034x}"), || Some(format!("{y:#034x}")), actual, &mut bx);
|
||||
}
|
||||
}
|
||||
|
|
@ -7,40 +7,39 @@ use core::ops;
|
|||
|
||||
use super::{DInt, HInt, Int, MinInt};
|
||||
|
||||
const WORD_LO_MASK: u64 = 0x00000000ffffffff;
|
||||
const WORD_HI_MASK: u64 = 0xffffffff00000000;
|
||||
const WORD_FULL_MASK: u64 = 0xffffffffffffffff;
|
||||
const U128_LO_MASK: u128 = u64::MAX as u128;
|
||||
|
||||
/// A 256-bit unsigned integer represented as 4 64-bit limbs.
|
||||
///
|
||||
/// Each limb is a native-endian number, but the array is little-limb-endian.
|
||||
/// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
|
||||
pub struct u256(pub [u64; 4]);
|
||||
pub struct u256 {
|
||||
pub lo: u128,
|
||||
pub hi: u128,
|
||||
}
|
||||
|
||||
impl u256 {
|
||||
#[allow(unused)]
|
||||
pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
|
||||
#[cfg(any(test, feature = "unstable-public-internals"))]
|
||||
pub const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX };
|
||||
|
||||
/// Reinterpret as a signed integer
|
||||
pub fn signed(self) -> i256 {
|
||||
i256(self.0)
|
||||
i256 { lo: self.lo, hi: self.hi }
|
||||
}
|
||||
}
|
||||
|
||||
/// A 256-bit signed integer represented as 4 64-bit limbs.
|
||||
///
|
||||
/// Each limb is a native-endian number, but the array is little-limb-endian.
|
||||
/// A 256-bit signed integer represented as two 128-bit native-endian limbs.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
|
||||
pub struct i256(pub [u64; 4]);
|
||||
pub struct i256 {
|
||||
pub lo: u128,
|
||||
pub hi: u128,
|
||||
}
|
||||
|
||||
impl i256 {
|
||||
/// Reinterpret as an unsigned integer
|
||||
#[cfg(test)]
|
||||
#[cfg(any(test, feature = "unstable-public-internals"))]
|
||||
pub fn unsigned(self) -> u256 {
|
||||
u256(self.0)
|
||||
u256 { lo: self.lo, hi: self.hi }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -51,10 +50,10 @@ impl MinInt for u256 {
|
|||
|
||||
const SIGNED: bool = false;
|
||||
const BITS: u32 = 256;
|
||||
const ZERO: Self = Self([0u64; 4]);
|
||||
const ONE: Self = Self([1, 0, 0, 0]);
|
||||
const MIN: Self = Self([0u64; 4]);
|
||||
const MAX: Self = Self([u64::MAX; 4]);
|
||||
const ZERO: Self = Self { lo: 0, hi: 0 };
|
||||
const ONE: Self = Self { lo: 1, hi: 0 };
|
||||
const MIN: Self = Self { lo: 0, hi: 0 };
|
||||
const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX };
|
||||
}
|
||||
|
||||
impl MinInt for i256 {
|
||||
|
|
@ -64,10 +63,10 @@ impl MinInt for i256 {
|
|||
|
||||
const SIGNED: bool = false;
|
||||
const BITS: u32 = 256;
|
||||
const ZERO: Self = Self([0u64; 4]);
|
||||
const ONE: Self = Self([1, 0, 0, 0]);
|
||||
const MIN: Self = Self([0, 0, 0, 1 << 63]);
|
||||
const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX << 1]);
|
||||
const ZERO: Self = Self { lo: 0, hi: 0 };
|
||||
const ONE: Self = Self { lo: 1, hi: 0 };
|
||||
const MIN: Self = Self { lo: 0, hi: 1 << 127 };
|
||||
const MAX: Self = Self { lo: u128::MAX, hi: u128::MAX << 1 };
|
||||
}
|
||||
|
||||
macro_rules! impl_common {
|
||||
|
|
@ -76,10 +75,8 @@ macro_rules! impl_common {
|
|||
type Output = Self;
|
||||
|
||||
fn bitor(mut self, rhs: Self) -> Self::Output {
|
||||
self.0[0] |= rhs.0[0];
|
||||
self.0[1] |= rhs.0[1];
|
||||
self.0[2] |= rhs.0[2];
|
||||
self.0[3] |= rhs.0[3];
|
||||
self.lo |= rhs.lo;
|
||||
self.hi |= rhs.hi;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
|
@ -87,8 +84,10 @@ macro_rules! impl_common {
|
|||
impl ops::Not for $ty {
|
||||
type Output = Self;
|
||||
|
||||
fn not(self) -> Self::Output {
|
||||
Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
|
||||
fn not(mut self) -> Self::Output {
|
||||
self.lo = !self.lo;
|
||||
self.hi = !self.hi;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -105,10 +104,21 @@ macro_rules! impl_common {
|
|||
impl_common!(i256);
|
||||
impl_common!(u256);
|
||||
|
||||
impl ops::Add<Self> for u256 {
|
||||
type Output = Self;
|
||||
|
||||
fn add(self, rhs: Self) -> Self::Output {
|
||||
let (lo, carry) = self.lo.overflowing_add(rhs.lo);
|
||||
let hi = self.hi.wrapping_add(carry as u128).wrapping_add(rhs.hi);
|
||||
|
||||
Self { lo, hi }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Shr<u32> for u256 {
|
||||
type Output = Self;
|
||||
|
||||
fn shr(self, rhs: u32) -> Self::Output {
|
||||
fn shr(mut self, rhs: u32) -> Self::Output {
|
||||
debug_assert!(rhs < Self::BITS, "attempted to shift right with overflow");
|
||||
if rhs >= Self::BITS {
|
||||
return Self::ZERO;
|
||||
|
|
@ -118,57 +128,28 @@ impl ops::Shr<u32> for u256 {
|
|||
return self;
|
||||
}
|
||||
|
||||
let mut ret = self;
|
||||
let byte_shift = rhs / 64;
|
||||
let bit_shift = rhs % 64;
|
||||
|
||||
for idx in 0..4 {
|
||||
let base_idx = idx + byte_shift as usize;
|
||||
|
||||
// FIXME(msrv): could be let...else.
|
||||
let base = match ret.0.get(base_idx) {
|
||||
Some(v) => v,
|
||||
None => {
|
||||
ret.0[idx] = 0;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let mut new_val = base >> bit_shift;
|
||||
|
||||
if let Some(new) = ret.0.get(base_idx + 1) {
|
||||
new_val |= new.overflowing_shl(64 - bit_shift).0;
|
||||
}
|
||||
|
||||
ret.0[idx] = new_val;
|
||||
if rhs < 128 {
|
||||
self.lo >>= rhs;
|
||||
self.lo |= self.hi << (128 - rhs);
|
||||
} else {
|
||||
self.lo = self.hi >> (rhs - 128);
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
if rhs < 128 {
|
||||
self.hi >>= rhs;
|
||||
} else {
|
||||
self.hi = 0;
|
||||
}
|
||||
|
||||
macro_rules! word {
|
||||
(1, $val:expr) => {
|
||||
(($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
(2, $val:expr) => {
|
||||
(($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
(3, $val:expr) => {
|
||||
(($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
(4, $val:expr) => {
|
||||
(($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64
|
||||
};
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl HInt for u128 {
|
||||
type D = u256;
|
||||
|
||||
fn widen(self) -> Self::D {
|
||||
let w0 = self & u128::from(u64::MAX);
|
||||
let w1 = (self >> u64::BITS) & u128::from(u64::MAX);
|
||||
u256([w0 as u64, w1 as u64, 0, 0])
|
||||
u256 { lo: self, hi: 0 }
|
||||
}
|
||||
|
||||
fn zero_widen(self) -> Self::D {
|
||||
|
|
@ -176,57 +157,24 @@ impl HInt for u128 {
|
|||
}
|
||||
|
||||
fn zero_widen_mul(self, rhs: Self) -> Self::D {
|
||||
let product11: u64 = word!(1, self) * word!(1, rhs);
|
||||
let product12: u64 = word!(1, self) * word!(2, rhs);
|
||||
let product13: u64 = word!(1, self) * word!(3, rhs);
|
||||
let product14: u64 = word!(1, self) * word!(4, rhs);
|
||||
let product21: u64 = word!(2, self) * word!(1, rhs);
|
||||
let product22: u64 = word!(2, self) * word!(2, rhs);
|
||||
let product23: u64 = word!(2, self) * word!(3, rhs);
|
||||
let product24: u64 = word!(2, self) * word!(4, rhs);
|
||||
let product31: u64 = word!(3, self) * word!(1, rhs);
|
||||
let product32: u64 = word!(3, self) * word!(2, rhs);
|
||||
let product33: u64 = word!(3, self) * word!(3, rhs);
|
||||
let product34: u64 = word!(3, self) * word!(4, rhs);
|
||||
let product41: u64 = word!(4, self) * word!(1, rhs);
|
||||
let product42: u64 = word!(4, self) * word!(2, rhs);
|
||||
let product43: u64 = word!(4, self) * word!(3, rhs);
|
||||
let product44: u64 = word!(4, self) * word!(4, rhs);
|
||||
let l0 = self & U128_LO_MASK;
|
||||
let l1 = rhs & U128_LO_MASK;
|
||||
let h0 = self >> 64;
|
||||
let h1 = rhs >> 64;
|
||||
|
||||
let sum0: u128 = u128::from(product44);
|
||||
let sum1: u128 = u128::from(product34) + u128::from(product43);
|
||||
let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42);
|
||||
let sum3: u128 = u128::from(product14)
|
||||
+ u128::from(product23)
|
||||
+ u128::from(product32)
|
||||
+ u128::from(product41);
|
||||
let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31);
|
||||
let sum5: u128 = u128::from(product12) + u128::from(product21);
|
||||
let sum6: u128 = u128::from(product11);
|
||||
let p_ll: u128 = l0.overflowing_mul(l1).0;
|
||||
let p_lh: u128 = l0.overflowing_mul(h1).0;
|
||||
let p_hl: u128 = h0.overflowing_mul(l1).0;
|
||||
let p_hh: u128 = h0.overflowing_mul(h1).0;
|
||||
|
||||
let r0: u128 =
|
||||
(sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32);
|
||||
let r1: u128 = (sum0 >> 64)
|
||||
+ ((sum1 >> 32) & u128::from(WORD_FULL_MASK))
|
||||
+ (sum2 & u128::from(WORD_FULL_MASK))
|
||||
+ ((sum3 << 32) & u128::from(WORD_HI_MASK));
|
||||
let s0 = p_hl + (p_ll >> 64);
|
||||
let s1 = (p_ll & U128_LO_MASK) + (s0 << 64);
|
||||
let s2 = p_lh + (s1 >> 64);
|
||||
|
||||
let (lo, carry) = r0.overflowing_add(r1 << 64);
|
||||
let hi = (r1 >> 64)
|
||||
+ (sum1 >> 96)
|
||||
+ (sum2 >> 64)
|
||||
+ (sum3 >> 32)
|
||||
+ sum4
|
||||
+ (sum5 << 32)
|
||||
+ (sum6 << 64)
|
||||
+ u128::from(carry);
|
||||
let lo = (p_ll & U128_LO_MASK) + (s2 << 64);
|
||||
let hi = p_hh + (s0 >> 64) + (s2 >> 64);
|
||||
|
||||
u256([
|
||||
(lo & U128_LO_MASK) as u64,
|
||||
((lo >> 64) & U128_LO_MASK) as u64,
|
||||
(hi & U128_LO_MASK) as u64,
|
||||
((hi >> 64) & U128_LO_MASK) as u64,
|
||||
])
|
||||
u256 { lo, hi }
|
||||
}
|
||||
|
||||
fn widen_mul(self, rhs: Self) -> Self::D {
|
||||
|
|
@ -244,8 +192,7 @@ impl HInt for i128 {
|
|||
fn widen(self) -> Self::D {
|
||||
let mut ret = self.unsigned().zero_widen().signed();
|
||||
if self.is_negative() {
|
||||
ret.0[2] = u64::MAX;
|
||||
ret.0[3] = u64::MAX;
|
||||
ret.hi = u128::MAX;
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
|
@ -271,17 +218,11 @@ impl DInt for u256 {
|
|||
type H = u128;
|
||||
|
||||
fn lo(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
|
||||
u128::from_le_bytes(tmp)
|
||||
self.lo
|
||||
}
|
||||
|
||||
fn hi(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
|
||||
u128::from_le_bytes(tmp)
|
||||
self.hi
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -289,16 +230,10 @@ impl DInt for i256 {
|
|||
type H = i128;
|
||||
|
||||
fn lo(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
|
||||
i128::from_le_bytes(tmp)
|
||||
self.lo as i128
|
||||
}
|
||||
|
||||
fn hi(self) -> Self::H {
|
||||
let mut tmp = [0u8; 16];
|
||||
tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
|
||||
tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
|
||||
i128::from_le_bytes(tmp)
|
||||
self.hi as i128
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,33 +9,30 @@ const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
|
|||
|
||||
/// Print a `u256` as hex since we can't add format implementations
|
||||
fn hexu(v: u256) -> String {
|
||||
format!("0x{:016x}{:016x}{:016x}{:016x}", v.0[3], v.0[2], v.0[1], v.0[0])
|
||||
format!("0x{:032x}{:032x}", v.hi, v.lo)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn widen_u128() {
|
||||
assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
|
||||
assert_eq!(LOHI_SPLIT.widen(), u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0]));
|
||||
assert_eq!(u128::MAX.widen(), u256 { lo: u128::MAX, hi: 0 });
|
||||
assert_eq!(LOHI_SPLIT.widen(), u256 { lo: LOHI_SPLIT, hi: 0 });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn widen_i128() {
|
||||
assert_eq!((-1i128).widen(), u256::MAX.signed());
|
||||
assert_eq!(
|
||||
(LOHI_SPLIT as i128).widen(),
|
||||
i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
|
||||
);
|
||||
assert_eq!((LOHI_SPLIT as i128).widen(), i256 { lo: LOHI_SPLIT, hi: u128::MAX });
|
||||
assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn widen_mul_u128() {
|
||||
let tests = [
|
||||
(u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
|
||||
(u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
|
||||
(u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
|
||||
(u128::MIN, u128::MIN, u256::ZERO),
|
||||
(1234, 0, u256::ZERO),
|
||||
(u128::MAX / 2, 2_u128, u256 { lo: u128::MAX - 1, hi: 0 }),
|
||||
(u128::MAX, 2_u128, u256 { lo: u128::MAX - 1, hi: 1 }),
|
||||
(u128::MAX, u128::MAX, u256 { lo: 1, hi: u128::MAX - 1 }),
|
||||
(0, 0, u256::ZERO),
|
||||
(1234u128, 0, u256::ZERO),
|
||||
(0, 1234, u256::ZERO),
|
||||
];
|
||||
|
||||
|
|
@ -50,20 +47,27 @@ fn widen_mul_u128() {
|
|||
}
|
||||
|
||||
for (i, a, b, exp, res) in &errors {
|
||||
eprintln!("FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", hexu(*exp), hexu(*res));
|
||||
eprintln!(
|
||||
"\
|
||||
FAILURE ({i}): {a:#034x} * {b:#034x}\n\
|
||||
expected: {}\n\
|
||||
got: {}\
|
||||
",
|
||||
hexu(*exp),
|
||||
hexu(*res)
|
||||
);
|
||||
}
|
||||
assert!(errors.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn not_u128() {
|
||||
fn not_u256() {
|
||||
assert_eq!(!u256::ZERO, u256::MAX);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shr_u128() {
|
||||
fn shr_u256() {
|
||||
let only_low = [1, u16::MAX.into(), u32::MAX.into(), u64::MAX.into(), u128::MAX];
|
||||
|
||||
let mut errors = Vec::new();
|
||||
|
||||
for a in only_low {
|
||||
|
|
@ -80,20 +84,24 @@ fn shr_u128() {
|
|||
}
|
||||
|
||||
let check = [
|
||||
(u256::MAX, 1, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1])),
|
||||
(u256::MAX, 5, u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5])),
|
||||
(u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
|
||||
(u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
|
||||
(u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
|
||||
(u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
|
||||
(u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
|
||||
(u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
|
||||
(u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
|
||||
(u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
|
||||
(u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
|
||||
(u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
|
||||
(u256::MAX, 254, u256([0b11, 0, 0, 0])),
|
||||
(u256::MAX, 255, u256([1, 0, 0, 0])),
|
||||
(u256::MAX, 1, u256 { lo: u128::MAX, hi: u128::MAX >> 1 }),
|
||||
(u256::MAX, 5, u256 { lo: u128::MAX, hi: u128::MAX >> 5 }),
|
||||
(u256::MAX, 63, u256 { lo: u128::MAX, hi: u64::MAX as u128 | (1 << 64) }),
|
||||
(u256::MAX, 64, u256 { lo: u128::MAX, hi: u64::MAX as u128 }),
|
||||
(u256::MAX, 65, u256 { lo: u128::MAX, hi: (u64::MAX >> 1) as u128 }),
|
||||
(u256::MAX, 127, u256 { lo: u128::MAX, hi: 1 }),
|
||||
(u256::MAX, 128, u256 { lo: u128::MAX, hi: 0 }),
|
||||
(u256::MAX, 129, u256 { lo: u128::MAX >> 1, hi: 0 }),
|
||||
(u256::MAX, 191, u256 { lo: u64::MAX as u128 | 1 << 64, hi: 0 }),
|
||||
(u256::MAX, 192, u256 { lo: u64::MAX as u128, hi: 0 }),
|
||||
(u256::MAX, 193, u256 { lo: u64::MAX as u128 >> 1, hi: 0 }),
|
||||
(u256::MAX, 254, u256 { lo: 0b11, hi: 0 }),
|
||||
(u256::MAX, 255, u256 { lo: 1, hi: 0 }),
|
||||
(
|
||||
u256 { hi: LOHI_SPLIT, lo: 0 },
|
||||
64,
|
||||
u256 { lo: 0xffffffffffffffff0000000000000000, hi: 0xaaaaaaaaaaaaaaaa },
|
||||
),
|
||||
];
|
||||
|
||||
for (input, shift, expected) in check {
|
||||
|
|
@ -104,7 +112,16 @@ fn shr_u128() {
|
|||
}
|
||||
|
||||
for (a, b, res, expected) in &errors {
|
||||
eprintln!("FAILURE: {} >> {b} = {} got {}", hexu(*a), hexu(*expected), hexu(*res),);
|
||||
eprintln!(
|
||||
"\
|
||||
FAILURE: {} >> {b}\n\
|
||||
expected: {}\n\
|
||||
got: {}\
|
||||
",
|
||||
hexu(*a),
|
||||
hexu(*expected),
|
||||
hexu(*res)
|
||||
);
|
||||
}
|
||||
assert!(errors.is_empty());
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue