Construct signed division functions differently

This commit is contained in:
Aaron Kutch 2020-10-02 22:04:05 -05:00
parent 1dfa14595e
commit 756a9dea00
6 changed files with 225 additions and 333 deletions

View file

@ -1,65 +1,166 @@
use int::specialized_div_rem::*;
use int::udiv::*;
/// Generates a signed "divide and also return the remainder" intrinsic that delegates the actual
/// division to an unsigned function operating on the magnitudes of the operands.
macro_rules! sdivmod {
    (
        $unsigned_fn:ident, // unsigned division function that the generated function calls
        $signed_fn:ident, // name of the signed division function to generate
        $uX:ident, // unsigned integer type passed to and returned by `$unsigned_fn`
        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
        $($attr:tt),* // attributes placed on the generated function
    ) => {
        intrinsics! {
            $(
                #[$attr]
            )*
            /// Returns `a / b` and sets `*rem = a % b`
            pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX {
                let dividend_is_neg = a < 0;
                let divisor_is_neg = b < 0;
                // Divide magnitudes. `wrapping_neg` leaves the minimum value's bit pattern
                // unchanged, and the cast to `$uX` below reinterprets those bits.
                let dividend_mag = if dividend_is_neg { a.wrapping_neg() } else { a };
                let divisor_mag = if divisor_is_neg { b.wrapping_neg() } else { b };
                // The slot is seeded with the caller's current value; `$unsigned_fn` is
                // presumably expected to overwrite it -- TODO confirm against its definition.
                let mut unsigned_rem = *rem as $uX;
                let unsigned_quo = $unsigned_fn(
                    dividend_mag as $uX,
                    divisor_mag as $uX,
                    Some(&mut unsigned_rem),
                ) as $iX;
                let signed_rem = unsigned_rem as $iX;
                // The remainder is negated exactly when the dividend was negative.
                *rem = if dividend_is_neg {
                    signed_rem.wrapping_neg()
                } else {
                    signed_rem
                };
                // The quotient is negated exactly when the operand signs differ.
                if dividend_is_neg == divisor_is_neg {
                    unsigned_quo
                } else {
                    unsigned_quo.wrapping_neg()
                }
            }
        }
    };
}
/// Generates a signed division intrinsic that delegates the actual division to an unsigned
/// function operating on the magnitudes of the operands.
macro_rules! sdiv {
    (
        $unsigned_fn:ident, // unsigned division function that the generated function calls
        $signed_fn:ident, // name of the signed division function to generate
        $uX:ident, // unsigned integer type passed to and returned by `$unsigned_fn`
        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
        $($attr:tt),* // attributes placed on the generated function
    ) => {
        intrinsics! {
            $(
                #[$attr]
            )*
            /// Returns `a / b`
            pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
                let dividend_is_neg = a < 0;
                let divisor_is_neg = b < 0;
                // Divide magnitudes. `wrapping_neg` leaves the minimum value's bit pattern
                // unchanged, and the cast to `$uX` below reinterprets those bits.
                let dividend_mag = if dividend_is_neg { a.wrapping_neg() } else { a };
                let divisor_mag = if divisor_is_neg { b.wrapping_neg() } else { b };
                let unsigned_quo = $unsigned_fn(dividend_mag as $uX, divisor_mag as $uX) as $iX;
                // The quotient is negated exactly when the operand signs differ.
                if dividend_is_neg == divisor_is_neg {
                    unsigned_quo
                } else {
                    unsigned_quo.wrapping_neg()
                }
            }
        }
    };
}
/// Generates a signed remainder intrinsic that delegates the actual computation to an unsigned
/// function operating on the magnitudes of the operands.
macro_rules! smod {
    (
        $unsigned_fn:ident, // unsigned remainder function that the generated function calls
        $signed_fn:ident, // name of the signed remainder function to generate
        $uX:ident, // unsigned integer type passed to and returned by `$unsigned_fn`
        $iX:ident, // signed integer type for the inputs and outputs of `$signed_fn`
        $($attr:tt),* // attributes placed on the generated function
    ) => {
        intrinsics! {
            $(
                #[$attr]
            )*
            /// Returns `a % b`
            pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX {
                let dividend_is_neg = a < 0;
                // Work on magnitudes. `wrapping_neg` leaves the minimum value's bit pattern
                // unchanged, and the cast to `$uX` below reinterprets those bits.
                let dividend_mag = if dividend_is_neg { a.wrapping_neg() } else { a };
                let divisor_mag = if b < 0 { b.wrapping_neg() } else { b };
                let unsigned_rem = $unsigned_fn(dividend_mag as $uX, divisor_mag as $uX) as $iX;
                // Only the dividend's sign affects the result; the divisor's sign is discarded.
                if dividend_is_neg {
                    unsigned_rem.wrapping_neg()
                } else {
                    unsigned_rem
                }
            }
        }
    };
}
// Generates `__divmodsi4`, the `i32` division-with-remainder function, which calls the unsigned
// `__udivmodsi4` on `u32` values.
sdivmod!(
__udivmodsi4,
__divmodsi4,
u32,
i32,
maybe_use_optimized_c_shim
);
// The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros
intrinsics! {
#[maybe_use_optimized_c_shim]
#[arm_aeabi_alias = __aeabi_idiv]
/// Returns `n / d`
pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 {
i32_div_rem(a, b).0
}
#[maybe_use_optimized_c_shim]
/// Returns `n % d`
pub extern "C" fn __modsi3(a: i32, b: i32) -> i32 {
i32_div_rem(a, b).1
}
#[maybe_use_optimized_c_shim]
/// Returns `n / d` and sets `*rem = n % d`
pub extern "C" fn __divmodsi4(a: i32, b: i32, rem: &mut i32) -> i32 {
let quo_rem = i32_div_rem(a, b);
*rem = quo_rem.1;
quo_rem.0
}
#[maybe_use_optimized_c_shim]
/// Returns `n / d`
pub extern "C" fn __divdi3(a: i64, b: i64) -> i64 {
i64_div_rem(a, b).0
}
#[maybe_use_optimized_c_shim]
/// Returns `n % d`
pub extern "C" fn __moddi3(a: i64, b: i64) -> i64 {
i64_div_rem(a, b).1
}
#[maybe_use_optimized_c_shim]
/// Returns `n / d` and sets `*rem = n % d`
pub extern "C" fn __divmoddi4(a: i64, b: i64, rem: &mut i64) -> i64 {
let quo_rem = i64_div_rem(a, b);
*rem = quo_rem.1;
quo_rem.0
}
#[win64_128bit_abi_hack]
/// Returns `n / d`
pub extern "C" fn __divti3(a: i128, b: i128) -> i128 {
i128_div_rem(a, b).0
}
#[win64_128bit_abi_hack]
/// Returns `n % d`
pub extern "C" fn __modti3(a: i128, b: i128) -> i128 {
i128_div_rem(a, b).1
}
// LLVM does not currently have a `__divmodti4` function, but GCC does
#[maybe_use_optimized_c_shim]
/// Returns `n / d` and sets `*rem = n % d`
pub extern "C" fn __divmodti4(a: i128, b: i128, rem: &mut i128) -> i128 {
let quo_rem = i128_div_rem(a, b);
*rem = quo_rem.1;
quo_rem.0
let a_neg = a < 0;
let b_neg = b < 0;
let mut a = a;
let mut b = b;
if a_neg {
a = a.wrapping_neg();
}
if b_neg {
b = b.wrapping_neg();
}
let t = __udivsi3(a as u32, b as u32) as i32;
if a_neg != b_neg {
t.wrapping_neg()
} else {
t
}
}
}
// Generates `__modsi3`, the `i32` remainder function, which calls the unsigned `__umodsi3`.
smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim);
// Generates `__divmoddi4`, the `i64` division-with-remainder function, which calls the unsigned
// `__udivmoddi4`.
sdivmod!(
__udivmoddi4,
__divmoddi4,
u64,
i64,
maybe_use_optimized_c_shim
);
// Generates `__divdi3`, the `i64` division function, which calls the unsigned `__udivdi3`.
sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim);
// Generates `__moddi3`, the `i64` remainder function, which calls the unsigned `__umoddi3`.
smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim);
// Generates `__divmodti4`, the `i128` division-with-remainder function, which calls the unsigned
// `__udivmodti4`.
// LLVM does not currently have a `__divmodti4` function, but GCC does
sdivmod!(
__udivmodti4,
__divmodti4,
u128,
i128,
maybe_use_optimized_c_shim
);
// Generates `__divti3`, the `i128` division function, which calls the unsigned `__udivti3`.
sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack);
// Generates `__modti3`, the `i128` remainder function, which calls the unsigned `__umodti3`.
smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack);

View file

@ -1,4 +1,4 @@
/// Creates unsigned and signed division functions optimized for dividing integers with the same
/// Creates an unsigned division function optimized for dividing integers with the same
/// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an
/// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits
/// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to
@ -6,25 +6,18 @@
#[macro_export]
macro_rules! impl_asymmetric {
(
$unsigned_name:ident, // name of the unsigned division function
$signed_name:ident, // name of the signed division function
$fn:ident, // name of the unsigned division function
$zero_div_fn:ident, // function called when division by zero is attempted
$half_division:ident, // function for division of a $uX by a $uX
$asymmetric_division:ident, // function for division of a $uD by a $uX
$n_h:expr, // the number of bits in a $iH or $uH
$uH:ident, // unsigned integer with half the bit width of $uX
$uX:ident, // unsigned integer with half the bit width of $uD
$uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
$iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
$($unsigned_attr:meta),*; // attributes for the unsigned function
$($signed_attr:meta),* // attributes for the signed function
$uD:ident // unsigned integer type for the inputs and outputs of `$fn`
) => {
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$unsigned_attr]
)*
pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) {
pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
let n: u32 = $n_h * 2;
let duo_lo = duo as $uX;
@ -38,14 +31,14 @@ macro_rules! impl_asymmetric {
if duo_hi < div_lo {
// `$uD` by `$uX` division with a quotient that will fit into a `$uX`
let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) };
return (quo as $uD, rem as $uD)
return (quo as $uD, rem as $uD);
} else {
// Short division using the $uD by $uX division
let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo);
let tmp = unsafe {
$asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo)
};
return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD)
return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD);
}
}
@ -57,9 +50,7 @@ macro_rules! impl_asymmetric {
let div_lz = div_hi.leading_zeros();
let div_extra = n - div_lz;
let div_sig_n = (div >> div_extra) as $uX;
let tmp = unsafe {
$asymmetric_division(duo >> 1, div_sig_n)
};
let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) };
let mut quo = tmp.0 >> ((n - 1) - div_lz);
if quo != 0 {
@ -72,33 +63,7 @@ macro_rules! impl_asymmetric {
quo += 1;
rem -= div;
}
return (quo as $uD, rem)
return (quo as $uD, rem);
}
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$signed_attr]
)*
pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
match (duo < 0, div < 0) {
(false, false) => {
let t = $unsigned_name(duo as $uD, div as $uD);
(t.0 as $iD, t.1 as $iD)
},
(true, false) => {
let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
},
(false, true) => {
let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
((t.0 as $iD).wrapping_neg(), t.1 as $iD)
},
(true, true) => {
let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
(t.0 as $iD, (t.1 as $iD).wrapping_neg())
},
}
}
}
};
}

View file

@ -1,4 +1,4 @@
/// Creates unsigned and signed division functions that use binary long division, designed for
/// Creates an unsigned division function that uses binary long division, designed for
/// computer architectures without division instructions. These functions have good performance for
/// microarchitectures with large branch miss penalties and architectures without the ability to
/// predicate instructions. For architectures with predicated instructions, one of the algorithms
@ -7,29 +7,23 @@
#[macro_export]
macro_rules! impl_binary_long {
(
$unsigned_name:ident, // name of the unsigned division function
$signed_name:ident, // name of the signed division function
$fn:ident, // name of the unsigned division function
$zero_div_fn:ident, // function called when division by zero is attempted
$normalization_shift:ident, // function for finding the normalization shift
$n:tt, // the number of bits in a $iX or $uX
$uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
$iX:ident, // signed integer type for the inputs and outputs of `$signed_name`
$($unsigned_attr:meta),*; // attributes for the unsigned function
$($signed_attr:meta),* // attributes for the signed function
$uX:ident, // unsigned integer type for the inputs and outputs of `$fn`
$iX:ident // signed integer type with same bitwidth as `$uX`
) => {
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$unsigned_attr]
)*
pub fn $unsigned_name(duo: $uX, div: $uX) -> ($uX, $uX) {
pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) {
let mut duo = duo;
// handle edge cases before calling `$normalization_shift`
if div == 0 {
$zero_div_fn()
}
if duo < div {
return (0, duo)
return (0, duo);
}
// There are many variations of binary division algorithm that could be used. This
@ -430,7 +424,7 @@ macro_rules! impl_binary_long {
let mut i = shl;
loop {
if i == 0 {
break
break;
}
i -= 1;
// shift left 1 and subtract
@ -550,47 +544,5 @@ macro_rules! impl_binary_long {
return ((duo & mask) | quo, duo >> shl);
*/
}
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$signed_attr]
)*
pub fn $signed_name(duo: $iX, div: $iX) -> ($iX, $iX) {
// There is a way of doing this without any branches, but requires too many extra
// operations to be faster.
/*
let duo_s = duo >> ($n - 1);
let div_s = div >> ($n - 1);
let duo = (duo ^ duo_s).wrapping_sub(duo_s);
let div = (div ^ div_s).wrapping_sub(div_s);
let quo_s = duo_s ^ div_s;
let rem_s = duo_s;
let tmp = $unsigned_name(duo as $uX, div as $uX);
(
((tmp.0 as $iX) ^ quo_s).wrapping_sub(quo_s),
((tmp.1 as $iX) ^ rem_s).wrapping_sub(rem_s),
)
*/
match (duo < 0, div < 0) {
(false, false) => {
let t = $unsigned_name(duo as $uX, div as $uX);
(t.0 as $iX, t.1 as $iX)
},
(true, false) => {
let t = $unsigned_name(duo.wrapping_neg() as $uX, div as $uX);
((t.0 as $iX).wrapping_neg(), (t.1 as $iX).wrapping_neg())
},
(false, true) => {
let t = $unsigned_name(duo as $uX, div.wrapping_neg() as $uX);
((t.0 as $iX).wrapping_neg(), t.1 as $iX)
},
(true, true) => {
let t = $unsigned_name(duo.wrapping_neg() as $uX, div.wrapping_neg() as $uX);
(t.0 as $iX, (t.1 as $iX).wrapping_neg())
},
}
}
}
};
}

View file

@ -1,29 +1,23 @@
/// Creates unsigned and signed division functions that use a combination of hardware division and
/// Creates an unsigned division function that uses a combination of hardware division and
/// binary long division to divide integers larger than what hardware division by itself can do. This
/// function is intended for microarchitectures that have division hardware, but not fast enough
/// multiplication hardware for `impl_trifecta` to be faster.
#[macro_export]
macro_rules! impl_delegate {
(
$unsigned_name:ident, // name of the unsigned division function
$signed_name:ident, // name of the signed division function
$fn:ident, // name of the unsigned division function
$zero_div_fn:ident, // function called when division by zero is attempted
$half_normalization_shift:ident, // function for finding the normalization shift of $uX
$half_division:ident, // function for division of a $uX by a $uX
$n_h:expr, // the number of bits in $iH or $uH
$uH:ident, // unsigned integer with half the bit width of $uX
$uX:ident, // unsigned integer with half the bit width of $uD.
$uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
$iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
$($unsigned_attr:meta),*; // attributes for the unsigned function
$($signed_attr:meta),* // attributes for the signed function
$uD:ident, // unsigned integer type for the inputs and outputs of `$fn`
$iD:ident // signed integer type with the same bitwidth as `$uD`
) => {
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$unsigned_attr]
)*
pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) {
pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
// The two possibility algorithm, undersubtracting long division algorithm, or any kind
// of reciprocal based algorithm will not be fastest, because they involve large
// multiplications that we assume to not be fast enough relative to the divisions to
@ -38,17 +32,15 @@ macro_rules! impl_delegate {
let div_hi = (div >> n) as $uX;
match (div_lo == 0, div_hi == 0, duo_hi == 0) {
(true, true, _) => {
$zero_div_fn()
}
(true, true, _) => $zero_div_fn(),
(_, false, true) => {
// `duo` < `div`
return (0, duo)
return (0, duo);
}
(false, true, true) => {
// delegate to smaller division
let tmp = $half_division(duo_lo, div_lo);
return (tmp.0 as $uD, tmp.1 as $uD)
return (tmp.0 as $uD, tmp.1 as $uD);
}
(false, true, false) => {
if duo_hi < div_lo {
@ -96,7 +88,7 @@ macro_rules! impl_delegate {
// Delegate to get the rest of the quotient. Note that the
// `div_lo` here is the original unshifted `div`.
let tmp = $half_division(duo as $uX, div_lo);
return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD)
return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD);
}
}
div >>= 1;
@ -105,7 +97,7 @@ macro_rules! impl_delegate {
} else if duo_hi == div_lo {
// `quo_hi == 1`. This branch is cheap and helps with edge cases.
let tmp = $half_division(duo as $uX, div as $uX);
return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD)
return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD);
} else {
// `div_lo < duo_hi`
// `rem_hi == 0`
@ -114,22 +106,16 @@ macro_rules! impl_delegate {
let div_0 = div_lo as $uH as $uX;
let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
let duo_mid =
((duo >> $n_h) as $uH as $uX)
| (rem_3 << $n_h);
let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
let (quo_1, rem_2) = $half_division(duo_mid, div_0);
let duo_lo =
(duo as $uH as $uX)
| (rem_2 << $n_h);
let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
let (quo_0, rem_1) = $half_division(duo_lo, div_0);
return (
(quo_0 as $uD)
| ((quo_1 as $uD) << $n_h)
| ((quo_hi as $uD) << n),
rem_1 as $uD
)
(quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
rem_1 as $uD,
);
}
// This is basically a short division composed of a half division for the hi
@ -161,7 +147,7 @@ macro_rules! impl_delegate {
let tmp = $half_division(duo as $uX, div_lo);
return (
(tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n),
tmp.1 as $uD
tmp.1 as $uD,
);
}
}
@ -187,7 +173,7 @@ macro_rules! impl_delegate {
duo = sub;
quo_lo |= pow_lo;
if duo < div_original {
return (quo_lo as $uD, duo)
return (quo_lo as $uD, duo);
}
}
div >>= 1;
@ -196,31 +182,5 @@ macro_rules! impl_delegate {
}
}
}
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$signed_attr]
)*
pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
match (duo < 0, div < 0) {
(false, false) => {
let t = $unsigned_name(duo as $uD, div as $uD);
(t.0 as $iD, t.1 as $iD)
},
(true, false) => {
let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
},
(false, true) => {
let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
((t.0 as $iD).wrapping_neg(), t.1 as $iD)
},
(true, true) => {
let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
(t.0 as $iD, (t.1 as $iD).wrapping_neg())
},
}
}
}
};
}

View file

@ -111,13 +111,6 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
zero_div_fn()
}
// `inline(never)` is placed on unsigned division functions so that there are just three division
// functions (`u32_div_rem`, `u64_div_rem`, and `u128_div_rem`) backing all `compiler-builtins`
// division functions. The signed functions like `i32_div_rem` will get inlined into the
// `compiler-builtins` signed division functions, so that they directly call the three division
// functions. Otherwise, LLVM may try to inline the unsigned division functions 4 times into the
// signed division functions, which results in an explosion in code size.
// Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a
// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
// faster if the target pointer width is at least 64.
@ -127,16 +120,12 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
))]
impl_trifecta!(
u128_div_rem,
i128_div_rem,
zero_div_fn,
u64_by_u64_div_rem,
32,
u32,
u64,
u128,
i128,
inline(never);
inline
u128
);
// If the pointer width less than 64, then the target architecture almost certainly does not have
@ -147,7 +136,6 @@ impl_trifecta!(
))]
impl_delegate!(
u128_div_rem,
i128_div_rem,
zero_div_fn,
u64_normalization_shift,
u64_by_u64_div_rem,
@ -155,9 +143,7 @@ impl_delegate!(
u32,
u64,
u128,
i128,
inline(never);
inline
i128
);
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@ -191,17 +177,13 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
#[cfg(all(feature = "asm", target_arch = "x86_64"))]
impl_asymmetric!(
u128_div_rem,
i128_div_rem,
zero_div_fn,
u64_by_u64_div_rem,
u128_by_u64_div_rem,
32,
u32,
u64,
u128,
i128,
inline(never);
inline
u128
);
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@ -226,7 +208,6 @@ fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) {
))]
impl_delegate!(
u64_div_rem,
i64_div_rem,
zero_div_fn,
u32_normalization_shift,
u32_by_u32_div_rem,
@ -234,9 +215,7 @@ impl_delegate!(
u16,
u32,
u64,
i64,
inline(never);
inline
i64
);
// When not on x86 and the pointer width is 64, use `binary_long`.
@ -246,14 +225,11 @@ impl_delegate!(
))]
impl_binary_long!(
u64_div_rem,
i64_div_rem,
zero_div_fn,
u64_normalization_shift,
64,
u64,
i64,
inline(never);
inline
i64
);
/// Divides `duo` by `div` and returns a tuple of the quotient and the remainder.
@ -287,28 +263,21 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
#[cfg(all(feature = "asm", target_arch = "x86"))]
impl_asymmetric!(
u64_div_rem,
i64_div_rem,
zero_div_fn,
u32_by_u32_div_rem,
u64_by_u32_div_rem,
16,
u16,
u32,
u64,
i64,
inline(never);
inline
u64
);
// 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division
impl_binary_long!(
u32_div_rem,
i32_div_rem,
zero_div_fn,
u32_normalization_shift,
32,
u32,
i32,
inline(never);
inline
i32
);

View file

@ -1,28 +1,21 @@
/// Creates unsigned and signed division functions optimized for division of integers with bitwidths
/// Creates an unsigned division function optimized for division of integers with bitwidths
/// larger than the largest hardware integer division supported. These functions use large radix
/// division algorithms that require both fast division and very fast widening multiplication on the
/// target microarchitecture. Otherwise, `impl_delegate` should be used instead.
#[macro_export]
macro_rules! impl_trifecta {
(
$unsigned_name:ident, // name of the unsigned division function
$signed_name:ident, // name of the signed division function
$fn:ident, // name of the unsigned division function
$zero_div_fn:ident, // function called when division by zero is attempted
$half_division:ident, // function for division of a $uX by a $uX
$n_h:expr, // the number of bits in $iH or $uH
$uH:ident, // unsigned integer with half the bit width of $uX
$uX:ident, // unsigned integer with half the bit width of $uD
$uD:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name`
$iD:ident, // signed integer type for the inputs and outputs of `$signed_name`
$($unsigned_attr:meta),*; // attributes for the unsigned function
$($signed_attr:meta),* // attributes for the signed function
$uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name`
) => {
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$unsigned_attr]
)*
pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD, $uD) {
pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) {
// This is called the trifecta algorithm because it uses three main algorithms: short
// division for small divisors, the two possibility algorithm for large divisors, and an
// undersubtracting long division algorithm for intermediate cases.
@ -34,7 +27,9 @@ macro_rules! impl_trifecta {
(tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
}
fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) {
let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD);
let tmp = (lhs as $uD)
.wrapping_mul(mul as $uD)
.wrapping_add(add as $uD);
(tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
}
@ -62,9 +57,9 @@ macro_rules! impl_trifecta {
// The quotient cannot be more than 1. The highest set bit of `duo` needs to be at
// least one place higher than `div` for the quotient to be more than 1.
if duo >= div {
return (1, duo - div)
return (1, duo - div);
} else {
return (0, duo)
return (0, duo);
}
}
@ -76,10 +71,7 @@ macro_rules! impl_trifecta {
// `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the
// `div_lz <= duo_lz` branch) so no numerical error.
let (quo, rem) = $half_division(duo as $uX, div as $uX);
return (
quo as $uD,
rem as $uD
)
return (quo as $uD, rem as $uD);
}
// `{2^n, 2^div_sb} <= duo < 2^n_d`
@ -99,22 +91,16 @@ macro_rules! impl_trifecta {
let div_0 = div as $uH as $uX;
let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
let duo_mid =
((duo >> $n_h) as $uH as $uX)
| (rem_3 << $n_h);
let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h);
let (quo_1, rem_2) = $half_division(duo_mid, div_0);
let duo_lo =
(duo as $uH as $uX)
| (rem_2 << $n_h);
let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h);
let (quo_0, rem_1) = $half_division(duo_lo, div_0);
return (
(quo_0 as $uD)
| ((quo_1 as $uD) << $n_h)
| ((quo_hi as $uD) << n),
rem_1 as $uD
)
(quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n),
rem_1 as $uD,
);
}
// relative leading significant bits, cannot overflow because of above branches
@ -237,13 +223,10 @@ macro_rules! impl_trifecta {
(quo - 1) as $uD,
// Both the addition and subtraction can overflow, but when combined end up
// as a correct positive number.
duo.wrapping_add(div).wrapping_sub(tmp)
)
duo.wrapping_add(div).wrapping_sub(tmp),
);
} else {
return (
quo as $uD,
duo - tmp
)
return (quo as $uD, duo - tmp);
}
}
@ -372,13 +355,10 @@ macro_rules! impl_trifecta {
if duo < tmp {
return (
quo + ((quo_part - 1) as $uD),
duo.wrapping_add(div).wrapping_sub(tmp)
)
duo.wrapping_add(div).wrapping_sub(tmp),
);
} else {
return (
quo + (quo_part as $uD),
duo - tmp
)
return (quo + (quo_part as $uD), duo - tmp);
}
}
@ -387,15 +367,9 @@ macro_rules! impl_trifecta {
if div_lz <= duo_lz {
// quotient can have 0 or 1 added to it
if div <= duo {
return (
quo + 1,
duo - div
)
return (quo + 1, duo - div);
} else {
return (
quo,
duo
)
return (quo, duo);
}
}
@ -404,38 +378,9 @@ macro_rules! impl_trifecta {
if n <= duo_lz {
// simple division and addition
let tmp = $half_division(duo as $uX, div as $uX);
return (
quo + (tmp.0 as $uD),
tmp.1 as $uD
)
return (quo + (tmp.0 as $uD), tmp.1 as $uD);
}
}
}
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
/// tuple.
$(
#[$signed_attr]
)*
pub fn $signed_name(duo: $iD, div: $iD) -> ($iD, $iD) {
match (duo < 0, div < 0) {
(false, false) => {
let t = $unsigned_name(duo as $uD, div as $uD);
(t.0 as $iD, t.1 as $iD)
},
(true, false) => {
let t = $unsigned_name(duo.wrapping_neg() as $uD, div as $uD);
((t.0 as $iD).wrapping_neg(), (t.1 as $iD).wrapping_neg())
},
(false, true) => {
let t = $unsigned_name(duo as $uD, div.wrapping_neg() as $uD);
((t.0 as $iD).wrapping_neg(), t.1 as $iD)
},
(true, true) => {
let t = $unsigned_name(duo.wrapping_neg() as $uD, div.wrapping_neg() as $uD);
(t.0 as $iD, (t.1 as $iD).wrapping_neg())
},
}
}
}
};
}