From 5a2da96a44abad6be752c7ee1cac173aa1ca2f7c Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Mon, 9 Dec 2024 09:25:22 +0000 Subject: [PATCH] dec2flt: Update documentation of existing methods Fix or elaborate existing float parsing documentation. This includes introducing a convention that should make naming more consistent. --- library/core/src/num/dec2flt/common.rs | 14 ++++++++------ library/core/src/num/dec2flt/decimal.rs | 22 +++++++++++++--------- library/core/src/num/dec2flt/mod.rs | 16 ++++++++++++++-- 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/library/core/src/num/dec2flt/common.rs b/library/core/src/num/dec2flt/common.rs index 4dadf406ae8c..1646d3a95d3b 100644 --- a/library/core/src/num/dec2flt/common.rs +++ b/library/core/src/num/dec2flt/common.rs @@ -8,12 +8,12 @@ pub(crate) trait ByteSlice { /// Writes a 64-bit integer as 8 bytes in little-endian order. fn write_u64(&mut self, value: u64); - /// Calculate the offset of a slice from another. + /// Calculate the difference in length between two slices. fn offset_from(&self, other: &Self) -> isize; /// Iteratively parse and consume digits from bytes. - /// Returns the same bytes with consumed digits being - /// elided. + /// + /// Returns the same bytes with consumed digits being elided. Breaks on invalid digits. fn parse_digits(&self, func: impl FnMut(u8)) -> &Self; } @@ -39,11 +39,11 @@ impl ByteSlice for [u8] { fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self { let mut s = self; - while let Some((c, s_next)) = s.split_first() { + while let Some((c, rest)) = s.split_first() { let c = c.wrapping_sub(b'0'); if c < 10 { func(c); - s = s_next; + s = rest; } else { break; } @@ -53,7 +53,9 @@ impl ByteSlice for [u8] { } } -/// Determine if 8 bytes are all decimal digits. +/// Determine if all characters in an 8-byte byte string (represented as a `u64`) are all decimal +/// digits. +/// /// This does not care about the order in which the bytes were loaded. pub(crate) fn is_8digits(v: u64) -> bool { let a = v.wrapping_add(0x4646_4646_4646_4646); diff --git a/library/core/src/num/dec2flt/decimal.rs b/library/core/src/num/dec2flt/decimal.rs index b37724ba62d5..a84e11ec8e71 100644 --- a/library/core/src/num/dec2flt/decimal.rs +++ b/library/core/src/num/dec2flt/decimal.rs @@ -1,4 +1,4 @@ -//! Arbitrary-precision decimal class for fallback algorithms. +//! Arbitrary-precision decimal type used by fallback algorithms. //! //! This is only used if the fast-path (native floats) and //! the Eisel-Lemire algorithm are unable to unambiguously @@ -11,6 +11,7 @@ use crate::num::dec2flt::common::{ByteSlice, is_8digits}; +/// A decimal floating-point number. #[derive(Clone)] pub(super) struct Decimal { /// The number of significant digits in the decimal. @@ -30,18 +31,17 @@ impl Default for Decimal { } impl Decimal { - /// The maximum number of digits required to unambiguously round a float. + /// The maximum number of digits required to unambiguously round up to a 64-bit float. /// - /// For a double-precision IEEE 754 float, this required 767 digits, - /// so we store the max digits + 1. + /// For an IEEE 754 binary64 float, this required 767 digits. So we store the max digits + 1. /// /// We can exactly represent a float in radix `b` from radix 2 if /// `b` is divisible by 2. This function calculates the exact number of /// digits required to exactly represent that float. /// /// According to the "Handbook of Floating Point Arithmetic", - /// for IEEE754, with emin being the min exponent, p2 being the - /// precision, and b being the radix, the number of digits follows as: + /// for IEEE754, with `emin` being the min exponent, `p2` being the + /// precision, and `b` being the radix, the number of digits follows as: /// /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋` /// @@ -56,11 +56,14 @@ impl Decimal { /// In Python: /// `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))` pub(super) const MAX_DIGITS: usize = 768; - /// The max digits that can be exactly represented in a 64-bit integer. + /// The max decimal digits that can be exactly represented in a 64-bit integer. pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19; pub(super) const DECIMAL_POINT_RANGE: i32 = 2047; - /// Append a digit to the buffer. + /// Append a digit to the buffer if it fits. + // FIXME(tgross35): it may be better for this to return an option + // FIXME(tgross35): incrementing the digit counter even if we don't push anything + // seems incorrect. pub(super) fn try_add_digit(&mut self, digit: u8) { if self.num_digits < Self::MAX_DIGITS { self.digits[self.num_digits] = digit; @@ -69,6 +72,7 @@ impl Decimal { } /// Trim trailing zeros from the buffer. + // FIXME(tgross35): this could be `.rev().position()` if perf is okay pub(super) fn trim(&mut self) { // All of the following calls to `Decimal::trim` can't panic because: // @@ -86,7 +90,7 @@ impl Decimal { pub(super) fn round(&self) -> u64 { if self.num_digits == 0 || self.decimal_point < 0 { return 0; - } else if self.decimal_point > 18 { + } else if self.decimal_point >= Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32 { return 0xFFFF_FFFF_FFFF_FFFF_u64; } let dp = self.decimal_point as usize; diff --git a/library/core/src/num/dec2flt/mod.rs b/library/core/src/num/dec2flt/mod.rs index 6dca74068453..91bfe1bef2e7 100644 --- a/library/core/src/num/dec2flt/mod.rs +++ b/library/core/src/num/dec2flt/mod.rs @@ -3,8 +3,8 @@ //! # Problem statement //! //! We are given a decimal string such as `12.34e56`. This string consists of integral (`12`), -//! fractional (`34`), and exponent (`56`) parts. All parts are optional and interpreted as zero -//! when missing. +//! fractional (`34`), and exponent (`56`) parts. All parts are optional and interpreted as a +//! default value (1 or 0) when missing. //! //! We seek the IEEE 754 floating point number that is closest to the exact value of the decimal //! string. It is well-known that many decimal strings do not have terminating representations in @@ -67,6 +67,18 @@ //! "such that the exponent +/- the number of decimal digits fits into a 64 bit integer". //! Larger exponents are accepted, but we don't do arithmetic with them, they are immediately //! turned into {positive,negative} {zero,infinity}. +//! +//! # Notation +//! +//! This module uses the same notation as the Lemire paper: +//! +//! - `m`: binary mantissa; always nonnegative +//! - `p`: binary exponent; a signed integer +//! - `w`: decimal significand; always nonnegative +//! - `q`: decimal exponent; a signed integer +//! +//! This gives `m * 2^p` for the binary floating-point number, with `w * 10^q` as the decimal +//! equivalent. #![doc(hidden)] #![unstable(