From 5a2da96a44abad6be752c7ee1cac173aa1ca2f7c Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Mon, 9 Dec 2024 09:25:22 +0000
Subject: [PATCH] dec2flt: Update documentation of existing methods

Fix or elaborate on existing float parsing documentation. This includes
introducing a convention that should make naming more consistent.
---
 library/core/src/num/dec2flt/common.rs  | 14 ++++++++------
 library/core/src/num/dec2flt/decimal.rs | 22 +++++++++++++---------
 library/core/src/num/dec2flt/mod.rs     | 16 ++++++++++++++--
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/library/core/src/num/dec2flt/common.rs b/library/core/src/num/dec2flt/common.rs
index 4dadf406ae8c..1646d3a95d3b 100644
--- a/library/core/src/num/dec2flt/common.rs
+++ b/library/core/src/num/dec2flt/common.rs
@@ -8,12 +8,12 @@ pub(crate) trait ByteSlice {
     /// Writes a 64-bit integer as 8 bytes in little-endian order.
     fn write_u64(&mut self, value: u64);
 
-    /// Calculate the offset of a slice from another.
+    /// Calculate the difference in length between two slices.
     fn offset_from(&self, other: &Self) -> isize;
 
     /// Iteratively parse and consume digits from bytes.
-    /// Returns the same bytes with consumed digits being
-    /// elided.
+    ///
+    /// Returns the same bytes with consumed digits being elided. Breaks on invalid digits.
     fn parse_digits(&self, func: impl FnMut(u8)) -> &Self;
 }
 
@@ -39,11 +39,11 @@ impl ByteSlice for [u8] {
     fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self {
         let mut s = self;
 
-        while let Some((c, s_next)) = s.split_first() {
+        while let Some((c, rest)) = s.split_first() {
             let c = c.wrapping_sub(b'0');
             if c < 10 {
                 func(c);
-                s = s_next;
+                s = rest;
             } else {
                 break;
             }
@@ -53,7 +53,9 @@ impl ByteSlice for [u8] {
     }
 }
 
-/// Determine if 8 bytes are all decimal digits.
+/// Determine if all characters in an 8-byte byte string (represented as a `u64`) are decimal
+/// digits.
+///
 /// This does not care about the order in which the bytes were loaded.
 pub(crate) fn is_8digits(v: u64) -> bool {
     let a = v.wrapping_add(0x4646_4646_4646_4646);
diff --git a/library/core/src/num/dec2flt/decimal.rs b/library/core/src/num/dec2flt/decimal.rs
index b37724ba62d5..a84e11ec8e71 100644
--- a/library/core/src/num/dec2flt/decimal.rs
+++ b/library/core/src/num/dec2flt/decimal.rs
@@ -1,4 +1,4 @@
-//! Arbitrary-precision decimal class for fallback algorithms.
+//! Arbitrary-precision decimal type used by fallback algorithms.
 //!
 //! This is only used if the fast-path (native floats) and
 //! the Eisel-Lemire algorithm are unable to unambiguously
@@ -11,6 +11,7 @@
 
 use crate::num::dec2flt::common::{ByteSlice, is_8digits};
 
+/// A decimal floating-point number.
 #[derive(Clone)]
 pub(super) struct Decimal {
     /// The number of significant digits in the decimal.
@@ -30,18 +31,17 @@ impl Default for Decimal {
 }
 
 impl Decimal {
-    /// The maximum number of digits required to unambiguously round a float.
+    /// The maximum number of digits required to unambiguously round up to a 64-bit float.
     ///
-    /// For a double-precision IEEE 754 float, this required 767 digits,
-    /// so we store the max digits + 1.
+    /// For an IEEE 754 binary64 float, this requires 767 digits, so we store the max digits + 1.
     ///
     /// We can exactly represent a float in radix `b` from radix 2 if
     /// `b` is divisible by 2. This function calculates the exact number of
     /// digits required to exactly represent that float.
     ///
     /// According to the "Handbook of Floating Point Arithmetic",
-    /// for IEEE754, with emin being the min exponent, p2 being the
-    /// precision, and b being the radix, the number of digits follows as:
+    /// for IEEE754, with `emin` being the min exponent, `p2` being the
+    /// precision, and `b` being the radix, the number of digits follows as:
     ///
     /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
     ///
@@ -56,11 +56,14 @@ impl Decimal {
     /// In Python:
     ///     `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))`
     pub(super) const MAX_DIGITS: usize = 768;
-    /// The max digits that can be exactly represented in a 64-bit integer.
+    /// The max decimal digits that can be exactly represented in a 64-bit integer.
     pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19;
     pub(super) const DECIMAL_POINT_RANGE: i32 = 2047;
 
-    /// Append a digit to the buffer.
+    /// Append a digit to the buffer if it fits.
+    // FIXME(tgross35): it may be better for this to return an option
+    // FIXME(tgross35): incrementing the digit counter even if we don't push anything
+    // seems incorrect.
     pub(super) fn try_add_digit(&mut self, digit: u8) {
         if self.num_digits < Self::MAX_DIGITS {
             self.digits[self.num_digits] = digit;
@@ -69,6 +72,7 @@ impl Decimal {
     }
 
     /// Trim trailing zeros from the buffer.
+    // FIXME(tgross35): this could be `.rev().position()` if perf is okay
     pub(super) fn trim(&mut self) {
         // All of the following calls to `Decimal::trim` can't panic because:
         //
@@ -86,7 +90,7 @@ impl Decimal {
     pub(super) fn round(&self) -> u64 {
         if self.num_digits == 0 || self.decimal_point < 0 {
             return 0;
-        } else if self.decimal_point > 18 {
+        } else if self.decimal_point >= Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32 {
             return 0xFFFF_FFFF_FFFF_FFFF_u64;
         }
         let dp = self.decimal_point as usize;
diff --git a/library/core/src/num/dec2flt/mod.rs b/library/core/src/num/dec2flt/mod.rs
index 6dca74068453..91bfe1bef2e7 100644
--- a/library/core/src/num/dec2flt/mod.rs
+++ b/library/core/src/num/dec2flt/mod.rs
@@ -3,8 +3,8 @@
 //! # Problem statement
 //!
 //! We are given a decimal string such as `12.34e56`. This string consists of integral (`12`),
-//! fractional (`34`), and exponent (`56`) parts. All parts are optional and interpreted as zero
-//! when missing.
+//! fractional (`34`), and exponent (`56`) parts. All parts are optional and interpreted as a
+//! default value (1 or 0) when missing.
 //!
 //! We seek the IEEE 754 floating point number that is closest to the exact value of the decimal
 //! string. It is well-known that many decimal strings do not have terminating representations in
@@ -67,6 +67,18 @@
 //! "such that the exponent +/- the number of decimal digits fits into a 64 bit integer".
 //! Larger exponents are accepted, but we don't do arithmetic with them, they are immediately
 //! turned into {positive,negative} {zero,infinity}.
+//!
+//! # Notation
+//!
+//! This module uses the same notation as the Lemire paper:
+//!
+//! - `m`: binary mantissa; always nonnegative
+//! - `p`: binary exponent; a signed integer
+//! - `w`: decimal significand; always nonnegative
+//! - `q`: decimal exponent; a signed integer
+//!
+//! This gives `m * 2^p` for the binary floating-point number, with `w * 10^q` as the decimal
+//! equivalent.
 
 #![doc(hidden)]
 #![unstable(