Accurate decimal-to-float parsing routines.

This commit primarily adds implementations of the algorithms from William Clinger's paper "How to Read Floating Point Numbers Accurately". It also includes a lot of infrastructure necessary for those algorithms, and some unit tests. Since these algorithms reject a few (extreme) inputs that were previously accepted, this could be seen as a [breaking-change]
2015-07-26 17:50:29 +02:00 · 2015-07-26 17:50:29 +02:00 · ba792a4baa
commit ba792a4baa
parent b7e39a1c2d
13 changed files with 2787 additions and 15 deletions
--- a/src/libcoretest/num/dec2flt/mod.rs
+++ b/src/libcoretest/num/dec2flt/mod.rs
@ -0,0 +1,174 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(overflowing_literals)]
+
+use std::{i64, f32, f64};
+use test;
+use core::num::dec2flt::{to_f32, to_f64};
+
+mod parse;
+mod rawfp;
+
+// Take a float literal, turn it into a string in various ways (that are all trusted
+// to be correct) and see if those strings are parsed back to the value of the literal.
+// Requires a *polymorphic literal*, i.e. one that can serve as f64 as well as f32.
+macro_rules! test_literal {
+    ($x: expr) => ({
+        let x32: f32 = $x;
+        let x64: f64 = $x;
+        let inputs = &[stringify!($x).into(), format!("{:?}", x64), format!("{:e}", x64)];
+        for input in inputs {
+            if input != "inf" {
+                assert_eq!(to_f64(input), Ok(x64));
+                assert_eq!(to_f32(input), Ok(x32));
+                let neg_input = &format!("-{}", input);
+                assert_eq!(to_f64(neg_input), Ok(-x64));
+                assert_eq!(to_f32(neg_input), Ok(-x32));
+            }
+        }
+    })
+}
+
+#[test]
+fn ordinary() {
+    test_literal!(1.0);
+    test_literal!(3e-5);
+    test_literal!(0.1);
+    test_literal!(12345.);
+    test_literal!(0.9999999);
+    test_literal!(2.2250738585072014e-308);
+}
+
+#[test]
+fn special_code_paths() {
+    test_literal!(36893488147419103229.0); // 2^65 - 3, triggers half-to-even with even significand
+    test_literal!(101e-33); // Triggers the tricky underflow case in AlgorithmM (for f32)
+    test_literal!(1e23); // Triggers AlgorithmR
+    test_literal!(2075e23); // Triggers another path through AlgorithmR
+    test_literal!(8713e-23); // ... and yet another.
+}
+
+#[test]
+fn large() {
+    test_literal!(1e300);
+    test_literal!(123456789.34567e250);
+    test_literal!(943794359898089732078308743689303290943794359843568973207830874368930329.);
+}
+
+#[test]
+fn subnormals() {
+    test_literal!(5e-324);
+    test_literal!(91e-324);
+    test_literal!(1e-322);
+    test_literal!(13245643e-320);
+    test_literal!(2.22507385851e-308);
+    test_literal!(2.1e-308);
+    test_literal!(4.9406564584124654e-324);
+}
+
+#[test]
+fn infinity() {
+    test_literal!(1e400);
+    test_literal!(1e309);
+    test_literal!(2e308);
+    test_literal!(1.7976931348624e308);
+}
+
+#[test]
+fn zero() {
+    test_literal!(0.0);
+    test_literal!(1e-325);
+    test_literal!(1e-326);
+    test_literal!(1e-500);
+}
+
+#[test]
+fn lonely_dot() {
+    assert_eq!(to_f64("."), Ok(0.0));
+}
+
+#[test]
+fn nan() {
+    assert!(to_f64("NaN").unwrap().is_nan());
+    assert!(to_f32("NaN").unwrap().is_nan());
+}
+
+#[test]
+fn inf() {
+    assert_eq!(to_f64("inf"), Ok(f64::INFINITY));
+    assert_eq!(to_f64("-inf"), Ok(f64::NEG_INFINITY));
+    assert_eq!(to_f32("inf"), Ok(f32::INFINITY));
+    assert_eq!(to_f32("-inf"), Ok(f32::NEG_INFINITY));
+}
+
+#[test]
+fn massive_exponent() {
+    let max = i64::MAX; // with "000" appended below, the exponent no longer fits in an i64
+    assert_eq!(to_f64(&format!("1e{}000", max)), Ok(f64::INFINITY));
+    assert_eq!(to_f64(&format!("1e-{}000", max)), Ok(0.0));
+    assert_eq!(to_f64(&format!("1e+{}000", max)), Ok(f64::INFINITY)); // explicit '+' sign
+}
+
+#[bench]
+fn bench_0(b: &mut test::Bencher) {
+    b.iter(|| to_f64("0.0"));
+}
+
+#[bench]
+fn bench_42(b: &mut test::Bencher) {
+    b.iter(|| to_f64("42"));
+}
+
+#[bench]
+fn bench_huge_int(b: &mut test::Bencher) {
+    // 2^127 - 1
+    b.iter(|| to_f64("170141183460469231731687303715884105727"));
+}
+
+#[bench]
+fn bench_short_decimal(b: &mut test::Bencher) {
+    b.iter(|| to_f64("1234.5678"));
+}
+
+#[bench]
+fn bench_pi_long(b: &mut test::Bencher) {
+    b.iter(|| to_f64("3.14159265358979323846264338327950288"));
+}
+
+#[bench]
+fn bench_pi_short(b: &mut test::Bencher) {
+    b.iter(|| to_f64("3.141592653589793")); // pi to 16 significant digits
+}
+
+#[bench]
+fn bench_1e150(b: &mut test::Bencher) {
+    b.iter(|| to_f64("1e150"));
+}
+
+#[bench]
+fn bench_long_decimal_and_exp(b: &mut test::Bencher) {
+    b.iter(|| to_f64("727501488517303786137132964064381141071e-123"));
+}
+
+#[bench]
+fn bench_min_subnormal(b: &mut test::Bencher) {
+    b.iter(|| to_f64("5e-324"));
+}
+
+#[bench]
+fn bench_min_normal(b: &mut test::Bencher) {
+    b.iter(|| to_f64("2.2250738585072014e-308"));
+}
+
+#[bench]
+fn bench_max(b: &mut test::Bencher) {
+    b.iter(|| to_f64("1.7976931348623157e308"));
+}
--- a/src/libcoretest/num/dec2flt/parse.rs
+++ b/src/libcoretest/num/dec2flt/parse.rs
@ -0,0 +1,52 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::iter;
+use core::num::dec2flt::parse::{Decimal, parse_decimal};
+use core::num::dec2flt::parse::ParseResult::{Valid, Invalid};
+
+#[test]
+fn missing_pieces() {
+    let permutations = &[".e", "1e", "e4", "e", ".12e", "321.e", "32.12e+", "12.32e-"];
+    for &s in permutations {
+        assert_eq!(parse_decimal(s), Invalid);
+    }
+}
+
+#[test]
+fn invalid_chars() {
+    let invalid = "r,?<j";
+    let valid_strings = &["123", "666.", ".1", "5e1", "7e-3", "0.0e+1"];
+    for c in invalid.chars() {
+        for s in valid_strings {
+            for i in 0..s.len() + 1 { // `+ 1`: also try the character appended at the very end
+                let mut input = String::new();
+                input.push_str(s);
+                input.insert(i, c);
+                assert!(parse_decimal(&input) == Invalid, "did not reject invalid {:?}", input);
+            }
+        }
+    }
+}
+
+#[test]
+fn valid() {
+    assert_eq!(parse_decimal("123.456e789"), Valid(Decimal::new(b"123", b"456", 789)));
+    assert_eq!(parse_decimal("123.456e+789"), Valid(Decimal::new(b"123", b"456", 789)));
+    assert_eq!(parse_decimal("123.456e-789"), Valid(Decimal::new(b"123", b"456", -789)));
+    assert_eq!(parse_decimal(".050"), Valid(Decimal::new(b"", b"050", 0)));
+    assert_eq!(parse_decimal("999"), Valid(Decimal::new(b"999", b"", 0)));
+    assert_eq!(parse_decimal("1.e300"), Valid(Decimal::new(b"1", b"", 300)));
+    assert_eq!(parse_decimal(".1e300"), Valid(Decimal::new(b"", b"1", 300)));
+    assert_eq!(parse_decimal("101e-33"), Valid(Decimal::new(b"101", b"", -33)));
+    let zeros: String = iter::repeat('0').take(25).collect();
+    let s = format!("1.5e{}", zeros);
+    assert_eq!(parse_decimal(&s), Valid(Decimal::new(b"1", b"5", 0)));
+}
--- a/src/libcoretest/num/dec2flt/rawfp.rs
+++ b/src/libcoretest/num/dec2flt/rawfp.rs
@ -0,0 +1,139 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::f64;
+use core::num::flt2dec::strategy::grisu::Fp;
+use core::num::dec2flt::rawfp::{fp_to_float, prev_float, next_float, round_normal};
+
+#[test]
+fn fp_to_float_half_to_even() {
+    fn is_normalized(sig: u64) -> bool {
+            // intentionally written without {min,max}_sig() as a sanity check
+            sig >> 52 == 1 && sig >> 53 == 0
+    }
+
+    fn conv(sig: u64) -> u64 {
+        // The significands are perfectly in range, so the exponent should not matter
+        let (m1, e1, _) = fp_to_float::<f64>(Fp { f: sig, e: 0 }).integer_decode();
+        assert_eq!(e1, 0 + 64 - 53);
+        let (m2, e2, _) = fp_to_float::<f64>(Fp { f: sig, e: 55 }).integer_decode();
+        assert_eq!(e2, 55 + 64 - 53);
+        assert_eq!(m2, m1);
+        let (m3, e3, _) = fp_to_float::<f64>(Fp { f: sig, e: -78 }).integer_decode();
+        assert_eq!(e3, -78 + 64 - 53);
+        assert_eq!(m3, m2);
+        m3
+    }
+
+    let odd = 0x1F_EDCB_A012_345F;
+    let even = odd - 1;
+    assert!(is_normalized(odd));
+    assert!(is_normalized(even));
+    assert_eq!(conv(odd << 11), odd);
+    assert_eq!(conv(even << 11), even);
+    assert_eq!(conv(odd << 11 | 1 << 10), odd + 1);
+    assert_eq!(conv(even << 11 | 1 << 10), even);
+    assert_eq!(conv(even << 11 | 1 << 10 | 1), even + 1);
+    assert_eq!(conv(odd << 11 | 1 << 9), odd);
+    assert_eq!(conv(even << 11 | 1 << 9), even);
+    assert_eq!(conv(odd << 11 | 0x7FF), odd + 1);
+    assert_eq!(conv(even << 11 | 0x7FF), even + 1);
+    assert_eq!(conv(odd << 11 | 0x3FF), odd);
+    assert_eq!(conv(even << 11 | 0x3FF), even);
+}
+
+#[test]
+fn integers_to_f64() {
+    assert_eq!(fp_to_float::<f64>(Fp { f: 1, e: 0 }), 1.0);
+    assert_eq!(fp_to_float::<f64>(Fp { f: 42, e: 7 }), (42 << 7) as f64);
+    assert_eq!(fp_to_float::<f64>(Fp { f: 1 << 20, e: 30 }), (1u64 << 50) as f64);
+    assert_eq!(fp_to_float::<f64>(Fp { f: 4, e: -3 }), 0.5);
+}
+
+const SOME_FLOATS: [f64; 9] =
+    [0.1f64, 33.568, 42.1e-5, 777.0e9, 1.1111, 0.347997,
+     9843579834.35892, 12456.0e-150, 54389573.0e-150];
+
+
+#[test]
+fn human_f64_roundtrip() {
+    for &x in &SOME_FLOATS {
+        let (f, e, _) = x.integer_decode();
+        let fp = Fp { f: f, e: e};
+        assert_eq!(fp_to_float::<f64>(fp), x);
+    }
+}
+
+#[test]
+fn rounding_overflow() {
+    let x = Fp { f: 0xFF_FF_FF_FF_FF_FF_FF_00u64, e: 42 };
+    let rounded = round_normal::<f64>(x);
+    let adjusted_k = x.e + 64 - 53;
+    assert_eq!(rounded.sig, 1 << 52);
+    assert_eq!(rounded.k, adjusted_k + 1);
+}
+
+#[test]
+fn prev_float_monotonic() {
+    let mut x = 1.0;
+    for _ in 0..100 {
+        let x1 = prev_float(x);
+        assert!(x1 < x);
+        assert!(x - x1 < 1e-15);
+        x = x1;
+    }
+}
+
+const MIN_SUBNORMAL: f64 = 5e-324;
+
+#[test]
+fn next_float_zero() {
+    let tiny = next_float(0.0);
+    assert_eq!(tiny, MIN_SUBNORMAL);
+    assert!(tiny != 0.0);
+}
+
+#[test]
+fn next_float_subnormal() {
+    let second = next_float(MIN_SUBNORMAL);
+    // For subnormals, MIN_SUBNORMAL is the ULP
+    assert!(second != MIN_SUBNORMAL);
+    assert!(second > 0.0);
+    assert_eq!(second - MIN_SUBNORMAL, MIN_SUBNORMAL);
+}
+
+#[test]
+fn next_float_inf() {
+    assert_eq!(next_float(f64::MAX), f64::INFINITY);
+    assert_eq!(next_float(f64::INFINITY), f64::INFINITY);
+}
+
+#[test]
+fn next_prev_identity() {
+    for &x in &SOME_FLOATS {
+        assert_eq!(prev_float(next_float(x)), x);
+        assert_eq!(prev_float(prev_float(next_float(next_float(x)))), x);
+        assert_eq!(next_float(prev_float(x)), x);
+        assert_eq!(next_float(next_float(prev_float(prev_float(x)))), x);
+    }
+}
+
+#[test]
+fn next_float_monotonic() {
+    let mut x = 0.49999999999999;
+    assert!(x < 0.5);
+    for _ in 0..200 {
+        let x1 = next_float(x);
+        assert!(x1 > x);
+        assert!(x1 - x < 1e-15, "next_float_monotonic: delta = {:?}", x1 - x);
+        x = x1;
+    }
+    assert!(x > 0.5);
+}
--- a/src/libcoretest/num/mod.rs
+++ b/src/libcoretest/num/mod.rs
@ -30,6 +30,7 @@ mod u32;
 mod u64;

 mod flt2dec;
+mod dec2flt;

 /// Helper function for testing numeric operations
 pub fn test_num<T>(ten: T, two: T) where