auto merge of #8272 : DaGenix/rust/digest-md5-impl-not-unrolled, r=cmr

An MD5 implementation was originally included in #8097, but, since there are a couple different implementations of that digest algorithm (@alco mentioned his implementation on the mailing list just before I opened that PR), it was suggested that I remove it from that PR and open up a new PR to discuss the different implementations and the best way forward. If anyone wants to discuss a different implementation, feel free to present it here and discuss and compare it to this one. I'll just discuss my implementation and I'll leave it to others to present details of theirs. This implementation relies on the FixedBuffer struct from cryptoutil.rs for managing the input buffer, just like the Sha1 and Sha2 digest implementations do. I tried manually unrolling the loops in the compression function, but I got slightly worse performance when I did that. Outside of the #[test]s, I also tested the implementation by generating 1,000 inputs of up to 10MB in size and checking the MD5 digest calculated by this code against the MD5 digest calculated by Java's implementation. On my computer, I'm getting the following performance: ``` test md5::bench::md5_10 ... bench: 52 ns/iter (+/- 1) = 192 MB/s test md5::bench::md5_1k ... bench: 2819 ns/iter (+/- 44) = 363 MB/s test md5::bench::md5_64k ... bench: 178566 ns/iter (+/- 4927) = 367 MB/s ```
2013-08-17 07:42:01 -07:00 · 2013-08-17 07:42:01 -07:00 · 29a67d1dc2
commit 29a67d1dc2
parent 1942a7a3fb b00aa12374
5 changed files with 473 additions and 44 deletions
--- a/src/libextra/crypto/cryptoutil.rs
+++ b/src/libextra/crypto/cryptoutil.rs
@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

-use std::num::One;
+use std::num::{One, Zero, CheckedAdd};
 use std::vec::bytes::{MutableByteVector, copy_memory};


@ -36,6 +36,18 @@ pub fn write_u32_be(dst: &mut[u8], input: u32) {
    }
 }

+/// Write a u32 into a vector, which must be 4 bytes long. The value is written in little-endian
+/// format.
+pub fn write_u32_le(dst: &mut[u8], input: u32) {
+    use std::cast::transmute;
+    use std::unstable::intrinsics::to_le32;
+    assert!(dst.len() == 4);
+    unsafe {
+        let x: *mut i32 = transmute(dst.unsafe_mut_ref(0));
+        *x = to_le32(input as i32);
+    }
+}
+
 /// Read a vector of bytes into a vector of u64s. The values are read in big-endian format.
 pub fn read_u64v_be(dst: &mut[u64], input: &[u8]) {
    use std::cast::transmute;
@ -68,51 +80,90 @@ pub fn read_u32v_be(dst: &mut[u32], input: &[u8]) {
    }
 }

-
-/// Returns true if adding the two parameters will result in integer overflow
-pub fn will_add_overflow<T: Int + Unsigned>(x: T, y: T) -> bool {
-    // This doesn't handle negative values! Don't copy this code elsewhere without considering if
-    // negative values are important to you!
-    let max: T = Bounded::max_value();
-    return x > max - y;
+/// Read a vector of bytes into a vector of u32s. The values are read in little-endian format.
+pub fn read_u32v_le(dst: &mut[u32], input: &[u8]) {
+    use std::cast::transmute;
+    use std::unstable::intrinsics::to_le32;
+    assert!(dst.len() * 4 == input.len());
+    unsafe {
+        let mut x: *mut i32 = transmute(dst.unsafe_mut_ref(0));
+        let mut y: *i32 = transmute(input.unsafe_ref(0));
+        do dst.len().times() {
+            *x = to_le32(*y);
+            x = x.offset(1);
+            y = y.offset(1);
+        }
+    }
 }

-/// Shifts the second parameter and then adds it to the first. fails!() if there would be unsigned
-/// integer overflow.
-pub fn shift_add_check_overflow<T: Int + Unsigned + Clone>(x: T, mut y: T, shift: T) -> T {
-    if y.leading_zeros() < shift {
-        fail!("Could not add values - integer overflow.");
-    }
-    y = y << shift;

-    if will_add_overflow(x.clone(), y.clone()) {
-        fail!("Could not add values - integer overflow.");
-    }
-
-    return x + y;
+trait ToBits {
+    /// Convert the value in bytes to the number of bits, a tuple where the 1st item is the
+    /// high-order value and the 2nd item is the low order value.
+    fn to_bits(self) -> (Self, Self);
 }

-/// Shifts the second parameter and then adds it to the first, which is a tuple where the first
-/// element is the high order value. fails!() if there would be unsigned integer overflow.
-pub fn shift_add_check_overflow_tuple
-        <T: Int + Unsigned + Clone>
-        (x: (T, T), mut y: T, shift: T) -> (T, T) {
-    if y.leading_zeros() < shift {
-        fail!("Could not add values - integer overflow.");
+impl ToBits for u64 {
+    fn to_bits(self) -> (u64, u64) {
+        return (self >> 61, self << 3);
    }
-    y = y << shift;
+}

-    match x {
-        (hi, low) => {
-            let one: T = One::one();
-            if will_add_overflow(low.clone(), y.clone()) {
-                if will_add_overflow(hi.clone(), one.clone()) {
-                    fail!("Could not add values - integer overflow.");
-                } else {
-                    return (hi + one, low + y);
-                }
+/// Adds the specified number of bytes to the bit count. fail!() if this would cause numeric
+/// overflow.
+pub fn add_bytes_to_bits<T: Int + CheckedAdd + ToBits>(bits: T, bytes: T) -> T {
+    let (new_high_bits, new_low_bits) = bytes.to_bits();
+
+    if new_high_bits > Zero::zero() {
+        fail!("Numeric overflow occured.")
+    }
+
+    match bits.checked_add(&new_low_bits) {
+        Some(x) => return x,
+        None => fail!("Numeric overflow occured.")
+    }
+}
+
+/// Adds the specified number of bytes to the bit count, which is a tuple where the first element is
+/// the high order value. fail!() if this would cause numeric overflow.
+pub fn add_bytes_to_bits_tuple
+        <T: Int + Unsigned + CheckedAdd + ToBits>
+        (bits: (T, T), bytes: T) -> (T, T) {
+    let (new_high_bits, new_low_bits) = bytes.to_bits();
+    let (hi, low) = bits;
+
+    // Add the low order value - if there is no overflow, then add the high order values
+    // If the addition of the low order values causes overflow, add one to the high order values
+    // before adding them.
+    match low.checked_add(&new_low_bits) {
+        Some(x) => {
+            if new_high_bits == Zero::zero() {
+                // This is the fast path - every other alternative will rarely occur in practice
+                // considering how large an input would need to be for those paths to be used.
+                return (hi, x);
            } else {
-                return (hi, low + y);
+                match hi.checked_add(&new_high_bits) {
+                    Some(y) => return (y, x),
+                    None => fail!("Numeric overflow occured.")
+                }
+            }
+        },
+        None => {
+            let one: T = One::one();
+            let z = match new_high_bits.checked_add(&one) {
+                Some(w) => w,
+                None => fail!("Numeric overflow occured.")
+            };
+            match hi.checked_add(&z) {
+                // This re-executes the addition that was already performed earlier when overflow
+                // occured, this time allowing the overflow to happen. Technically, this could be
+                // avoided by using the checked add intrinsic directly, but that involves using
+                // unsafe code and is not really worthwhile considering how infrequently code will
+                // run in practice. This is the reason that this function requires that the type T
+                // be Unsigned - overflow is not defined for Signed types. This function could be
+                // implemented for signed types as well if that were needed.
+                Some(y) => return (y, low + new_low_bits),
+                None => fail!("Numeric overflow occured.")
            }
        }
    }
@ -300,6 +351,7 @@ mod test {
    use std::rand::RngUtil;
    use std::vec;

+    use cryptoutil::{add_bytes_to_bits, add_bytes_to_bits_tuple};
    use digest::Digest;

    /// Feed 1,000,000 'a's into the digest with varying input sizes and check that the result is
@ -324,4 +376,50 @@ mod test {

        assert!(expected == result_str);
    }
+
+    // A normal addition - no overflow occurs
+    #[test]
+    fn test_add_bytes_to_bits_ok() {
+        assert!(add_bytes_to_bits::<u64>(100, 10) == 180);
+    }
+
+    // A simple failure case - adding 1 to the max value
+    #[test]
+    #[should_fail]
+    fn test_add_bytes_to_bits_overflow() {
+        add_bytes_to_bits::<u64>(Bounded::max_value(), 1);
+    }
+
+    // A normal addition - no overflow occurs (fast path)
+    #[test]
+    fn test_add_bytes_to_bits_tuple_ok() {
+        assert!(add_bytes_to_bits_tuple::<u64>((5, 100), 10) == (5, 180));
+    }
+
+    // The low order value overflows into the high order value
+    #[test]
+    fn test_add_bytes_to_bits_tuple_ok2() {
+        assert!(add_bytes_to_bits_tuple::<u64>((5, Bounded::max_value()), 1) == (6, 7));
+    }
+
+    // The value to add is too large to be converted into bits without overflowing its type
+    #[test]
+    fn test_add_bytes_to_bits_tuple_ok3() {
+        assert!(add_bytes_to_bits_tuple::<u64>((5, 0), 0x4000000000000001) == (7, 8));
+    }
+
+    // A simple failure case - adding 1 to the max value
+    #[test]
+    #[should_fail]
+    fn test_add_bytes_to_bits_tuple_overflow() {
+        add_bytes_to_bits_tuple::<u64>((Bounded::max_value(), Bounded::max_value()), 1);
+    }
+
+    // The value to add is too large to convert to bytes without overflowing its type, but the high
+    // order value from this conversion overflows when added to the existing high order value
+    #[test]
+    #[should_fail]
+    fn test_add_bytes_to_bits_tuple_overflow2() {
+        add_bytes_to_bits_tuple::<u64>((Bounded::max_value::<u64>() - 1, 0), 0x8000000000000000);
+    }
 }
--- a/src/libextra/crypto/md5.rs
+++ b/src/libextra/crypto/md5.rs
@ -0,0 +1,329 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::uint;
+
+use cryptoutil::{write_u32_le, read_u32v_le, FixedBuffer, FixedBuffer64, StandardPadding};
+use digest::Digest;
+
+
+// A structure that represents that state of a digest computation for the MD5 digest function
+struct Md5State {
+    s0: u32,
+    s1: u32,
+    s2: u32,
+    s3: u32
+}
+
+impl Md5State {
+    fn new() -> Md5State {
+        return Md5State {
+            s0: 0x67452301,
+            s1: 0xefcdab89,
+            s2: 0x98badcfe,
+            s3: 0x10325476
+        };
+    }
+
+    fn reset(&mut self) {
+        self.s0 = 0x67452301;
+        self.s1 = 0xefcdab89;
+        self.s2 = 0x98badcfe;
+        self.s3 = 0x10325476;
+    }
+
+    fn process_block(&mut self, input: &[u8]) {
+        fn f(u: u32, v: u32, w: u32) -> u32 {
+            return (u & v) | (!u & w);
+        }
+
+        fn g(u: u32, v: u32, w: u32) -> u32 {
+            return (u & w) | (v & !w);
+        }
+
+        fn h(u: u32, v: u32, w: u32) -> u32 {
+            return u ^ v ^ w;
+        }
+
+        fn i(u: u32, v: u32, w: u32) -> u32 {
+            return v ^ (u | !w);
+        }
+
+        fn rotate_left(x: u32, n: u32) -> u32 {
+            return (x << n) | (x >> (32 - n));
+        }
+
+        fn op_f(w: u32, x: u32, y: u32, z: u32, m: u32, s: u32) -> u32 {
+            return rotate_left(w + f(x, y, z) + m, s) + x;
+        }
+
+        fn op_g(w: u32, x: u32, y: u32, z: u32, m: u32, s: u32) -> u32 {
+            return rotate_left(w + g(x, y, z) + m, s) + x;
+        }
+
+        fn op_h(w: u32, x: u32, y: u32, z: u32, m: u32, s: u32) -> u32 {
+            return rotate_left(w + h(x, y, z) + m, s) + x;
+        }
+
+        fn op_i(w: u32, x: u32, y: u32, z: u32, m: u32, s: u32) -> u32 {
+            return rotate_left(w + i(x, y, z) + m, s) + x;
+        }
+
+        let mut a = self.s0;
+        let mut b = self.s1;
+        let mut c = self.s2;
+        let mut d = self.s3;
+
+        let mut data = [0u32, ..16];
+
+        read_u32v_le(data, input);
+
+        // round 1
+        do uint::range_step(0, 16, 4) |i| {
+            a = op_f(a, b, c, d, data[i] + C1[i], 7);
+            d = op_f(d, a, b, c, data[i + 1] + C1[i + 1], 12);
+            c = op_f(c, d, a, b, data[i + 2] + C1[i + 2], 17);
+            b = op_f(b, c, d, a, data[i + 3] + C1[i + 3], 22);
+            true
+        };
+
+        // round 2
+        let mut t = 1;
+        do uint::range_step(0, 16, 4) |i| {
+            a = op_g(a, b, c, d, data[t & 0x0f] + C2[i], 5);
+            d = op_g(d, a, b, c, data[(t + 5) & 0x0f] + C2[i + 1], 9);
+            c = op_g(c, d, a, b, data[(t + 10) & 0x0f] + C2[i + 2], 14);
+            b = op_g(b, c, d, a, data[(t + 15) & 0x0f] + C2[i + 3], 20);
+            t += 20;
+            true
+        };
+
+        // round 3
+        t = 5;
+        do uint::range_step(0, 16, 4) |i| {
+            a = op_h(a, b, c, d, data[t & 0x0f] + C3[i], 4);
+            d = op_h(d, a, b, c, data[(t + 3) & 0x0f] + C3[i + 1], 11);
+            c = op_h(c, d, a, b, data[(t + 6) & 0x0f] + C3[i + 2], 16);
+            b = op_h(b, c, d, a, data[(t + 9) & 0x0f] + C3[i + 3], 23);
+            t += 12;
+            true
+        };
+
+        // round 4
+        t = 0;
+        do uint::range_step(0, 16, 4) |i| {
+            a = op_i(a, b, c, d, data[t & 0x0f] + C4[i], 6);
+            d = op_i(d, a, b, c, data[(t + 7) & 0x0f] + C4[i + 1], 10);
+            c = op_i(c, d, a, b, data[(t + 14) & 0x0f] + C4[i + 2], 15);
+            b = op_i(b, c, d, a, data[(t + 21) & 0x0f] + C4[i + 3], 21);
+            t += 28;
+            true
+        };
+
+        self.s0 += a;
+        self.s1 += b;
+        self.s2 += c;
+        self.s3 += d;
+    }
+}
+
+// Round 1 constants
+static C1: [u32, ..16] = [
+    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
+];
+
+// Round 2 constants
+static C2: [u32, ..16] = [
+    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
+];
+
+// Round 3 constants
+static C3: [u32, ..16] = [
+    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
+];
+
+// Round 4 constants
+static C4: [u32, ..16] = [
+    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+];
+
+
+/// The MD5 Digest algorithm
+struct Md5 {
+    priv length_bytes: u64,
+    priv buffer: FixedBuffer64,
+    priv state: Md5State,
+    priv finished: bool,
+}
+
+impl Md5 {
+    /// Construct a new instance of the MD5 Digest.
+    pub fn new() -> Md5 {
+        return Md5 {
+            length_bytes: 0,
+            buffer: FixedBuffer64::new(),
+            state: Md5State::new(),
+            finished: false
+        }
+    }
+}
+
+impl Digest for Md5 {
+    fn input(&mut self, input: &[u8]) {
+        assert!(!self.finished);
+        // Unlike Sha1 and Sha2, the length value in MD5 is defined as the length of the message mod
+        // 2^64 - ie: integer overflow is OK.
+        self.length_bytes += input.len() as u64;
+        self.buffer.input(input, |d: &[u8]| { self.state.process_block(d); });
+    }
+
+    fn reset(&mut self) {
+        self.length_bytes = 0;
+        self.buffer.reset();
+        self.state.reset();
+        self.finished = false;
+    }
+
+    fn result(&mut self, out: &mut [u8]) {
+        if !self.finished {
+            self.buffer.standard_padding(8, |d: &[u8]| { self.state.process_block(d); });
+            write_u32_le(self.buffer.next(4), (self.length_bytes << 3) as u32);
+            write_u32_le(self.buffer.next(4), (self.length_bytes >> 29) as u32);
+            self.state.process_block(self.buffer.full_buffer());
+            self.finished = true;
+        }
+
+        write_u32_le(out.mut_slice(0, 4), self.state.s0);
+        write_u32_le(out.mut_slice(4, 8), self.state.s1);
+        write_u32_le(out.mut_slice(8, 12), self.state.s2);
+        write_u32_le(out.mut_slice(12, 16), self.state.s3);
+    }
+
+    fn output_bits(&self) -> uint { 128 }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use cryptoutil::test::test_digest_1million_random;
+    use digest::Digest;
+    use md5::Md5;
+
+
+    struct Test {
+        input: ~str,
+        output_str: ~str,
+    }
+
+    fn test_hash<D: Digest>(sh: &mut D, tests: &[Test]) {
+        // Test that it works when accepting the message all at once
+        for t in tests.iter() {
+            sh.input_str(t.input);
+
+            let out_str = sh.result_str();
+            assert!(out_str == t.output_str);
+
+            sh.reset();
+        }
+
+        // Test that it works when accepting the message in pieces
+        for t in tests.iter() {
+            let len = t.input.len();
+            let mut left = len;
+            while left > 0u {
+                let take = (left + 1u) / 2u;
+                sh.input_str(t.input.slice(len - left, take + len - left));
+                left = left - take;
+            }
+
+            let out_str = sh.result_str();
+            assert!(out_str == t.output_str);
+
+            sh.reset();
+        }
+    }
+
+    #[test]
+    fn test_md5() {
+        // Examples from wikipedia
+        let wikipedia_tests = ~[
+            Test {
+                input: ~"",
+                output_str: ~"d41d8cd98f00b204e9800998ecf8427e"
+            },
+            Test {
+                input: ~"The quick brown fox jumps over the lazy dog",
+                output_str: ~"9e107d9d372bb6826bd81d3542a419d6"
+            },
+            Test {
+                input: ~"The quick brown fox jumps over the lazy dog.",
+                output_str: ~"e4d909c290d0fb1ca068ffaddf22cbd0"
+            },
+        ];
+
+        let tests = wikipedia_tests;
+
+        let mut sh = Md5::new();
+
+        test_hash(&mut sh, tests);
+    }
+
+    #[test]
+    fn test_1million_random_md5() {
+        let mut sh = Md5::new();
+        test_digest_1million_random(
+            &mut sh,
+            64,
+            "7707d6ae4e027c70eea2a935c2296f21");
+    }
+}
+
+
+#[cfg(test)]
+mod bench {
+    use extra::test::BenchHarness;
+
+    use md5::Md5;
+
+
+    #[bench]
+    pub fn md5_10(bh: & mut BenchHarness) {
+        let mut sh = Md5::new();
+        let bytes = [1u8, ..10];
+        do bh.iter {
+            sh.input(bytes);
+        }
+        bh.bytes = bytes.len() as u64;
+    }
+
+    #[bench]
+    pub fn md5_1k(bh: & mut BenchHarness) {
+        let mut sh = Md5::new();
+        let bytes = [1u8, ..1024];
+        do bh.iter {
+            sh.input(bytes);
+        }
+        bh.bytes = bytes.len() as u64;
+    }
+
+    #[bench]
+    pub fn md5_64k(bh: & mut BenchHarness) {
+        let mut sh = Md5::new();
+        let bytes = [1u8, ..65536];
+        do bh.iter {
+            sh.input(bytes);
+        }
+        bh.bytes = bytes.len() as u64;
+    }
+}
--- a/src/libextra/crypto/sha1.rs
+++ b/src/libextra/crypto/sha1.rs
@ -23,7 +23,7 @@
 */


-use cryptoutil::{write_u32_be, read_u32v_be, shift_add_check_overflow, FixedBuffer, FixedBuffer64,
+use cryptoutil::{write_u32_be, read_u32v_be, add_bytes_to_bits, FixedBuffer, FixedBuffer64,
    StandardPadding};
 use digest::Digest;

@ -52,7 +52,7 @@ pub struct Sha1 {
 fn add_input(st: &mut Sha1, msg: &[u8]) {
    assert!((!st.computed));
    // Assumes that msg.len() can be converted to u64 without overflow
-    st.length_bits = shift_add_check_overflow(st.length_bits, msg.len() as u64, 3);
+    st.length_bits = add_bytes_to_bits(st.length_bits, msg.len() as u64);
    st.buffer.input(msg, |d: &[u8]| { process_msg_block(d, &mut st.h); });
 }

--- a/src/libextra/crypto/sha2.rs
+++ b/src/libextra/crypto/sha2.rs
@ -10,8 +10,8 @@

 use std::uint;

-use cryptoutil::{write_u64_be, write_u32_be, read_u64v_be, read_u32v_be, shift_add_check_overflow,
-    shift_add_check_overflow_tuple, FixedBuffer, FixedBuffer128, FixedBuffer64, StandardPadding};
+use cryptoutil::{write_u64_be, write_u32_be, read_u64v_be, read_u32v_be, add_bytes_to_bits,
+    add_bytes_to_bits_tuple, FixedBuffer, FixedBuffer128, FixedBuffer64, StandardPadding};
 use digest::Digest;


@ -210,7 +210,7 @@ impl Engine512 {
    fn input(&mut self, input: &[u8]) {
        assert!(!self.finished)
        // Assumes that input.len() can be converted to u64 without overflow
-        self.length_bits = shift_add_check_overflow_tuple(self.length_bits, input.len() as u64, 3);
+        self.length_bits = add_bytes_to_bits_tuple(self.length_bits, input.len() as u64);
        self.buffer.input(input, |input: &[u8]| { self.state.process_block(input) });
    }

@ -602,7 +602,7 @@ impl Engine256 {
    fn input(&mut self, input: &[u8]) {
        assert!(!self.finished)
        // Assumes that input.len() can be converted to u64 without overflow
-        self.length_bits = shift_add_check_overflow(self.length_bits, input.len() as u64, 3);
+        self.length_bits = add_bytes_to_bits(self.length_bits, input.len() as u64);
        self.buffer.input(input, |input: &[u8]| { self.state.process_block(input) });
    }

--- a/src/libextra/extra.rs
+++ b/src/libextra/extra.rs
@ -71,6 +71,8 @@ pub mod treemap;
 mod cryptoutil;
 #[path="crypto/digest.rs"]
 pub mod digest;
+#[path="crypto/md5.rs"]
+pub mod md5;
 #[path="crypto/sha1.rs"]
 pub mod sha1;
 #[path="crypto/sha2.rs"]