diff --git a/src/libextra/crypto/sha2.rs b/src/libextra/crypto/sha2.rs index dc2d56f483e0..fc420d7179fd 100644 --- a/src/libextra/crypto/sha2.rs +++ b/src/libextra/crypto/sha2.rs @@ -8,11 +8,32 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use std::uint; + use cryptoutil::{write_u64_be, write_u32_be, read_u64v_be, read_u32v_be, FixedBuffer, FixedBuffer128, FixedBuffer64, StandardPadding}; use digest::Digest; +// Sha-512 and Sha-256 use basically the same calculations which are implemented by these macros. +// Inlining the calculations seems to result in better generated code. +macro_rules! schedule_round( ($t:expr) => ( + W[$t] = sigma1(W[$t - 2]) + W[$t - 7] + sigma0(W[$t - 15]) + W[$t - 16]; + ) +) + +macro_rules! sha2_round( + ($A:ident, $B:ident, $C:ident, $D:ident, + $E:ident, $F:ident, $G:ident, $H:ident, $K:ident, $t:expr) => ( + { + $H += sum1($E) + ch($E, $F, $G) + $K[$t] + W[$t]; + $D += $H; + $H += sum0($A) + maj($A, $B, $C); + } + ) +) + + // BitCounter is a specialized structure intended simply for counting the // number of bits that have been processed by the SHA-2 512 family of functions. // It does very little overflow checking since such checking is not necessary @@ -117,15 +138,6 @@ impl Engine512State { ((x << 45) | (x >> 19)) ^ ((x << 3) | (x >> 61)) ^ (x >> 6) } - let mut W = [0u64, ..80]; - - read_u64v_be(W.mut_slice(0, 16), data); - - foreach t in range(16u, 80) { - W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) + - W[t - 16]; - } - let mut a = self.H0; let mut b = self.H1; let mut c = self.H2; @@ -135,48 +147,41 @@ impl Engine512State { let mut g = self.H6; let mut h = self.H7; - let mut t = 0; - - foreach _ in range(0u, 10) { - h += sum1(e) + ch(e, f, g) + K64[t] + W[t]; - d += h; - h += sum0(a) + maj(a, b, c); - t += 1; + let mut W = [0u64, ..80]; - g += sum1(d) + ch(d, e, f) + K64[t] + W[t]; - c += g; - g += sum0(h) + maj(h, a, b); - t += 1; + read_u64v_be(W.mut_slice(0, 16), data); - f += sum1(c) + ch(c, d, e) + K64[t] + W[t]; - b += f; - f += sum0(g) + maj(g, h, a); - t += 1; + // Putting the message schedule inside the same loop as the round calculations allows for + // the compiler to generate better code. + for uint::range_step(0, 64, 8) |t| { + schedule_round!(t + 16); + schedule_round!(t + 17); + schedule_round!(t + 18); + schedule_round!(t + 19); + schedule_round!(t + 20); + schedule_round!(t + 21); + schedule_round!(t + 22); + schedule_round!(t + 23); - e += sum1(b) + ch(b, c, d) + K64[t] + W[t]; - a += e; - e += sum0(f) + maj(f, g, h); - t += 1; + sha2_round!(a, b, c, d, e, f, g, h, K64, t); + sha2_round!(h, a, b, c, d, e, f, g, K64, t + 1); + sha2_round!(g, h, a, b, c, d, e, f, K64, t + 2); + sha2_round!(f, g, h, a, b, c, d, e, K64, t + 3); + sha2_round!(e, f, g, h, a, b, c, d, K64, t + 4); + sha2_round!(d, e, f, g, h, a, b, c, K64, t + 5); + sha2_round!(c, d, e, f, g, h, a, b, K64, t + 6); + sha2_round!(b, c, d, e, f, g, h, a, K64, t + 7); + } - d += sum1(a) + ch(a, b, c) + K64[t] + W[t]; - h += d; - d += sum0(e) + maj(e, f, g); - t += 1; - - c += sum1(h) + ch(h, a, b) + K64[t] + W[t]; - g += c; - c += sum0(d) + maj(d, e, f); - t += 1; - - b += sum1(g) + ch(g, h, a) + K64[t] + W[t]; - f += b; - b += sum0(c) + maj(c, d, e); - t += 1; - - a += sum1(f) + ch(f, g, h) + K64[t] + W[t]; - e += a; - a += sum0(b) + maj(b, c, d); - t += 1; + for uint::range_step(64, 80, 8) |t| { + sha2_round!(a, b, c, d, e, f, g, h, K64, t); + sha2_round!(h, a, b, c, d, e, f, g, K64, t + 1); + sha2_round!(g, h, a, b, c, d, e, f, K64, t + 2); + sha2_round!(f, g, h, a, b, c, d, e, K64, t + 3); + sha2_round!(e, f, g, h, a, b, c, d, K64, t + 4); + sha2_round!(d, e, f, g, h, a, b, c, K64, t + 5); + sha2_round!(c, d, e, f, g, h, a, b, K64, t + 6); + sha2_round!(b, c, d, e, f, g, h, a, K64, t + 7); } self.H0 += a; @@ -523,15 +528,6 @@ impl Engine256State { ((x >> 17) | (x << 15)) ^ ((x >> 19) | (x << 13)) ^ (x >> 10) } - let mut W = [0u32, ..80]; - - read_u32v_be(W.mut_slice(0, 16), data); - - foreach t in range(16u, 64) { - W[t] = sigma1(W[t - 2]) + W[t - 7] + sigma0(W[t - 15]) + - W[t - 16]; - } - let mut a = self.H0; let mut b = self.H1; let mut c = self.H2; @@ -541,48 +537,41 @@ impl Engine256State { let mut g = self.H6; let mut h = self.H7; - let mut t = 0; + let mut W = [0u32, ..64]; - foreach _ in range(0u, 8) { - h += sum1(e) + ch(e, f, g) + K32[t] + W[t]; - d += h; - h += sum0(a) + maj(a, b, c); - t += 1; + read_u32v_be(W.mut_slice(0, 16), data); - g += sum1(d) + ch(d, e, f) + K32[t] + W[t]; - c += g; - g += sum0(h) + maj(h, a, b); - t += 1; + // Putting the message schedule inside the same loop as the round calculations allows for + // the compiler to generate better code. + for uint::range_step(0, 48, 8) |t| { + schedule_round!(t + 16); + schedule_round!(t + 17); + schedule_round!(t + 18); + schedule_round!(t + 19); + schedule_round!(t + 20); + schedule_round!(t + 21); + schedule_round!(t + 22); + schedule_round!(t + 23); - f += sum1(c) + ch(c, d, e) + K32[t] + W[t]; - b += f; - f += sum0(g) + maj(g, h, a); - t += 1; + sha2_round!(a, b, c, d, e, f, g, h, K32, t); + sha2_round!(h, a, b, c, d, e, f, g, K32, t + 1); + sha2_round!(g, h, a, b, c, d, e, f, K32, t + 2); + sha2_round!(f, g, h, a, b, c, d, e, K32, t + 3); + sha2_round!(e, f, g, h, a, b, c, d, K32, t + 4); + sha2_round!(d, e, f, g, h, a, b, c, K32, t + 5); + sha2_round!(c, d, e, f, g, h, a, b, K32, t + 6); + sha2_round!(b, c, d, e, f, g, h, a, K32, t + 7); + } - e += sum1(b) + ch(b, c, d) + K32[t] + W[t]; - a += e; - e += sum0(f) + maj(f, g, h); - t += 1; - - d += sum1(a) + ch(a, b, c) + K32[t] + W[t]; - h += d; - d += sum0(e) + maj(e, f, g); - t += 1; - - c += sum1(h) + ch(h, a, b) + K32[t] + W[t]; - g += c; - c += sum0(d) + maj(d, e, f); - t += 1; - - b += sum1(g) + ch(g, h, a) + K32[t] + W[t]; - f += b; - b += sum0(c) + maj(c, d, e); - t += 1; - - a += sum1(f) + ch(f, g, h) + K32[t] + W[t]; - e += a; - a += sum0(b) + maj(b, c, d); - t += 1; + for uint::range_step(48, 64, 8) |t| { + sha2_round!(a, b, c, d, e, f, g, h, K32, t); + sha2_round!(h, a, b, c, d, e, f, g, K32, t + 1); + sha2_round!(g, h, a, b, c, d, e, f, K32, t + 2); + sha2_round!(f, g, h, a, b, c, d, e, K32, t + 3); + sha2_round!(e, f, g, h, a, b, c, d, K32, t + 4); + sha2_round!(d, e, f, g, h, a, b, c, K32, t + 5); + sha2_round!(c, d, e, f, g, h, a, b, K32, t + 6); + sha2_round!(b, c, d, e, f, g, h, a, K32, t + 7); } self.H0 += a;