diff --git a/library/stdarch/coresimd/x86/mod.rs b/library/stdarch/coresimd/x86/mod.rs index a7ba8a56153d..87c5efab3c48 100644 --- a/library/stdarch/coresimd/x86/mod.rs +++ b/library/stdarch/coresimd/x86/mod.rs @@ -597,3 +597,6 @@ pub use self::aes::*; mod rdrand; pub use self::rdrand::*; + +mod sha; +pub use self::sha::*; diff --git a/library/stdarch/coresimd/x86/sha.rs b/library/stdarch/coresimd/x86/sha.rs new file mode 100644 index 000000000000..4f4701f379f1 --- /dev/null +++ b/library/stdarch/coresimd/x86/sha.rs @@ -0,0 +1,201 @@ +use coresimd::simd::*; +use coresimd::x86::*; +use mem; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.sha1msg1"] + fn sha1msg1(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha1msg2"] + fn sha1msg2(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha1nexte"] + fn sha1nexte(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha1rnds4"] + fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4; + #[link_name = "llvm.x86.sha256msg1"] + fn sha256msg1(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha256msg2"] + fn sha256msg2(a: i32x4, b: i32x4) -> i32x4; + #[link_name = "llvm.x86.sha256rnds2"] + fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4; +} + +#[cfg(test)] +use stdsimd_test::assert_instr; + +/// Perform an intermediate calculation for the next four SHA1 message values +/// (unsigned 32-bit integers) using previous message values from `a` and `b`, +/// and return the result. +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1msg1))] +pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) +} + +/// Perform the final calculation for the next four SHA1 message values +/// (unsigned 32-bit integers) using the intermediate result in `a` and the +/// previous message values in `b`, and return the result. 
+#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1msg2))] +pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) +} + +/// Calculate SHA1 state variable E after four rounds of operation from the +/// current SHA1 state variable `a`, add that value to the scheduled values +/// (unsigned 32-bit integers) in `b`, and return the result. +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1nexte))] +pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) +} + +/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) +/// from `a` and some pre-computed sum of the next 4 round message values +/// (unsigned 32-bit integers), and state variable E from `b`, and return the +/// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round +/// constants, and must be a compile-time constant. +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i { + let a = a.as_i32x4(); + let b = b.as_i32x4(); + macro_rules! call { + ($imm2:expr) => { sha1rnds4(a, b, $imm2) } + } + let ret = constify_imm2!(func, call); + mem::transmute(ret) +} + +/// Perform an intermediate calculation for the next four SHA256 message values +/// (unsigned 32-bit integers) using previous message values from `a` and `b`, +/// and return the result. +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha256msg1))] +pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) +} + +/// Perform the final calculation for the next four SHA256 message values +/// (unsigned 32-bit integers) using previous message values from `a` and `b`, +/// and return the result. 
+#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha256msg2))] +pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) +} + +/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) +/// from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum +/// of the next 2 round message values (unsigned 32-bit integers) and the +/// corresponding round constants from `k`, and return the updated SHA256 state +/// (A,B,E,F). +#[inline] +#[target_feature(enable = "sha")] +#[cfg_attr(test, assert_instr(sha256rnds2))] +pub unsafe fn _mm_sha256rnds2_epu32 (a: __m128i, b: __m128i, k: __m128i) -> __m128i { + mem::transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) +} + +#[cfg(test)] +mod tests { + use std::mem::{self, transmute}; + use std::f64::{self, NAN}; + use std::f32; + use std::i32; + + use stdsimd_test::simd_test; + use test::black_box; // Used to inhibit constant-folding. 
+ use coresimd::x86::*; + use coresimd::simd::*; + + #[simd_test = "sha"] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1msg1_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c); + let r = _mm_sha1msg1_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test = "sha"] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1msg2_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35); + let r = _mm_sha1msg2_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test = "sha"] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1nexte_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b); + let r = _mm_sha1nexte_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test = "sha"] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha1rnds4_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f); + let r = _mm_sha1rnds4_epu32(a, b, 0); + assert_eq_m128i(r, expected); + + let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe); + let r = _mm_sha1rnds4_epu32(a, b, 1); + assert_eq_m128i(r, expected); + + let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001); + let r = _mm_sha1rnds4_epu32(a, b, 2); + assert_eq_m128i(r, expected); + + let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b); + let r = _mm_sha1rnds4_epu32(a, b, 3); + assert_eq_m128i(r, expected); + } + + 
#[simd_test = "sha"] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha256msg1_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee); + let r = _mm_sha256msg1_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test = "sha"] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha256msg2_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450); + let r = _mm_sha256msg2_epu32(a, b); + assert_eq_m128i(r, expected); + } + + #[simd_test = "sha"] + #[allow(overflowing_literals)] + unsafe fn test_mm_sha256rnds2_epu32() { + let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); + let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); + let k = _mm_set_epi64x(0, 0x12835b01d807aa98); + let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19); + let r = _mm_sha256rnds2_epu32(a, b, k); + assert_eq_m128i(r, expected); + } +} diff --git a/library/stdarch/crates/stdsimd/tests/cpu-detection.rs b/library/stdarch/crates/stdsimd/tests/cpu-detection.rs index f383c503792c..0653ba3af2d2 100644 --- a/library/stdarch/crates/stdsimd/tests/cpu-detection.rs +++ b/library/stdarch/crates/stdsimd/tests/cpu-detection.rs @@ -71,6 +71,7 @@ fn x86_all() { is_x86_feature_detected!("sse4.2") ); println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); println!("avx: {:?}", is_x86_feature_detected!("avx")); println!("avx2: {:?}", is_x86_feature_detected!("avx2")); println!( diff --git a/library/stdarch/stdsimd/arch/detect/arch/x86.rs b/library/stdarch/stdsimd/arch/detect/arch/x86.rs index 0c6cb926af33..eb4302137587 100644 --- a/library/stdarch/stdsimd/arch/detect/arch/x86.rs +++ 
b/library/stdarch/stdsimd/arch/detect/arch/x86.rs @@ -64,6 +64,10 @@ macro_rules! is_x86_feature_detected { cfg!(target_feature = "sse4a") || $crate::arch::detect::check_for( $crate::arch::detect::Feature::sse4a) }; + ("sha") => { + cfg!(target_feature = "sha") || $crate::arch::detect::check_for( + $crate::arch::detect::Feature::sha) + }; ("avx") => { cfg!(target_feature = "avx") || $crate::arch::detect::check_for( $crate::arch::detect::Feature::avx) @@ -199,6 +203,8 @@ pub enum Feature { sse4_2, /// SSE4a (Streaming SIMD Extensions 4a) sse4a, + /// SHA (Secure Hash Algorithm extensions) + sha, /// AVX (Advanced Vector Extensions) avx, /// AVX2 (Advanced Vector Extensions 2) diff --git a/library/stdarch/stdsimd/arch/detect/os/x86.rs b/library/stdarch/stdsimd/arch/detect/os/x86.rs index d7d7b85df496..faccf99582cf 100644 --- a/library/stdarch/stdsimd/arch/detect/os/x86.rs +++ b/library/stdarch/stdsimd/arch/detect/os/x86.rs @@ -131,6 +131,7 @@ pub fn detect_features() -> cache::Initializer { enable(proc_info_edx, 24, Feature::fxsr); enable(proc_info_edx, 25, Feature::sse); enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); enable(extended_features_ebx, 3, Feature::bmi); enable(extended_features_ebx, 8, Feature::bmi2); @@ -249,6 +250,7 @@ mod tests { println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); println!("avx: {:?}", is_x86_feature_detected!("avx")); println!("avx2: {:?}", is_x86_feature_detected!("avx2")); println!("avx512f {:?}", is_x86_feature_detected!("avx512f")); @@ -293,6 +295,7 @@ mod tests { assert_eq!(is_x86_feature_detected!("sse4.1"), information.sse4_1()); assert_eq!(is_x86_feature_detected!("sse4.2"), information.sse4_2()); assert_eq!(is_x86_feature_detected!("sse4a"), information.sse4a()); + assert_eq!(is_x86_feature_detected!("sha"), information.sha()); 
assert_eq!(is_x86_feature_detected!("avx"), information.avx()); assert_eq!(is_x86_feature_detected!("avx2"), information.avx2()); assert_eq!(is_x86_feature_detected!("avx512f"), information.avx512f());