Add support for Intel SHA extensions. (#395)
This commit is contained in:
parent
56d9a42a2f
commit
de82d9d26b
5 changed files with 214 additions and 0 deletions
|
|
@ -597,3 +597,6 @@ pub use self::aes::*;
|
|||
|
||||
mod rdrand;
|
||||
pub use self::rdrand::*;
|
||||
|
||||
mod sha;
|
||||
pub use self::sha::*;
|
||||
|
|
|
|||
201
library/stdarch/coresimd/x86/sha.rs
Normal file
201
library/stdarch/coresimd/x86/sha.rs
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
use coresimd::simd::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.sha1msg1"]
|
||||
fn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.sha1msg2"]
|
||||
fn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.sha1nexte"]
|
||||
fn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.sha1rnds4"]
|
||||
fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
|
||||
#[link_name = "llvm.x86.sha256msg1"]
|
||||
fn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.sha256msg2"]
|
||||
fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.sha256rnds2"]
|
||||
fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Perform an intermediate calculation for the next four SHA1 message values
|
||||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and returning the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1msg1))]
|
||||
pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Perform the final calculation for the next four SHA1 message values
|
||||
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
|
||||
/// previous message values in `b`, and returns the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1msg2))]
|
||||
pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha1msg2(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Calculate SHA1 state variable E after four rounds of operation from the
|
||||
/// current SHA1 state variable `a`, add that value to the scheduled values
|
||||
/// (unsigned 32-bit integers) in `b`, and returns the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1nexte))]
|
||||
pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D)
|
||||
/// from `a` and some pre-computed sum of the next 4 round message values
|
||||
/// (unsigned 32-bit integers), and state variable E from `b`, and return the
|
||||
/// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round
|
||||
/// constants.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i {
|
||||
let a = a.as_i32x4();
|
||||
let b = b.as_i32x4();
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { sha1rnds4(a, b, $imm2) }
|
||||
}
|
||||
let ret = constify_imm2!(func, call);
|
||||
mem::transmute(ret)
|
||||
}
|
||||
|
||||
/// Perform an intermediate calculation for the next four SHA256 message values
|
||||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and return the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256msg1))]
|
||||
pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Perform the final calculation for the next four SHA256 message values
|
||||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and return the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256msg2))]
|
||||
pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H)
|
||||
/// from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum
|
||||
/// of the next 2 round message values (unsigned 32-bit integers) and the
|
||||
/// corresponding round constants from `k`, and store the updated SHA256 state
|
||||
/// (A,B,E,F) in dst.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256rnds2))]
|
||||
pub unsafe fn _mm_sha256rnds2_epu32 (a: __m128i, b: __m128i, k: __m128i) -> __m128i {
|
||||
mem::transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4()))
|
||||
}
|
||||
|
||||
// Tests for the SHA intrinsics. Each test feeds fixed 128-bit inputs to one
// intrinsic and compares the result against a fixed expected vector.
#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use coresimd::simd::*;
    use coresimd::x86::*;

    // `overflowing_literals` is allowed on every test because the 64-bit hex
    // patterns passed to `_mm_set_epi64x` have their top bit set.

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
        let r = _mm_sha1msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
        let r = _mm_sha1msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1nexte_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
        let r = _mm_sha1nexte_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha1rnds4_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);

        // Exercise each of the four `func` selector values in turn.
        let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
        let r = _mm_sha1rnds4_epu32(a, b, 0);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe);
        let r = _mm_sha1rnds4_epu32(a, b, 1);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001);
        let r = _mm_sha1rnds4_epu32(a, b, 2);
        assert_eq_m128i(r, expected);

        let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b);
        let r = _mm_sha1rnds4_epu32(a, b, 3);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg1_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
        let r = _mm_sha256msg1_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256msg2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
        let r = _mm_sha256msg2_epu32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "sha"]
    #[allow(overflowing_literals)]
    unsafe fn test_mm_sha256rnds2_epu32() {
        let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
        let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
        let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
        let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19);
        let r = _mm_sha256rnds2_epu32(a, b, k);
        assert_eq_m128i(r, expected);
    }
}
|
||||
|
|
@ -71,6 +71,7 @@ fn x86_all() {
|
|||
is_x86_feature_detected!("sse4.2")
|
||||
);
|
||||
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
|
||||
println!("sha: {:?}", is_x86_feature_detected!("sha"));
|
||||
println!("avx: {:?}", is_x86_feature_detected!("avx"));
|
||||
println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
|
||||
println!(
|
||||
|
|
|
|||
|
|
@ -64,6 +64,10 @@ macro_rules! is_x86_feature_detected {
|
|||
cfg!(target_feature = "sse4a") || $crate::arch::detect::check_for(
|
||||
$crate::arch::detect::Feature::sse4a)
|
||||
};
|
||||
("sha") => {
|
||||
cfg!(target_feature = "sha") || $crate::arch::detect::check_for(
|
||||
$crate::arch::detect::Feature::sha)
|
||||
};
|
||||
("avx") => {
|
||||
cfg!(target_feature = "avx") || $crate::arch::detect::check_for(
|
||||
$crate::arch::detect::Feature::avx)
|
||||
|
|
@ -199,6 +203,8 @@ pub enum Feature {
|
|||
sse4_2,
|
||||
/// SSE4a (Streaming SIMD Extensions 4a)
|
||||
sse4a,
|
||||
/// SHA
|
||||
sha,
|
||||
/// AVX (Advanced Vector Extensions)
|
||||
avx,
|
||||
/// AVX2 (Advanced Vector Extensions 2)
|
||||
|
|
|
|||
|
|
@ -131,6 +131,7 @@ pub fn detect_features() -> cache::Initializer {
|
|||
enable(proc_info_edx, 24, Feature::fxsr);
|
||||
enable(proc_info_edx, 25, Feature::sse);
|
||||
enable(proc_info_edx, 26, Feature::sse2);
|
||||
enable(extended_features_ebx, 29, Feature::sha);
|
||||
|
||||
enable(extended_features_ebx, 3, Feature::bmi);
|
||||
enable(extended_features_ebx, 8, Feature::bmi2);
|
||||
|
|
@ -249,6 +250,7 @@ mod tests {
|
|||
println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1"));
|
||||
println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2"));
|
||||
println!("sse4a: {:?}", is_x86_feature_detected!("sse4a"));
|
||||
println!("sha: {:?}", is_x86_feature_detected!("sha"));
|
||||
println!("avx: {:?}", is_x86_feature_detected!("avx"));
|
||||
println!("avx2: {:?}", is_x86_feature_detected!("avx2"));
|
||||
println!("avx512f {:?}", is_x86_feature_detected!("avx512f"));
|
||||
|
|
@ -293,6 +295,7 @@ mod tests {
|
|||
assert_eq!(is_x86_feature_detected!("sse4.1"), information.sse4_1());
|
||||
assert_eq!(is_x86_feature_detected!("sse4.2"), information.sse4_2());
|
||||
assert_eq!(is_x86_feature_detected!("sse4a"), information.sse4a());
|
||||
assert_eq!(is_x86_feature_detected!("sha"), information.sha());
|
||||
assert_eq!(is_x86_feature_detected!("avx"), information.avx());
|
||||
assert_eq!(is_x86_feature_detected!("avx2"), information.avx2());
|
||||
assert_eq!(is_x86_feature_detected!("avx512f"), information.avx512f());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue