Add VAES intrinsics (#942)
This commit is contained in:
parent
e799627ed7
commit
f9d32c56e2
2 changed files with 335 additions and 0 deletions
332
library/stdarch/crates/core_arch/src/x86/avx512vaes.rs
Normal file
332
library/stdarch/crates/core_arch/src/x86/avx512vaes.rs
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
//! Vectorized AES Instructions (VAES)
|
||||
//!
|
||||
//! The intrinsics here correspond to those in the `immintrin.h` C header.
|
||||
//!
|
||||
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
|
||||
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
|
||||
//!
|
||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
|
||||
use crate::core_arch::x86::__m256i;
|
||||
use crate::core_arch::x86::__m512i;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.aesni.aesenc.256"]
|
||||
fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i;
|
||||
#[link_name = "llvm.x86.aesni.aesenclast.256"]
|
||||
fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i;
|
||||
#[link_name = "llvm.x86.aesni.aesdec.256"]
|
||||
fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i;
|
||||
#[link_name = "llvm.x86.aesni.aesdeclast.256"]
|
||||
fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i;
|
||||
#[link_name = "llvm.x86.aesni.aesenc.512"]
|
||||
fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i;
|
||||
#[link_name = "llvm.x86.aesni.aesenclast.512"]
|
||||
fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i;
|
||||
#[link_name = "llvm.x86.aesni.aesdec.512"]
|
||||
fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i;
|
||||
#[link_name = "llvm.x86.aesni.aesdeclast.512"]
|
||||
fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i;
|
||||
}
|
||||
|
||||
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vaesenc))]
|
||||
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
|
||||
aesenc_256(a, round_key)
|
||||
}
|
||||
|
||||
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vaesenclast))]
|
||||
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
|
||||
aesenclast_256(a, round_key)
|
||||
}
|
||||
|
||||
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vaesdec))]
|
||||
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
|
||||
aesdec_256(a, round_key)
|
||||
}
|
||||
|
||||
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vaesdeclast))]
|
||||
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
|
||||
aesdeclast_256(a, round_key)
|
||||
}
|
||||
|
||||
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesenc))]
|
||||
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
|
||||
aesenc_512(a, round_key)
|
||||
}
|
||||
|
||||
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesenclast))]
|
||||
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
|
||||
aesenclast_512(a, round_key)
|
||||
}
|
||||
|
||||
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesdec))]
|
||||
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
|
||||
aesdec_512(a, round_key)
|
||||
}
|
||||
|
||||
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
|
||||
/// the corresponding 128-bit word (key) in `round_key`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vaes,avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vaesdeclast))]
|
||||
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
|
||||
aesdeclast_512(a, round_key)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
// The constants in the tests below are just bit patterns. They should not
|
||||
// be interpreted as integers; signedness does not make sense for them, but
|
||||
// __mXXXi happens to be defined in terms of signed integers.
|
||||
#![allow(overflowing_literals)]
|
||||
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
use crate::core_arch::x86::*;
|
||||
|
||||
// the first parts of these tests are straight ports from the AES-NI tests
|
||||
// the second parts directly compare the two, for inputs that are different across lanes
|
||||
// and "more random" than the standard test vectors
|
||||
// ideally we'd be using quickcheck here instead
|
||||
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn helper_for_256_avx512vaes(
|
||||
linear: unsafe fn(__m128i, __m128i) -> __m128i,
|
||||
vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
|
||||
) {
|
||||
let a = _mm256_set_epi64x(
|
||||
0xDCB4DB3657BF0B7D,
|
||||
0x18DB0601068EDD9F,
|
||||
0xB76B908233200DC5,
|
||||
0xE478235FA8E22D5E,
|
||||
);
|
||||
let k = _mm256_set_epi64x(
|
||||
0x672F6F105A94CEA7,
|
||||
0x8298B8FFCA5F829C,
|
||||
0xA3927047B3FB61D8,
|
||||
0x978093862CDE7187,
|
||||
);
|
||||
let mut a_decomp = [_mm_setzero_si128(); 2];
|
||||
a_decomp[0] = _mm256_extracti128_si256(a, 0);
|
||||
a_decomp[1] = _mm256_extracti128_si256(a, 1);
|
||||
let mut k_decomp = [_mm_setzero_si128(); 2];
|
||||
k_decomp[0] = _mm256_extracti128_si256(k, 0);
|
||||
k_decomp[1] = _mm256_extracti128_si256(k, 1);
|
||||
let r = vectorized(a, k);
|
||||
let mut e_decomp = [_mm_setzero_si128(); 2];
|
||||
for i in 0..2 {
|
||||
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
|
||||
}
|
||||
assert_eq_m128i(_mm256_extracti128_si256(r, 0), e_decomp[0]);
|
||||
assert_eq_m128i(_mm256_extracti128_si256(r, 1), e_decomp[1]);
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn setup_state_key<T>(broadcast: unsafe fn(__m128i) -> T) -> (T, T) {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
|
||||
let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
|
||||
let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
|
||||
(broadcast(a), broadcast(k))
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn setup_state_key_256() -> (__m256i, __m256i) {
|
||||
setup_state_key(_mm256_broadcastsi128_si256)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx512f")]
|
||||
unsafe fn setup_state_key_512() -> (__m512i, __m512i) {
|
||||
setup_state_key(_mm512_broadcast_i32x4)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512vl")]
|
||||
unsafe fn test_mm256_aesdec_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
|
||||
let (a, k) = setup_state_key_256();
|
||||
let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
|
||||
let e = _mm256_broadcastsi128_si256(e);
|
||||
let r = _mm256_aesdec_epi128(a, k);
|
||||
assert_eq_m256i(r, e);
|
||||
|
||||
helper_for_256_avx512vaes(_mm_aesdec_si128, _mm256_aesdec_epi128);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512vl")]
|
||||
unsafe fn test_mm256_aesdeclast_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
|
||||
let (a, k) = setup_state_key_256();
|
||||
let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
|
||||
let e = _mm256_broadcastsi128_si256(e);
|
||||
let r = _mm256_aesdeclast_epi128(a, k);
|
||||
assert_eq_m256i(r, e);
|
||||
|
||||
helper_for_256_avx512vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512vl")]
|
||||
unsafe fn test_mm256_aesenc_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
|
||||
// they are repeated appropriately
|
||||
let (a, k) = setup_state_key_256();
|
||||
let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
|
||||
let e = _mm256_broadcastsi128_si256(e);
|
||||
let r = _mm256_aesenc_epi128(a, k);
|
||||
assert_eq_m256i(r, e);
|
||||
|
||||
helper_for_256_avx512vaes(_mm_aesenc_si128, _mm256_aesenc_epi128);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512vl")]
|
||||
unsafe fn test_mm256_aesenclast_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
|
||||
let (a, k) = setup_state_key_256();
|
||||
let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
|
||||
let e = _mm256_broadcastsi128_si256(e);
|
||||
let r = _mm256_aesenclast_epi128(a, k);
|
||||
assert_eq_m256i(r, e);
|
||||
|
||||
helper_for_256_avx512vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128);
|
||||
}
|
||||
|
||||
#[target_feature(enable = "avx512f")]
|
||||
unsafe fn helper_for_512_avx512vaes(
|
||||
linear: unsafe fn(__m128i, __m128i) -> __m128i,
|
||||
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
|
||||
) {
|
||||
let a = _mm512_set_epi64(
|
||||
0xDCB4DB3657BF0B7D,
|
||||
0x18DB0601068EDD9F,
|
||||
0xB76B908233200DC5,
|
||||
0xE478235FA8E22D5E,
|
||||
0xAB05CFFA2621154C,
|
||||
0x1171B47A186174C9,
|
||||
0x8C6B6C0E7595CEC9,
|
||||
0xBE3E7D4934E961BD,
|
||||
);
|
||||
let k = _mm512_set_epi64(
|
||||
0x672F6F105A94CEA7,
|
||||
0x8298B8FFCA5F829C,
|
||||
0xA3927047B3FB61D8,
|
||||
0x978093862CDE7187,
|
||||
0xB1927AB22F31D0EC,
|
||||
0xA9A5DA619BE4D7AF,
|
||||
0xCA2590F56884FDC6,
|
||||
0x19BE9F660038BDB5,
|
||||
);
|
||||
let mut a_decomp = [_mm_setzero_si128(); 4];
|
||||
a_decomp[0] = _mm512_extracti32x4_epi32(a, 0);
|
||||
a_decomp[1] = _mm512_extracti32x4_epi32(a, 1);
|
||||
a_decomp[2] = _mm512_extracti32x4_epi32(a, 2);
|
||||
a_decomp[3] = _mm512_extracti32x4_epi32(a, 3);
|
||||
let mut k_decomp = [_mm_setzero_si128(); 4];
|
||||
k_decomp[0] = _mm512_extracti32x4_epi32(k, 0);
|
||||
k_decomp[1] = _mm512_extracti32x4_epi32(k, 1);
|
||||
k_decomp[2] = _mm512_extracti32x4_epi32(k, 2);
|
||||
k_decomp[3] = _mm512_extracti32x4_epi32(k, 3);
|
||||
let r = vectorized(a, k);
|
||||
let mut e_decomp = [_mm_setzero_si128(); 4];
|
||||
for i in 0..4 {
|
||||
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
|
||||
}
|
||||
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 0), e_decomp[0]);
|
||||
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 1), e_decomp[1]);
|
||||
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 2), e_decomp[2]);
|
||||
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 3), e_decomp[3]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512f")]
|
||||
unsafe fn test_mm512_aesdec_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
|
||||
let (a, k) = setup_state_key_512();
|
||||
let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
|
||||
let e = _mm512_broadcast_i32x4(e);
|
||||
let r = _mm512_aesdec_epi128(a, k);
|
||||
assert_eq_m512i(r, e);
|
||||
|
||||
helper_for_512_avx512vaes(_mm_aesdec_si128, _mm512_aesdec_epi128);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512f")]
|
||||
unsafe fn test_mm512_aesdeclast_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
|
||||
let (a, k) = setup_state_key_512();
|
||||
let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
|
||||
let e = _mm512_broadcast_i32x4(e);
|
||||
let r = _mm512_aesdeclast_epi128(a, k);
|
||||
assert_eq_m512i(r, e);
|
||||
|
||||
helper_for_512_avx512vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512f")]
|
||||
unsafe fn test_mm512_aesenc_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
|
||||
let (a, k) = setup_state_key_512();
|
||||
let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
|
||||
let e = _mm512_broadcast_i32x4(e);
|
||||
let r = _mm512_aesenc_epi128(a, k);
|
||||
assert_eq_m512i(r, e);
|
||||
|
||||
helper_for_512_avx512vaes(_mm_aesenc_si128, _mm512_aesenc_epi128);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vaes,avx512f")]
|
||||
unsafe fn test_mm512_aesenclast_epi128() {
|
||||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
|
||||
let (a, k) = setup_state_key_512();
|
||||
let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
|
||||
let e = _mm512_broadcast_i32x4(e);
|
||||
let r = _mm512_aesenclast_epi128(a, k);
|
||||
assert_eq_m512i(r, e);
|
||||
|
||||
helper_for_512_avx512vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128);
|
||||
}
|
||||
}
|
||||
|
|
@ -651,6 +651,9 @@ pub use self::avx512f::*;
|
|||
mod avx512ifma;
|
||||
pub use self::avx512ifma::*;
|
||||
|
||||
mod avx512vaes;
|
||||
pub use self::avx512vaes::*;
|
||||
|
||||
mod bt;
|
||||
pub use self::bt::*;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue