diff --git a/library/stdarch/coresimd/aarch64/crypto.rs b/library/stdarch/coresimd/aarch64/crypto.rs new file mode 100644 index 000000000000..d8cb9e3d4086 --- /dev/null +++ b/library/stdarch/coresimd/aarch64/crypto.rs @@ -0,0 +1,428 @@ +use coresimd::arm::uint32x4_t; +use coresimd::arm::uint8x16_t; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.aarch64.crypto.aese"] + fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.crypto.aesd"] + fn vaesdq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.crypto.aesmc"] + fn vaesmcq_u8_(data: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.crypto.aesimc"] + fn vaesimcq_u8_(data: uint8x16_t) -> uint8x16_t; + + #[link_name = "llvm.aarch64.crypto.sha1h"] + fn vsha1h_u32_(hash_e: u32) -> u32; + #[link_name = "llvm.aarch64.crypto.sha1su0"] + fn vsha1su0q_u32_( + w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1su1"] + fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1c"] + fn vsha1cq_u32_( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1p"] + fn vsha1pq_u32_( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha1m"] + fn vsha1mq_u32_( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t + ) -> uint32x4_t; + + #[link_name = "llvm.aarch64.crypto.sha256h"] + fn vsha256hq_u32_( + hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha256h2"] + fn vsha256h2q_u32_( + hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t + ) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha256su0"] + fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; + #[link_name = "llvm.aarch64.crypto.sha256su1"] + fn vsha256su1q_u32_( + tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t + ) -> uint32x4_t; +} + +#[cfg(test)] +use stdsimd_test::assert_instr; + +/// AES single round encryption. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aese))] +pub unsafe fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + vaeseq_u8_(data, key) +} + +/// AES single round decryption. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aesd))] +pub unsafe fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { + vaesdq_u8_(data, key) +} + +/// AES mix columns. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aesmc))] +pub unsafe fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { + vaesmcq_u8_(data) +} + +/// AES inverse mix columns. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(aesimc))] +pub unsafe fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { + vaesimcq_u8_(data) +} + +/// SHA1 fixed rotate. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1h))] +pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 { + vsha1h_u32_(hash_e) +} + +/// SHA1 hash update accelerator, choose. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1c))] +pub unsafe fn vsha1cq_u32( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t +) -> uint32x4_t { + vsha1cq_u32_(hash_abcd, hash_e, wk) +} + +/// SHA1 hash update accelerator, majority. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1m))] +pub unsafe fn vsha1mq_u32( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t +) -> uint32x4_t { + vsha1mq_u32_(hash_abcd, hash_e, wk) +} + +/// SHA1 hash update accelerator, parity. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1p))] +pub unsafe fn vsha1pq_u32( + hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t +) -> uint32x4_t { + vsha1pq_u32_(hash_abcd, hash_e, wk) +} + +/// SHA1 schedule update accelerator, first part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1su0))] +pub unsafe fn vsha1su0q_u32( + w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t +) -> uint32x4_t { + vsha1su0q_u32_(w0_3, w4_7, w8_11) +} + +/// SHA1 schedule update accelerator, second part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha1su1))] +pub unsafe fn vsha1su1q_u32( + tw0_3: uint32x4_t, w12_15: uint32x4_t +) -> uint32x4_t { + vsha1su1q_u32_(tw0_3, w12_15) +} + +/// SHA256 hash update accelerator. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256h))] +pub unsafe fn vsha256hq_u32( + hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t +) -> uint32x4_t { + vsha256hq_u32_(hash_abcd, hash_efgh, wk) +} + +/// SHA256 hash update accelerator, upper part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256h2))] +pub unsafe fn vsha256h2q_u32( + hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t +) -> uint32x4_t { + vsha256h2q_u32_(hash_efgh, hash_abcd, wk) +} + +/// SHA256 schedule update accelerator, first part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256su0))] +pub unsafe fn vsha256su0q_u32( + w0_3: uint32x4_t, w4_7: uint32x4_t +) -> uint32x4_t { + vsha256su0q_u32_(w0_3, w4_7) +} + +/// SHA256 schedule update accelerator, second part. +#[inline] +#[target_feature(enable = "crypto")] +#[cfg_attr(test, assert_instr(sha256su1))] +pub unsafe fn vsha256su1q_u32( + tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t +) -> uint32x4_t { + vsha256su1q_u32_(tw0_3, w8_11, w12_15) +} + +#[cfg(test)] +mod tests { + use stdsimd_test::simd_test; + use simd::*; + use coresimd::aarch64::*; + use std::mem; + + #[simd_test = "crypto"] + unsafe fn test_vaeseq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let key = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7) + .into_bits(); + let r: u8x16 = vaeseq_u8(data, key).into_bits(); + assert_eq!( + r, + u8x16::new( + 124, + 123, + 124, + 118, + 124, + 123, + 124, + 197, + 124, + 123, + 124, + 118, + 124, + 123, + 124, + 197 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vaesdq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let key = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7) + .into_bits(); + let r: u8x16 = vaesdq_u8(data, key).into_bits(); + assert_eq!( + r, + u8x16::new( + 9, + 213, + 9, + 251, + 9, + 213, + 9, + 56, + 9, + 213, + 9, + 251, + 9, + 213, + 9, + 56 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vaesmcq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let r: u8x16 = vaesmcq_u8(data).into_bits(); + assert_eq!( + r, + u8x16::new( + 3, + 4, + 9, + 10, + 15, + 8, + 21, + 30, + 3, + 4, + 9, + 10, + 15, + 8, + 21, + 30 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vaesimcq_u8() { + let data = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8) + .into_bits(); + let r: u8x16 = vaesimcq_u8(data).into_bits(); + assert_eq!( + r, + u8x16::new( + 43, + 60, + 33, + 50, + 103, + 80, + 125, + 70, + 43, + 60, + 33, + 50, + 103, + 80, + 125, + 70 + ) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1h_u32() { + assert_eq!(vsha1h_u32(0x1234), 0x048d); + assert_eq!(vsha1h_u32(0x5678), 0x159e); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1su0q_u32() { + let r: u32x4 = vsha1su0q_u32( + u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) + .into_bits(), + u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) + .into_bits(), + u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32) + .into_bits(), + ).into_bits(); + assert_eq!(r, u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678)); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1su1q_u32() { + let r: u32x4 = vsha1su1q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1cq_u32() { + let r: u32x4 = vsha1cq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + 0x1234, + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1pq_u32() { + let r: u32x4 = vsha1pq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + 0x1234, + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha1mq_u32() { + let r: u32x4 = vsha1mq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + 0x1234, + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0xaa39693b, 0x0d51bf84, 0x001aa109, 0x0000d278) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256hq_u32() { + let r: u32x4 = vsha256hq_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256h2q_u32() { + let r: u32x4 = vsha256h2q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256su0q_u32() { + let r: u32x4 = vsha256su0q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152) + ); + } + + #[simd_test = "crypto"] + unsafe fn test_vsha256su1q_u32() { + let r: u32x4 = vsha256su1q_u32( + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0).into_bits(), + ).into_bits(); + assert_eq!( + r, + u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f) + ); + } +} diff --git a/library/stdarch/coresimd/aarch64/mod.rs b/library/stdarch/coresimd/aarch64/mod.rs index 5c794e375064..a4419db7575e 100644 --- a/library/stdarch/coresimd/aarch64/mod.rs +++ b/library/stdarch/coresimd/aarch64/mod.rs @@ -13,3 +13,6 @@ pub use self::v8::*; mod neon; pub use self::neon::*; + +mod crypto; +pub use self::crypto::*;