Fix: Utilize LLVM intrinsics where possible

This commit is contained in:
Gijs Burghoorn 2023-08-22 13:45:04 +02:00 committed by Amanieu d'Antras
parent 7fd998870d
commit 0e0a78a3f0
3 changed files with 112 additions and 310 deletions

View file

@ -1,4 +1,5 @@
use core::arch::asm;
#[cfg(test)]
use stdarch_test::assert_instr;
macro_rules! static_assert_imm2 {
($imm:ident) => {
@ -21,6 +22,30 @@ extern "unadjusted" {
#[link_name = "llvm.riscv.aes32dsmi"]
fn _aes32dsmi(rs1: i32, rs2: i32, bs: i32) -> i32;
#[link_name = "llvm.riscv.zip.i32"]
fn _zip(rs1: i32) -> i32;
#[link_name = "llvm.riscv.unzip.i32"]
fn _unzip(rs1: i32) -> i32;
#[link_name = "llvm.riscv.sha512sig0h"]
fn _sha512sig0h(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sig0l"]
fn _sha512sig0l(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sig1h"]
fn _sha512sig1h(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sig1l"]
fn _sha512sig1l(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sum0r"]
fn _sha512sum0r(rs1: i32, rs2: i32) -> i32;
#[link_name = "llvm.riscv.sha512sum1r"]
fn _sha512sum1r(rs1: i32, rs2: i32) -> i32;
}
/// AES final round encryption instruction for RV32.
@ -166,17 +191,8 @@ pub unsafe fn aes32dsmi<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
#[target_feature(enable = "zbkb")]
#[cfg_attr(test, assert_instr(zip))]
#[inline]
pub unsafe fn zip(rs: usize) -> usize {
let value: usize;
unsafe {
asm!(
"zip {rd},{rs}",
rd = lateout(reg) value,
rs = in(reg) rs,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn zip(rs: u32) -> u32 {
_zip(rs as i32) as u32
}
/// Place odd and even bits of the source word into upper/lower halves of the destination.
@ -197,17 +213,8 @@ pub unsafe fn zip(rs: usize) -> usize {
#[target_feature(enable = "zbkb")]
#[cfg_attr(test, assert_instr(unzip))]
#[inline]
pub unsafe fn unzip(rs: usize) -> usize {
let value: usize;
unsafe {
asm!(
"unzip {rd},{rs}",
rd = lateout(reg) value,
rs = in(reg) rs,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn unzip(rs: u32) -> u32 {
_unzip(rs as i32) as u32
}
/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash
@ -232,17 +239,7 @@ pub unsafe fn unzip(rs: usize) -> usize {
#[cfg_attr(test, assert_instr(sha512sig0h))]
#[inline]
pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
let value: u32;
unsafe {
asm!(
"sha512sig0h {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_sha512sig0h(rs1 as i32, rs2 as i32) as u32
}
/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function
@ -267,17 +264,7 @@ pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 {
#[cfg_attr(test, assert_instr(sha512sig0l))]
#[inline]
pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
let value: u32;
unsafe {
asm!(
"sha512sig0l {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_sha512sig0l(rs1 as i32, rs2 as i32) as u32
}
/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash
@ -302,17 +289,7 @@ pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 {
#[cfg_attr(test, assert_instr(sha512sig1h))]
#[inline]
pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
let value: u32;
unsafe {
asm!(
"sha512sig1h {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_sha512sig1h(rs1 as i32, rs2 as i32) as u32
}
/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function
@ -337,17 +314,7 @@ pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 {
#[cfg_attr(test, assert_instr(sha512sig1l))]
#[inline]
pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
let value: u32;
unsafe {
asm!(
"sha512sig1l {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_sha512sig1l(rs1 as i32, rs2 as i32) as u32
}
/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section
@ -371,17 +338,7 @@ pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 {
#[cfg_attr(test, assert_instr(sha512sum0r))]
#[inline]
pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
let value: u32;
unsafe {
asm!(
"sha512sum0r {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_sha512sum0r(rs1 as i32, rs2 as i32) as u32
}
/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section
@ -405,15 +362,5 @@ pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 {
#[cfg_attr(test, assert_instr(sha512sum1r))]
#[inline]
pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 {
let value: u32;
unsafe {
asm!(
"sha512sum1r {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_sha512sum1r(rs1 as i32, rs2 as i32) as u32
}

View file

@ -1,5 +1,8 @@
use core::arch::asm;
#[cfg(test)]
use stdarch_test::assert_instr;
macro_rules! static_assert_imm_0_until_10 {
($imm:ident) => {
static_assert!(
@ -10,8 +13,35 @@ macro_rules! static_assert_imm_0_until_10 {
}
extern "unadjusted" {
#[link_name = "llvm.riscv.aes64es"]
fn _aes64es(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64esm"]
fn _aes64esm(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64ds"]
fn _aes64ds(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64dsm"]
fn _aes64dsm(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.aes64ks1i"]
fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
#[link_name = "llvm.riscv.aes64ks2"]
fn _aes64ks2(rs1: i64, rs2: i64) -> i64;
#[link_name = "llvm.riscv.sha512sig0"]
fn _sha512sig0(rs1: i64) -> i64;
#[link_name = "llvm.riscv.sha512sig1"]
fn _sha512sig1(rs1: i64) -> i64;
#[link_name = "llvm.riscv.sha512sum0"]
fn _sha512sum0(rs1: i64) -> i64;
#[link_name = "llvm.riscv.sha512sum1"]
fn _sha512sum1(rs1: i64) -> i64;
}
/// AES final round encryption instruction for RV64.
@ -34,17 +64,7 @@ extern "unadjusted" {
#[cfg_attr(test, assert_instr(aes64es))]
#[inline]
pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"aes64es {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_aes64es(rs1 as i64, rs2 as i64) as u64
}
/// AES middle round encryption instruction for RV64.
@ -67,17 +87,7 @@ pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 {
#[cfg_attr(test, assert_instr(aes64esm))]
#[inline]
pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"aes64esm {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_aes64esm(rs1 as i64, rs2 as i64) as u64
}
/// AES final round decryption instruction for RV64.
@ -100,17 +110,7 @@ pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 {
#[cfg_attr(test, assert_instr(aes64ds))]
#[inline]
pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"aes64ds {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_aes64ds(rs1 as i64, rs2 as i64) as u64
}
/// AES middle round decryption instruction for RV64.
@ -133,17 +133,7 @@ pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
#[cfg_attr(test, assert_instr(aes64dsm))]
#[inline]
pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"aes64dsm {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_aes64dsm(rs1 as i64, rs2 as i64) as u64
}
/// This instruction implements part of the KeySchedule operation for the AES Block cipher
@ -196,17 +186,7 @@ pub unsafe fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
#[cfg_attr(test, assert_instr(aes64ks2))]
#[inline]
pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"aes64ks2 {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
_aes64ks2(rs1 as i64, rs2 as i64) as u64
}
/// Pack the low 16-bits of rs1 and rs2 into rd on RV64
@ -228,6 +208,8 @@ pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
#[cfg_attr(test, assert_instr(packw))]
#[inline]
pub unsafe fn packw(rs1: u64, rs2: u64) -> u64 {
// Note: There is no LLVM intrinsic for this instruction currently.
let value: u64;
unsafe {
asm!(
@ -261,16 +243,7 @@ pub unsafe fn packw(rs1: u64, rs2: u64) -> u64 {
#[cfg_attr(test, assert_instr(sha512sig0))]
#[inline]
pub unsafe fn sha512sig0(rs1: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"sha512sig0 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
_sha512sig0(rs1 as i64) as u64
}
/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\]
@ -293,16 +266,7 @@ pub unsafe fn sha512sig0(rs1: u64) -> u64 {
#[cfg_attr(test, assert_instr(sha512sig1))]
#[inline]
pub unsafe fn sha512sig1(rs1: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"sha512sig1 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
_sha512sig1(rs1 as i64) as u64
}
/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\]
@ -325,16 +289,7 @@ pub unsafe fn sha512sig1(rs1: u64) -> u64 {
#[cfg_attr(test, assert_instr(sha512sum0))]
#[inline]
pub unsafe fn sha512sum0(rs1: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"sha512sum0 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
_sha512sum0(rs1 as i64) as u64
}
/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
@ -357,14 +312,5 @@ pub unsafe fn sha512sum0(rs1: u64) -> u64 {
#[cfg_attr(test, assert_instr(sha512sum1))]
#[inline]
pub unsafe fn sha512sum1(rs1: u64) -> u64 {
let value: u64;
unsafe {
asm!(
"sha512sum1 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
_sha512sum1(rs1 as i64) as u64
}

View file

@ -1,5 +1,8 @@
use core::arch::asm;
#[cfg(test)]
use stdarch_test::assert_instr;
macro_rules! static_assert_imm2 {
($imm:ident) => {
static_assert!(
@ -15,6 +18,24 @@ extern "unadjusted" {
#[link_name = "llvm.riscv.sm4ks"]
fn _sm4ks(rs1: i32, rs2: i32, bs: i32) -> i32;
#[link_name = "llvm.riscv.sm3p0"]
fn _sm3p0(rs1: i32) -> i32;
#[link_name = "llvm.riscv.sm3p1"]
fn _sm3p1(rs1: i32) -> i32;
#[link_name = "llvm.riscv.sha256sig0"]
fn _sha256sig0(rs1: i32) -> i32;
#[link_name = "llvm.riscv.sha256sig1"]
fn _sha256sig1(rs1: i32) -> i32;
#[link_name = "llvm.riscv.sha256sum0"]
fn _sha256sum0(rs1: i32) -> i32;
#[link_name = "llvm.riscv.sha256sum1"]
fn _sha256sum1(rs1: i32) -> i32;
}
#[cfg(target_arch = "riscv32")]
@ -35,37 +56,6 @@ extern "unadjusted" {
fn _xperm4_64(rs1: i64, rs2: i64) -> i64;
}
/// Pack the low halves of rs1 and rs2 into rd.
///
/// The pack instruction packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in
/// the lower half and rs2 in the upper half.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.17
///
/// # Safety
///
/// This function is safe to use if the `zbkb` target feature is present.
#[target_feature(enable = "zbkb")]
#[cfg_attr(test, assert_instr(pack))]
#[inline]
pub unsafe fn pack(rs1: usize, rs2: usize) -> usize {
let value: usize;
unsafe {
asm!(
"pack {rd},{rs1},{rs2}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
rs2 = in(reg) rs2,
options(pure, nomem, nostack),
)
}
value
}
/// Pack the low bytes of rs1 and rs2 into rd.
///
/// And the packh instruction packs the least-significant bytes of rs1 and rs2 into the 16
@ -84,6 +74,8 @@ pub unsafe fn pack(rs1: usize, rs2: usize) -> usize {
#[cfg_attr(test, assert_instr(packh))]
#[inline]
pub unsafe fn packh(rs1: usize, rs2: usize) -> usize {
// Note: There is no LLVM intrinsic for this instruction currently.
let value: usize;
unsafe {
asm!(
@ -97,35 +89,6 @@ pub unsafe fn packh(rs1: usize, rs2: usize) -> usize {
value
}
/// Reverse the bits in each byte of a source register.
///
/// This instruction reverses the order of the bits in every byte of a register.
///
/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
///
/// Version: v1.0.1
///
/// Section: 3.13
///
/// # Safety
///
/// This function is safe to use if the `zbkb` target feature is present.
#[target_feature(enable = "zbkb")]
#[cfg_attr(test, assert_instr(brev8))]
#[inline]
pub unsafe fn brev8(rs: usize) -> usize {
let value: usize;
unsafe {
asm!(
"brev8 {rd},{rs}",
rd = lateout(reg) value,
rs = in(reg) rs,
options(pure, nomem, nostack),
)
}
value
}
/// Byte-wise lookup of indices into a vector in registers.
///
/// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8
@ -210,17 +173,8 @@ pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize {
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha256sig0))]
#[inline]
pub unsafe fn sha256sig0(rs1: usize) -> usize {
let value: usize;
unsafe {
asm!(
"sha256sig0 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn sha256sig0(rs1: u32) -> u32 {
_sha256sig0(rs1 as i32) as u32
}
/// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\]
@ -245,17 +199,8 @@ pub unsafe fn sha256sig0(rs1: usize) -> usize {
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha256sig1))]
#[inline]
pub unsafe fn sha256sig1(rs1: usize) -> usize {
let value: usize;
unsafe {
asm!(
"sha256sig1 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn sha256sig1(rs1: u32) -> u32 {
_sha256sig1(rs1 as i32) as u32
}
/// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\]
@ -280,17 +225,8 @@ pub unsafe fn sha256sig1(rs1: usize) -> usize {
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha256sum0))]
#[inline]
pub unsafe fn sha256sum0(rs1: usize) -> usize {
let value: usize;
unsafe {
asm!(
"sha256sum0 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn sha256sum0(rs1: u32) -> u32 {
_sha256sum0(rs1 as i32) as u32
}
/// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\]
@ -315,17 +251,8 @@ pub unsafe fn sha256sum0(rs1: usize) -> usize {
#[target_feature(enable = "zknh")]
#[cfg_attr(test, assert_instr(sha256sum1))]
#[inline]
pub unsafe fn sha256sum1(rs1: usize) -> usize {
let value: usize;
unsafe {
asm!(
"sha256sum1 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn sha256sum1(rs1: u32) -> u32 {
_sha256sum1(rs1 as i32) as u32
}
/// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\].
@ -520,17 +447,8 @@ pub unsafe fn sm4ks<const BS: u8>(rs1: u32, rs2: u32) -> u32 {
#[target_feature(enable = "zksh")]
#[cfg_attr(test, assert_instr(sm3p0))]
#[inline]
pub unsafe fn sm3p0(rs1: usize) -> usize {
let value: usize;
unsafe {
asm!(
"sm3p0 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn sm3p0(rs1: u32) -> u32 {
_sm3p0(rs1 as i32) as u32
}
/// Implements the P1 transformation function as used in the SM3 hash function [4, 30].
@ -573,15 +491,6 @@ pub unsafe fn sm3p0(rs1: usize) -> usize {
#[target_feature(enable = "zksh")]
#[cfg_attr(test, assert_instr(sm3p1))]
#[inline]
pub unsafe fn sm3p1(rs1: usize) -> usize {
let value: usize;
unsafe {
asm!(
"sm3p1 {rd},{rs1}",
rd = lateout(reg) value,
rs1 = in(reg) rs1,
options(pure, nomem, nostack),
)
}
value
pub unsafe fn sm3p1(rs1: u32) -> u32 {
_sm3p1(rs1 as i32) as u32
}