From f0be271de9082f1c71539d15ef18218f1faf419a Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Sun, 6 Aug 2023 15:41:34 +0200 Subject: [PATCH] Implement RISC-V Zk extension intrinsics --- library/stdarch/crates/core_arch/src/mod.rs | 5 + .../crates/core_arch/src/riscv32/mod.rs | 5 + .../crates/core_arch/src/riscv32/zk.rs | 458 ++++++++++++++ .../crates/core_arch/src/riscv64/mod.rs | 4 + .../crates/core_arch/src/riscv64/zk.rs | 388 ++++++++++++ .../crates/core_arch/src/riscv_shared/mod.rs | 175 +----- .../crates/core_arch/src/riscv_shared/zk.rs | 594 ++++++++++++++++++ 7 files changed, 1458 insertions(+), 171 deletions(-) create mode 100644 library/stdarch/crates/core_arch/src/riscv32/mod.rs create mode 100644 library/stdarch/crates/core_arch/src/riscv32/zk.rs create mode 100644 library/stdarch/crates/core_arch/src/riscv64/zk.rs create mode 100644 library/stdarch/crates/core_arch/src/riscv_shared/zk.rs diff --git a/library/stdarch/crates/core_arch/src/mod.rs b/library/stdarch/crates/core_arch/src/mod.rs index f2cf11e479ce..ad3ec863d48e 100644 --- a/library/stdarch/crates/core_arch/src/mod.rs +++ b/library/stdarch/crates/core_arch/src/mod.rs @@ -66,6 +66,7 @@ pub mod arch { #[doc(cfg(any(target_arch = "riscv32")))] #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")] pub mod riscv32 { + pub use crate::core_arch::riscv32::*; pub use crate::core_arch::riscv_shared::*; } @@ -279,6 +280,10 @@ mod aarch64; #[doc(cfg(any(target_arch = "arm")))] mod arm; +#[cfg(any(target_arch = "riscv32", doc))] +#[doc(cfg(any(target_arch = "riscv32")))] +mod riscv32; + #[cfg(any(target_arch = "riscv64", doc))] #[doc(cfg(any(target_arch = "riscv64")))] mod riscv64; diff --git a/library/stdarch/crates/core_arch/src/riscv32/mod.rs b/library/stdarch/crates/core_arch/src/riscv32/mod.rs new file mode 100644 index 000000000000..394d695ae0ba --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv32/mod.rs @@ -0,0 +1,5 @@ +//! 
RISC-V RV32 specific intrinsics
+
+mod zk;
+
+pub use zk::*;
\ No newline at end of file
diff --git a/library/stdarch/crates/core_arch/src/riscv32/zk.rs b/library/stdarch/crates/core_arch/src/riscv32/zk.rs
new file mode 100644
index 000000000000..56115a986fea
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/riscv32/zk.rs
@@ -0,0 +1,458 @@
+#[allow(unused)]
+use core::arch::asm;
+
+#[allow(unused)]
+macro_rules! constify_imm2 {
+    ($imm2:expr, $expand:ident) => {
+        #[allow(overflowing_literals)]
+        match $imm2 & 0b11 {
+            0b00 => $expand!(0),
+            0b01 => $expand!(1),
+            0b10 => $expand!(2),
+            _ => $expand!(3),
+        }
+    };
+}
+
+/// AES final round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// forward AES SBox operation, before XOR’ing the result with rs1. This instruction must
+/// always be implemented such that its execution latency does not depend on the data being
+/// operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.3
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zkne` target feature is present.
+#[target_feature(enable = "zkne")]
+#[cfg_attr(test, assert_instr(aes32esi))]
+#[inline]
+pub unsafe fn aes32esi(rs1: u32, rs2: u32, bs: u8) -> u32 {
+    macro_rules! aes32esi {
+        ($imm2:expr) => {{
+            let value: u32;
+            unsafe {
+                asm!(
+                    concat!("aes32esi {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                );
+            }
+            value
+        }}
+    }
+    constify_imm2!(bs, aes32esi)
+}
+
+/// AES middle round encryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs.
To this it applies the +/// forward AES SBox operation, and a partial forward MixColumn, before XOR’ing the result with +/// rs1. This instruction must always be implemented such that its execution latency does not +/// depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.4 +/// +/// # Note +/// +/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` target feature is present. +#[target_feature(enable = "zkne")] +#[cfg_attr(test, assert_instr(aes32esmi))] +#[inline] +pub unsafe fn aes32esmi(rs1: u32, rs2: u32, bs: u8) -> u32 { + macro_rules! aes32esmi { + ($imm2:expr) => {{ + let value: u32; + unsafe { + asm!( + concat!("aes32esmi {rd},{rs1},{rs2},", $imm2), + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ); + } + value + }} + } + constify_imm2!(bs, aes32esmi) +} + +/// AES final round decryption instruction for RV32. +/// +/// This instruction sources a single byte from rs2 according to bs. To this it applies the +/// inverse AES SBox operation, and XOR’s the result with rs1. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.1 +/// +/// # Note +/// +/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zknd` target feature is present. +#[target_feature(enable = "zknd")] +#[cfg_attr(test, assert_instr(aes32dsi))] +#[inline] +pub unsafe fn aes32dsi(rs1: u32, rs2: u32, bs: u8) -> u32 { + macro_rules! 
aes32dsi {
+        ($imm2:expr) => {{
+            let value: u32;
+            unsafe {
+                asm!(
+                    concat!("aes32dsi {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                );
+            }
+            value
+        }}
+    }
+    constify_imm2!(bs, aes32dsi)
+}
+
+/// AES middle round decryption instruction for RV32.
+///
+/// This instruction sources a single byte from rs2 according to bs. To this it applies the
+/// inverse AES SBox operation, and a partial inverse MixColumn, before XOR’ing the result with
+/// rs1. This instruction must always be implemented such that its execution latency does not
+/// depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.2
+///
+/// # Note
+///
+/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are
+/// used.
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes32dsmi))]
+#[inline]
+pub unsafe fn aes32dsmi(rs1: u32, rs2: u32, bs: u8) -> u32 {
+    macro_rules! aes32dsmi {
+        ($imm2:expr) => {{
+            let value: u32;
+            unsafe {
+                asm!(
+                    concat!("aes32dsmi {rd},{rs1},{rs2},", $imm2),
+                    rd = lateout(reg) value,
+                    rs1 = in(reg) rs1,
+                    rs2 = in(reg) rs2,
+                    options(pure, nomem, nostack),
+                );
+            }
+            value
+        }}
+    }
+    constify_imm2!(bs, aes32dsmi)
+}
+
+/// Place upper/lower halves of the source register into odd/even bits of the destination
+/// respectively.
+///
+/// This instruction places bits in the low half of the source register into the even bit
+/// positions of the destination, and bits in the high half of the source register into the odd
+/// bit positions of the destination. It is the inverse of the unzip instruction. This
+/// instruction is available only on RV32.
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.49 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(zip))] +#[inline] +pub unsafe fn zip(rs: usize) -> usize { + let value: usize; + unsafe { + asm!( + "zip {rd},{rs}", + rd = lateout(reg) value, + rs = in(reg) rs, + options(pure, nomem, nostack), + ) + } + value +} + +/// Place odd and even bits of the source word into upper/lower halves of the destination. +/// +/// This instruction places the even bits of the source register into the low half of the +/// destination, and the odd bits of the source into the high bits of the destination. It is +/// the inverse of the zip instruction. This instruction is available only on RV32. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.45 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(unzip))] +#[inline] +pub unsafe fn unzip(rs: usize) -> usize { + let value: usize; + unsafe { + asm!( + "unzip {rd},{rs}", + rd = lateout(reg) value, + rs = in(reg) rs, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the high half of the Sigma0 transformation, as used in the SHA2-512 hash +/// function \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig0l instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.31 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig0h))] +#[inline] +pub unsafe fn sha512sig0h(rs1: u32, rs2: u32) -> u32 { + let value: u32; + unsafe { + asm!( + "sha512sig0h {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the low half of the Sigma0 transformation, as used in the SHA2-512 hash function +/// \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma0 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig0h instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.32 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig0l))] +#[inline] +pub unsafe fn sha512sig0l(rs1: u32, rs2: u32) -> u32 { + let value: u32; + unsafe { + asm!( + "sha512sig0l {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the high half of the Sigma1 transformation, as used in the SHA2-512 hash +/// function \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. 
Used to compute the Sigma1 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig1l instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.33 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig1h))] +#[inline] +pub unsafe fn sha512sig1h(rs1: u32, rs2: u32) -> u32 { + let value: u32; + unsafe { + asm!( + "sha512sig1h {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the low half of the Sigma1 transformation, as used in the SHA2-512 hash function +/// \[49\] (Section 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sigma1 transform of the +/// SHA2-512 hash function in conjunction with the sha512sig1h instruction. The transform is a +/// 64-bit to 64-bit function, so the input and output are each represented by two 32-bit +/// registers. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.34 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. 
+#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig1l))] +#[inline] +pub unsafe fn sha512sig1l(rs1: u32, rs2: u32) -> u32 { + let value: u32; + unsafe { + asm!( + "sha512sig1l {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sum0 transformation, as used in the SHA2-512 hash function \[49\] (Section +/// 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sum0 transform of the +/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and +/// output is represented by two 32-bit registers. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.35 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sum0r))] +#[inline] +pub unsafe fn sha512sum0r(rs1: u32, rs2: u32) -> u32 { + let value: u32; + unsafe { + asm!( + "sha512sum0r {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sum1 transformation, as used in the SHA2-512 hash function \[49\] (Section +/// 4.1.3). +/// +/// This instruction is implemented on RV32 only. Used to compute the Sum1 transform of the +/// SHA2-512 hash function. The transform is a 64-bit to 64-bit function, so the input and +/// output is represented by two 32-bit registers. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. 
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.36 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sum1r))] +#[inline] +pub unsafe fn sha512sum1r(rs1: u32, rs2: u32) -> u32 { + let value: u32; + unsafe { + asm!( + "sha512sum1r {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} \ No newline at end of file diff --git a/library/stdarch/crates/core_arch/src/riscv64/mod.rs b/library/stdarch/crates/core_arch/src/riscv64/mod.rs index 751b9a860f52..ad16d6c23126 100644 --- a/library/stdarch/crates/core_arch/src/riscv64/mod.rs +++ b/library/stdarch/crates/core_arch/src/riscv64/mod.rs @@ -1,6 +1,10 @@ //! RISC-V RV64 specific intrinsics use crate::arch::asm; +mod zk; + +pub use zk::*; + /// Loads virtual machine memory by unsigned word integer /// /// This instruction performs an explicit memory access as though `V=1`; diff --git a/library/stdarch/crates/core_arch/src/riscv64/zk.rs b/library/stdarch/crates/core_arch/src/riscv64/zk.rs new file mode 100644 index 000000000000..de45ad62c7d6 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv64/zk.rs @@ -0,0 +1,388 @@ +#[allow(unused)] +use core::arch::asm; + +#[allow(unused)] +macro_rules! constify_imm_0_until_10 { + ($imm2:expr, $expand:ident) => { + match $imm2 { + 1 => $expand!(1), + 2 => $expand!(2), + 3 => $expand!(3), + 4 => $expand!(4), + 5 => $expand!(5), + 6 => $expand!(6), + 7 => $expand!(7), + 8 => $expand!(8), + 9 => $expand!(9), + 10 => $expand!(10), + _ => $expand!(0), + } + }; +} + +/// AES final round encryption instruction for RV64. 
+/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the ShiftRows and SubBytes steps. This instruction must +/// always be implemented such that its execution latency does not depend on the data being +/// operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.7 +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` target feature is present. +#[target_feature(enable = "zkne")] +#[cfg_attr(test, assert_instr(aes64es))] +#[inline] +pub unsafe fn aes64es(rs1: u64, rs2: u64) -> u64 { + let value: u64; + unsafe { + asm!( + "aes64es {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// AES middle round encryption instruction for RV64. +/// +/// Uses the two 64-bit source registers to represent the entire AES state, and produces half +/// of the next round output, applying the ShiftRows, SubBytes and MixColumns steps. This +/// instruction must always be implemented such that its execution latency does not depend on +/// the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.8 +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` target feature is present. +#[target_feature(enable = "zkne")] +#[cfg_attr(test, assert_instr(aes64esm))] +#[inline] +pub unsafe fn aes64esm(rs1: u64, rs2: u64) -> u64 { + let value: u64; + unsafe { + asm!( + "aes64esm {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// AES final round decryption instruction for RV64. 
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the Inverse ShiftRows and SubBytes steps. This
+/// instruction must always be implemented such that its execution latency does not depend on
+/// the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.5
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64ds))]
+#[inline]
+pub unsafe fn aes64ds(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "aes64ds {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// AES middle round decryption instruction for RV64.
+///
+/// Uses the two 64-bit source registers to represent the entire AES state, and produces half
+/// of the next round output, applying the Inverse ShiftRows, SubBytes and MixColumns steps.
+/// This instruction must always be implemented such that its execution latency does not depend
+/// on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.6
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknd` target feature is present.
+#[target_feature(enable = "zknd")]
+#[cfg_attr(test, assert_instr(aes64dsm))]
+#[inline]
+pub unsafe fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "aes64dsm {rd},{rs1},{rs2}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            rs2 = in(reg) rs2,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// This instruction implements part of the KeySchedule operation for the AES Block cipher
+/// involving the SBox operation.
+/// +/// This instruction implements the rotation, SubBytes and Round Constant addition steps of the +/// AES block cipher Key Schedule. This instruction must always be implemented such that its +/// execution latency does not depend on the data being operated on. Note that rnum must be in +/// the range 0x0..0xA. The values 0xB..0xF are reserved. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.10 +/// +/// # Note +/// +/// The `rnum` parameter is expected to be a constant value inside the range of `0..=10`, if a +/// value outside the valid range is given it uses `rnum=0`. +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` or `zknd` target feature is present. +#[target_feature(enable = "zkne", enable = "zknd")] +#[cfg_attr(test, assert_instr(aes64ks1i))] +#[inline] +pub unsafe fn aes64ks1i(rs1: u64, rnum: u8) -> u64 { + macro_rules! aes64ks1i { + ($imm_0_until_10:expr) => {{ + let value: u64; + unsafe { + asm!( + concat!("aes64ks1i {rd},{rs1},", $imm_0_until_10), + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value + }} + } + constify_imm_0_until_10!(rnum, aes64ks1i) +} + +/// This instruction implements part of the KeySchedule operation for the AES Block cipher. +/// +/// This instruction implements the additional XOR’ing of key words as part of the AES block +/// cipher Key Schedule. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.11 +/// +/// # Safety +/// +/// This function is safe to use if the `zkne` or `zknd` target feature is present. 
+#[target_feature(enable = "zkne", enable = "zknd")] +#[cfg_attr(test, assert_instr(aes64ks2))] +#[inline] +pub unsafe fn aes64ks2(rs1: u64, rs2: u64) -> u64 { + let value: u64; + unsafe { + asm!( + "aes64ks2 {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Pack the low 16-bits of rs1 and rs2 into rd on RV64 +/// +/// This instruction packs the low 16 bits of rs1 and rs2 into the 32 least-significant bits of +/// rd, sign extending the 32-bit result to the rest of rd. This instruction only exists on +/// RV64 based systems. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.26 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(packw))] +#[inline] +pub unsafe fn packw(rs1: u64, rs2: u64) -> u64 { + let value: u64; + unsafe { + asm!( + "packw {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sigma0 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sigma0 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.37 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. 
+#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig0))] +#[inline] +pub unsafe fn sha512sig0(rs1: u64) -> u64 { + let value: u64; + unsafe { + asm!( + "sha512sig0 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sigma1 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sigma1 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.38 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha512sig1))] +#[inline] +pub unsafe fn sha512sig1(rs1: u64) -> u64 { + let value: u64; + unsafe { + asm!( + "sha512sig1 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sum0 transformation function as used in the SHA2-512 hash function \[49\] +/// (Section 4.1.3). +/// +/// This instruction is supported for the RV64 base architecture. It implements the Sum0 +/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be +/// implemented such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.39 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. 
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum0))]
+#[inline]
+pub unsafe fn sha512sum0(rs1: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "sha512sum0 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
+
+/// Implements the Sum1 transformation function as used in the SHA2-512 hash function \[49\]
+/// (Section 4.1.3).
+///
+/// This instruction is supported for the RV64 base architecture. It implements the Sum1
+/// transform of the SHA2-512 hash function. \[49\]. This instruction must always be
+/// implemented such that its execution latency does not depend on the data being operated on.
+///
+/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions
+///
+/// Version: v1.0.1
+///
+/// Section: 3.40
+///
+/// # Safety
+///
+/// This function is safe to use if the `zknh` target feature is present.
+#[target_feature(enable = "zknh")]
+#[cfg_attr(test, assert_instr(sha512sum1))]
+#[inline]
+pub unsafe fn sha512sum1(rs1: u64) -> u64 {
+    let value: u64;
+    unsafe {
+        asm!(
+            "sha512sum1 {rd},{rs1}",
+            rd = lateout(reg) value,
+            rs1 = in(reg) rs1,
+            options(pure, nomem, nostack),
+        )
+    }
+    value
+}
diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs
index b796ee0e77e6..d14a44004463 100644
--- a/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs
+++ b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs
@@ -1,8 +1,11 @@
 //! Shared RISC-V intrinsics
+
+mod zk;
 mod p;
 
 #[unstable(feature = "stdsimd", issue = "27731")]
 pub use p::*;
+pub use zk::*;
 
 use crate::arch::asm;
 
@@ -628,179 +631,9 @@ pub fn frflags() -> u32 {
 /// and then writing a new value obtained from the five least-significant bits of
 /// input variable `value` into `fflags`.
#[inline] +#[unstable(feature = "stdsimd", issue = "27731")] pub fn fsflags(value: u32) -> u32 { let original: u32; unsafe { asm!("fsflags {}, {}", out(reg) original, in(reg) value, options(nomem, nostack)) } original } - -/// `P0` transformation function as is used in the SM3 hash algorithm -/// -/// This function is included in `Zksh` extension. It's defined as: -/// -/// ```text -/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17) -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. -/// -/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the -/// compression function `CF` uses the intermediate value `TT2` to calculate -/// the variable `E` in one iteration for subsequent processes. -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. -#[inline] -#[target_feature(enable = "zksh")] -pub fn sm3p0(x: u32) -> u32 { - let ans: u32; - unsafe { asm!("sm3p0 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) }; - ans -} - -/// `P1` transformation function as is used in the SM3 hash algorithm -/// -/// This function is included in `Zksh` extension. It's defined as: -/// -/// ```text -/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23) -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. -/// -/// In the SM3 algorithm, the `P1` transformation is used to expand message, -/// where expanded word `Wj` can be generated from the previous words. -/// The whole process can be described as the following pseudocode: -/// -/// ```text -/// FOR j=16 TO 67 -/// Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6 -/// ENDFOR -/// ``` -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. 
-#[inline] -#[target_feature(enable = "zksh")] -pub fn sm3p1(x: u32) -> u32 { - let ans: u32; - unsafe { asm!("sm3p1 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) }; - ans -} - -/// Accelerates the round function `F` in the SM4 block cipher algorithm -/// -/// This instruction is included in extension `Zksed`. It's defined as: -/// -/// ```text -/// SM4ED(x, a, BS) = x ⊕ T(ai) -/// ... where -/// ai = a.bytes[BS] -/// T(ai) = L(τ(ai)) -/// bi = τ(ai) = SM4-S-Box(ai) -/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24) -/// SM4ED = (ci ≪ (BS * 8)) ⊕ x -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. -/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ` -/// and linear layer transform `L`. -/// -/// In the SM4 algorithm, the round function `F` is defined as: -/// -/// ```text -/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk) -/// ... where -/// T(A) = L(τ(A)) -/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3)) -/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24) -/// ``` -/// -/// It can be implemented by `sm4ed` instruction like: -/// -/// ```no_run -/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] -/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { -/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed; -/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed; -/// let a = x1 ^ x2 ^ x3 ^ rk; -/// let c0 = sm4ed::<0>(x0, a); -/// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc. -/// let c2 = sm4ed::<2>(c1, a); -/// let c3 = sm4ed::<3>(c2, a); -/// return c3; // c3 represents c[0..=3] -/// # } -/// ``` -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. 
-#[inline] -#[target_feature(enable = "zksed")] -pub fn sm4ed(x: u32, a: u32) -> u32 { - static_assert!(BS <= 3); - let ans: u32; - unsafe { - asm!("sm4ed {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) a, const BS, options(pure, nomem, nostack)) - }; - ans -} - -/// Accelerates the key schedule operation in the SM4 block cipher algorithm -/// -/// This instruction is included in extension `Zksed`. It's defined as: -/// -/// ```text -/// SM4KS(x, k, BS) = x ⊕ T'(ki) -/// ... where -/// ki = k.bytes[BS] -/// T'(ki) = L'(τ(ki)) -/// bi = τ(ki) = SM4-S-Box(ki) -/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23) -/// SM4KS = (ci ≪ (BS * 8)) ⊕ x -/// ``` -/// -/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. -/// As is defined above, `T'` is a combined transformation of non linear S-Box transform `τ` -/// and the replaced linear layer transform `L'`. -/// -/// In the SM4 algorithm, the key schedule is defined as: -/// -/// ```text -/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i]) -/// ... where -/// K[0..=3] = MK[0..=3] ⊕ FK[0..=3] -/// T'(K) = L'(τ(K)) -/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3)) -/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23) -/// ``` -/// -/// where `MK` represents the input 128-bit encryption key, -/// constants `FK` and `CK` are fixed system configuration constant values defined by the SM4 algorithm. -/// Hence, the key schedule operation can be implemented by `sm4ks` instruction like: -/// -/// ```no_run -/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] -/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 { -/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks; -/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks; -/// let k = k1 ^ k2 ^ k3 ^ ck_i; -/// let c0 = sm4ks::<0>(k0, k); -/// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc. 
-/// let c2 = sm4ks::<2>(c1, k); -/// let c3 = sm4ks::<3>(c2, k); -/// return c3; // c3 represents c[0..=3] -/// # } -/// ``` -/// -/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of -/// this instruction must always be independent from the data it operates on. -#[inline] -#[target_feature(enable = "zksed")] -pub fn sm4ks(x: u32, k: u32) -> u32 { - static_assert!(BS <= 3); - let ans: u32; - unsafe { - asm!("sm4ks {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) k, const BS, options(pure, nomem, nostack)) - }; - ans -} diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/zk.rs b/library/stdarch/crates/core_arch/src/riscv_shared/zk.rs new file mode 100644 index 000000000000..0877e052a733 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv_shared/zk.rs @@ -0,0 +1,594 @@ +#[allow(unused)] +use core::arch::asm; + +#[allow(unused)] +macro_rules! constify_imm2 { + ($imm2:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match $imm2 & 0b11 { + 0b00 => $expand!(0), + 0b01 => $expand!(1), + 0b10 => $expand!(2), + _ => $expand!(3), + } + }; +} + +/// Pack the low halves of rs1 and rs2 into rd. +/// +/// The pack instruction packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in +/// the lower half and rs2 in the upper half. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.17 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(pack))] +#[inline] +pub unsafe fn pack(rs1: usize, rs2: usize) -> usize { + let value: usize; + unsafe { + asm!( + "pack {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Pack the low bytes of rs1 and rs2 into rd. 
+/// +/// The packh instruction packs the least-significant bytes of rs1 and rs2 into the 16 +/// least-significant bits of rd, zero extending the rest of rd. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.18 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(packh))] +#[inline] +pub unsafe fn packh(rs1: usize, rs2: usize) -> usize { + let value: usize; + unsafe { + asm!( + "packh {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Reverse the bits in each byte of a source register. +/// +/// This instruction reverses the order of the bits in every byte of a register. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.13 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkb` target feature is present. +#[target_feature(enable = "zbkb")] +#[cfg_attr(test, assert_instr(brev8))] +#[inline] +pub unsafe fn brev8(rs: usize) -> usize { + let value: usize; + unsafe { + asm!( + "brev8 {rd},{rs}", + rd = lateout(reg) value, + rs = in(reg) rs, + options(pure, nomem, nostack), + ) + } + value +} + +/// Byte-wise lookup of indices into a vector in registers. +/// +/// The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8 +/// 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is +/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 +/// is out of bounds.
+/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.47 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkx` target feature is present. +#[target_feature(enable = "zbkx")] +#[cfg_attr(test, assert_instr(xperm8))] +#[inline] +pub unsafe fn xperm8(rs1: usize, rs2: usize) -> usize { + let value: usize; + unsafe { + asm!( + "xperm8 {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Nibble-wise lookup of indices into a vector. +/// +/// The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4 +/// 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is +/// each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 +/// is out of bounds. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.48 +/// +/// # Safety +/// +/// This function is safe to use if the `zbkx` target feature is present. +#[target_feature(enable = "zbkx")] +#[cfg_attr(test, assert_instr(xperm4))] +#[inline] +pub unsafe fn xperm4(rs1: usize, rs2: usize) -> usize { + let value: usize; + unsafe { + asm!( + "xperm4 {rd},{rs1},{rs2}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sigma0 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits.
Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.27 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sig0))] +#[inline] +pub unsafe fn sha256sig0(rs1: usize) -> usize { + let value: usize; + unsafe { + asm!( + "sha256sig0 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sigma1 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.28 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. 
+#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sig1))] +#[inline] +pub unsafe fn sha256sig1(rs1: usize) -> usize { + let value: usize; + unsafe { + asm!( + "sha256sig1 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sum0 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits. Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.29 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sum0))] +#[inline] +pub unsafe fn sha256sum0(rs1: usize) -> usize { + let value: usize; + unsafe { + asm!( + "sha256sum0 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the Sum1 transformation function as used in the SHA2-256 hash function \[49\] +/// (Section 4.1.2). +/// +/// This instruction is supported for both RV32 and RV64 base architectures. For RV32, the +/// entire XLEN source register is operated on. For RV64, the low 32 bits of the source +/// register are operated on, and the result sign extended to XLEN bits.
Though named for +/// SHA2-256, the instruction works for both the SHA2-224 and SHA2-256 parameterisations as +/// described in \[49\]. This instruction must always be implemented such that its execution +/// latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.30 +/// +/// # Safety +/// +/// This function is safe to use if the `zknh` target feature is present. +#[target_feature(enable = "zknh")] +#[cfg_attr(test, assert_instr(sha256sum1))] +#[inline] +pub unsafe fn sha256sum1(rs1: usize) -> usize { + let value: usize; + unsafe { + asm!( + "sha256sum1 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} + +/// Accelerates the block encrypt/decrypt operation of the SM4 block cipher \[5, 31\]. +/// +/// Implements a T-tables in hardware style approach to accelerating the SM4 round function. A +/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are +/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction +/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to +/// XLEN bits. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.43 +/// +/// # Note +/// +/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zksed` target feature is present. +/// +/// # Details +/// +/// Accelerates the round function `F` in the SM4 block cipher algorithm +/// +/// This instruction is included in extension `Zksed`.
It's defined as: +/// +/// ```text +/// SM4ED(x, a, BS) = x ⊕ T(ai) +/// ... where +/// ai = a.bytes[BS] +/// T(ai) = L(τ(ai)) +/// bi = τ(ai) = SM4-S-Box(ai) +/// ci = L(bi) = bi ⊕ (bi ≪ 2) ⊕ (bi ≪ 10) ⊕ (bi ≪ 18) ⊕ (bi ≪ 24) +/// SM4ED = (ci ≪ (BS * 8)) ⊕ x +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// As is defined above, `T` is a combined transformation of non linear S-Box transform `τ` +/// and linear layer transform `L`. +/// +/// In the SM4 algorithm, the round function `F` is defined as: +/// +/// ```text +/// F(x0, x1, x2, x3, rk) = x0 ⊕ T(x1 ⊕ x2 ⊕ x3 ⊕ rk) +/// ... where +/// T(A) = L(τ(A)) +/// B = τ(A) = (SM4-S-Box(a0), SM4-S-Box(a1), SM4-S-Box(a2), SM4-S-Box(a3)) +/// C = L(B) = B ⊕ (B ≪ 2) ⊕ (B ≪ 10) ⊕ (B ≪ 18) ⊕ (B ≪ 24) +/// ``` +/// +/// It can be implemented by `sm4ed` instruction like: +/// +/// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn round_function(x0: usize, x1: usize, x2: usize, x3: usize, rk: usize) -> usize { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed; +/// let a = x1 ^ x2 ^ x3 ^ rk; +/// let c0 = unsafe { sm4ed(x0, a, 0) }; +/// let c1 = unsafe { sm4ed(c0, a, 1) }; // c1 represents c[0..=1], etc. +/// let c2 = unsafe { sm4ed(c1, a, 2) }; +/// let c3 = unsafe { sm4ed(c2, a, 3) }; +/// return c3; // c3 represents c[0..=3] +/// # } +/// ``` +#[target_feature(enable = "zksed")] +#[cfg_attr(test, assert_instr(sm4ed))] +#[inline] +pub unsafe fn sm4ed(rs1: usize, rs2: usize, bs: u8) -> usize { + macro_rules! sm4ed { + ($imm2:expr) => {{ + let value: usize; + unsafe { + asm!( + concat!("sm4ed {rd},{rs1},{rs2},", $imm2), + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value + }} + } + constify_imm2!(bs, sm4ed) +} + +/// Accelerates the Key Schedule operation of the SM4 block cipher \[5, 31\].
+/// +/// Implements a T-tables in hardware style approach to accelerating the SM4 Key Schedule. A +/// byte is extracted from rs2 based on bs, to which the SBox and linear layer transforms are +/// applied, before the result is XOR’d with rs1 and written back to rd. This instruction +/// exists on RV32 and RV64 base architectures. On RV64, the 32-bit result is sign extended to +/// XLEN bits. This instruction must always be implemented such that its execution latency does +/// not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.44 +/// +/// # Note +/// +/// The `bs` parameter is expected to be a constant value and only the bottom 2 bits of `bs` are +/// used. +/// +/// # Safety +/// +/// This function is safe to use if the `zksed` target feature is present. +/// +/// # Details +/// +/// Accelerates the key schedule operation in the SM4 block cipher algorithm. This instruction +/// is included in extension `Zksed`. It's defined as: +/// +/// ```text +/// SM4KS(x, k, BS) = x ⊕ T'(ki) +/// ... where +/// ki = k.bytes[BS] +/// T'(ki) = L'(τ(ki)) +/// bi = τ(ki) = SM4-S-Box(ki) +/// ci = L'(bi) = bi ⊕ (bi ≪ 13) ⊕ (bi ≪ 23) +/// SM4KS = (ci ≪ (BS * 8)) ⊕ x +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. `T'` combines +/// the non linear S-Box transform `τ` with the replaced linear layer transform `L'`. +/// +/// In the SM4 algorithm, the key schedule is defined as: +/// +/// ```text +/// rk[i] = K[i+4] = K[i] ⊕ T'(K[i+1] ⊕ K[i+2] ⊕ K[i+3] ⊕ CK[i]) +/// ...
where +/// K[0..=3] = MK[0..=3] ⊕ FK[0..=3] +/// T'(K) = L'(τ(K)) +/// B = τ(K) = (SM4-S-Box(k0), SM4-S-Box(k1), SM4-S-Box(k2), SM4-S-Box(k3)) +/// C = L'(B) = B ⊕ (B ≪ 13) ⊕ (B ≪ 23) +/// ``` +/// +/// where `MK` represents the input 128-bit encryption key, and `FK` and `CK` are fixed constants +/// defined by the SM4 algorithm. The key schedule can be implemented by `sm4ks` like: +/// +/// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn key_schedule(k0: usize, k1: usize, k2: usize, k3: usize, ck_i: usize) -> usize { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks; +/// let k = k1 ^ k2 ^ k3 ^ ck_i; +/// let c0 = unsafe { sm4ks(k0, k, 0) }; +/// let c1 = unsafe { sm4ks(c0, k, 1) }; // c1 represents c[0..=1], etc. +/// let c2 = unsafe { sm4ks(c1, k, 2) }; +/// let c3 = unsafe { sm4ks(c2, k, 3) }; +/// return c3; // c3 represents c[0..=3] +/// # } +/// ``` +#[target_feature(enable = "zksed")] +#[cfg_attr(test, assert_instr(sm4ks))] +#[inline] +pub unsafe fn sm4ks(rs1: usize, rs2: usize, bs: u8) -> usize { + macro_rules! sm4ks { + ($imm2:expr) => {{ + let value: usize; + unsafe { + asm!( + concat!("sm4ks {rd},{rs1},{rs2},", $imm2), + rd = lateout(reg) value, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + options(pure, nomem, nostack), + ) + } + value + }} + } + constify_imm2!(bs, sm4ks) +} + +/// Implements the P0 transformation function as used in the SM3 hash function \[4, 30\]. +/// +/// This instruction is supported for the RV32 and RV64 base architectures. It implements the +/// P0 transform of the SM3 hash function \[4, 30\]. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.41 +/// +/// # Safety +/// +/// This function is safe to use if the `zksh` target feature is present. +/// +/// # Details +/// +/// `P0` transformation function as is used in the SM3 hash algorithm +/// +/// This function is included in `Zksh` extension.
It's defined as: +/// +/// ```text +/// P0(X) = X ⊕ (X ≪ 9) ⊕ (X ≪ 17) +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// +/// In the SM3 algorithm, the `P0` transformation is used as `E ← P0(TT2)` when the +/// compression function `CF` uses the intermediate value `TT2` to calculate +/// the variable `E` in one iteration for subsequent processes. +#[target_feature(enable = "zksh")] +#[cfg_attr(test, assert_instr(sm3p0))] +#[inline] +pub unsafe fn sm3p0(rs1: usize) -> usize { + let value: usize; + unsafe { + asm!( + "sm3p0 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} + +/// Implements the P1 transformation function as used in the SM3 hash function \[4, 30\]. +/// +/// This instruction is supported for the RV32 and RV64 base architectures. It implements the +/// P1 transform of the SM3 hash function \[4, 30\]. This instruction must always be implemented +/// such that its execution latency does not depend on the data being operated on. +/// +/// Source: RISC-V Cryptography Extensions Volume I: Scalar & Entropy Source Instructions +/// +/// Version: v1.0.1 +/// +/// Section: 3.42 +/// +/// # Safety +/// +/// This function is safe to use if the `zksh` target feature is present. +/// +/// # Details +/// +/// `P1` transformation function as is used in the SM3 hash algorithm +/// +/// This function is included in `Zksh` extension. It's defined as: +/// +/// ```text +/// P1(X) = X ⊕ (X ≪ 15) ⊕ (X ≪ 23) +/// ``` +/// +/// where `⊕` represents 32-bit xor, and `≪ k` represents rotate left by `k` bits. +/// +/// In the SM3 algorithm, the `P1` transformation is used to expand message, +/// where expanded word `Wj` can be generated from the previous words.
+/// The whole process can be described as the following pseudocode: +/// +/// ```text +/// FOR j=16 TO 67 +/// Wj ← P1(Wj−16 ⊕ Wj−9 ⊕ (Wj−3 ≪ 15)) ⊕ (Wj−13 ≪ 7) ⊕ Wj−6 +/// ENDFOR +/// ``` +#[target_feature(enable = "zksh")] +#[cfg_attr(test, assert_instr(sm3p1))] +#[inline] +pub unsafe fn sm3p1(rs1: usize) -> usize { + let value: usize; + unsafe { + asm!( + "sm3p1 {rd},{rs1}", + rd = lateout(reg) value, + rs1 = in(reg) rs1, + options(pure, nomem, nostack), + ) + } + value +} \ No newline at end of file