Implement LLVM x86 adx intrinsics

This commit is contained in:
Tobias Decking 2024-06-20 20:11:16 +02:00
parent 66ad792c4e
commit 9afd752591
2 changed files with 123 additions and 38 deletions

View file

@ -35,17 +35,48 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Prefix should have already been checked.
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
match unprefixed_name {
// Used to implement the `_addcarry_u32` and `_addcarry_u64` functions.
// Computes a + b with input and output carry. The input carry is an 8-bit
// value, which is interpreted as 1 if it is non-zero. The output carry is
// an 8-bit value that will be 0 or 1.
// Used to implement the `_addcarry_u{32, 64}` and the `_subborrow_u{32, 64}` functions.
// Computes a + b or a - b with input and output carry/borrow. The input carry/borrow is an 8-bit
// value, which is interpreted as 1 if it is non-zero. The output carry/borrow is an 8-bit value that will be 0 or 1.
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
"addcarry.32" | "addcarry.64" => {
if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" {
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
"addcarry.32" | "addcarry.64" | "subborrow.32" | "subborrow.64" => {
if unprefixed_name.ends_with("64") && this.tcx.sess.target.arch != "x86_64" {
return Ok(EmulateItemResult::NotSupported);
}
let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let op = if unprefixed_name.starts_with("add") {
mir::BinOp::AddWithOverflow
} else {
mir::BinOp::SubWithOverflow
};
let [cb_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let cb_in = this.read_scalar(cb_in)?.to_u8()? != 0;
let a = this.read_immediate(a)?;
let b = this.read_immediate(b)?;
let (sum, overflow1) = this.binary_op(op, &a, &b)?.to_pair(this);
let (sum, overflow2) =
this.binary_op(op, &sum, &ImmTy::from_uint(cb_in, a.layout))?.to_pair(this);
let cb_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
let d1 = this.project_field(dest, 0)?;
let d2 = this.project_field(dest, 1)?;
write_twice(this, &d1, Scalar::from_u8(cb_out.into()), &d2, sum)?;
}
// Used to implement the `_addcarryx_u{32, 64}` functions. They are semantically identical with the `_addcarry_u{32, 64}` functions,
// except for a slightly different type signature and the requirement for the "adx" target feature.
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarryx-u32-addcarryx-u64.html
"addcarryx.u32" | "addcarryx.u64" => {
this.expect_target_feature_for_intrinsic(link_name, "adx")?;
if unprefixed_name.ends_with("64") && this.tcx.sess.target.arch != "x86_64" {
return Ok(EmulateItemResult::NotSupported);
}
let [c_in, a, b, out] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let c_in = this.read_scalar(c_in)?.to_u8()? != 0;
let a = this.read_immediate(a)?;
let b = this.read_immediate(b)?;
@ -61,37 +92,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
.to_pair(this);
let c_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?;
this.write_immediate(*sum, &this.project_field(dest, 1)?)?;
}
// Used to implement the `_subborrow_u32` and `_subborrow_u64` functions.
// Computes a - b with input and output borrow. The input borrow is an 8-bit
// value, which is interpreted as 1 if it is non-zero. The output borrow is
// an 8-bit value that will be 0 or 1.
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
"subborrow.32" | "subborrow.64" => {
if unprefixed_name == "subborrow.64" && this.tcx.sess.target.arch != "x86_64" {
return Ok(EmulateItemResult::NotSupported);
}
let [b_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
let b_in = this.read_scalar(b_in)?.to_u8()? != 0;
let a = this.read_immediate(a)?;
let b = this.read_immediate(b)?;
let (sub, overflow1) =
this.binary_op(mir::BinOp::SubWithOverflow, &a, &b)?.to_pair(this);
let (sub, overflow2) = this
.binary_op(
mir::BinOp::SubWithOverflow,
&sub,
&ImmTy::from_uint(b_in, a.layout),
)?
.to_pair(this);
let b_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
this.write_scalar(Scalar::from_u8(b_out.into()), &this.project_field(dest, 0)?)?;
this.write_immediate(*sub, &this.project_field(dest, 1)?)?;
let out = this.deref_pointer_as(out, sum.layout)?;
write_twice(this, dest, Scalar::from_u8(c_out.into()), &out, sum)?;
}
// Used to implement the `_mm_pause` function.
@ -1366,3 +1368,16 @@ fn psign<'tcx>(
Ok(())
}
/// Stores the scalar `v1` into the place `d1`, then the immediate `v2` into the place `d2`.
///
/// The writes are performed in that order. If writing `v1` fails, the error is
/// returned immediately and `d2` is left untouched.
fn write_twice<'tcx>(
    this: &mut crate::MiriInterpCx<'tcx>,
    d1: &MPlaceTy<'tcx>,
    v1: Scalar,
    d2: &MPlaceTy<'tcx>,
    v2: ImmTy<'tcx>,
) -> InterpResult<'tcx, ()> {
    this.write_scalar(v1, d1)?;
    // The outcome of the second write is the outcome of the whole operation.
    this.write_immediate(*v2, d2)
}

View file

@ -0,0 +1,70 @@
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+adx
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86 as arch;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64 as arch;

    /// Computes `a + b + carry` with `_addcarryx_u32` and returns `(carry_out, sum)`.
    ///
    /// Any non-zero `c_in` is treated as a carry of 1.
    fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) {
        let mut sum = 0;
        // SAFETY: `_addcarryx_u32` requires the `adx` target feature, which this
        // test enables through `-C target-feature=+adx`.
        let c_out = unsafe { arch::_addcarryx_u32(c_in, a, b, &mut sum) };
        (c_out, sum)
    }

    /// Runs all 32-bit `_addcarryx_u32` checks.
    pub fn main() {
        // No carry out.
        assert_eq!(adc(0, 1, 1), (0, 2));
        assert_eq!(adc(1, 1, 1), (0, 3));
        assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
        assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
        assert_eq!(adc(0, u32::MAX, 0), (0, u32::MAX));

        // Carry out produced by `a + b` itself.
        assert_eq!(adc(0, u32::MAX, u32::MAX), (1, u32::MAX - 1));
        assert_eq!(adc(1, u32::MAX, u32::MAX), (1, u32::MAX));
        assert_eq!(adc(2, u32::MAX, u32::MAX), (1, u32::MAX));
        assert_eq!(adc(u8::MAX, u32::MAX, u32::MAX), (1, u32::MAX));

        // Carry out produced only once the carry-in is added: `a + b` does not
        // overflow on its own, but `a + b + 1` does.
        assert_eq!(adc(1, u32::MAX, 0), (1, 0));
        assert_eq!(adc(1, 0, u32::MAX), (1, 0));
        assert_eq!(adc(u8::MAX, u32::MAX, 0), (1, 0));
        assert_eq!(adc(1, u32::MAX - 1, 1), (1, 0));
    }
}
#[cfg(target_arch = "x86_64")]
mod x86_64 {
    use core::arch::x86_64 as arch;

    /// Computes `a + b + carry` with `_addcarryx_u64` and returns `(carry_out, sum)`.
    ///
    /// Any non-zero `c_in` is treated as a carry of 1.
    fn adc(c_in: u8, a: u64, b: u64) -> (u8, u64) {
        let mut sum = 0;
        // SAFETY: `_addcarryx_u64` requires the `adx` target feature, which this
        // test enables through `-C target-feature=+adx`.
        let c_out = unsafe { arch::_addcarryx_u64(c_in, a, b, &mut sum) };
        (c_out, sum)
    }

    /// Runs all 64-bit `_addcarryx_u64` checks.
    pub fn main() {
        // No carry out.
        assert_eq!(adc(0, 1, 1), (0, 2));
        assert_eq!(adc(1, 1, 1), (0, 3));
        assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
        assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
        assert_eq!(adc(0, u64::MAX, 0), (0, u64::MAX));

        // Carry out produced by `a + b` itself.
        assert_eq!(adc(0, u64::MAX, u64::MAX), (1, u64::MAX - 1));
        assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX));
        assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX));
        assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX));

        // Carry out produced only once the carry-in is added: `a + b` does not
        // overflow on its own, but `a + b + 1` does.
        assert_eq!(adc(1, u64::MAX, 0), (1, 0));
        assert_eq!(adc(1, 0, u64::MAX), (1, 0));
        assert_eq!(adc(u8::MAX, u64::MAX, 0), (1, 0));
        assert_eq!(adc(1, u64::MAX - 1, 1), (1, 0));
    }
}
fn main() {
    // The intrinsics exercised below need the `adx` feature at run time.
    assert!(is_x86_feature_detected!("adx"));

    // 32-bit variant: available on both x86 and x86_64.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        x86::main();
    }

    // 64-bit variant: x86_64 only.
    #[cfg(target_arch = "x86_64")]
    {
        x86_64::main();
    }
}