Merge pull request #4193 from bjorn3/arm64_vpmaxq_u8
Implement vpmaxq_u8 on aarch64
This commit is contained in:
commit
913612c59e
4 changed files with 124 additions and 13 deletions
78
src/tools/miri/src/shims/aarch64.rs
Normal file
78
src/tools/miri/src/shims/aarch64.rs
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
use rustc_middle::mir::BinOp;
|
||||
use rustc_middle::ty::Ty;
|
||||
use rustc_span::Symbol;
|
||||
use rustc_target::callconv::{Conv, FnAbi};
|
||||
|
||||
use crate::*;
|
||||
|
||||
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
|
||||
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
|
||||
fn emulate_aarch64_intrinsic(
|
||||
&mut self,
|
||||
link_name: Symbol,
|
||||
abi: &FnAbi<'tcx, Ty<'tcx>>,
|
||||
args: &[OpTy<'tcx>],
|
||||
dest: &MPlaceTy<'tcx>,
|
||||
) -> InterpResult<'tcx, EmulateItemResult> {
|
||||
let this = self.eval_context_mut();
|
||||
// Prefix should have already been checked.
|
||||
let unprefixed_name = link_name.as_str().strip_prefix("llvm.aarch64.").unwrap();
|
||||
match unprefixed_name {
|
||||
"isb" => {
|
||||
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
|
||||
let arg = this.read_scalar(arg)?.to_i32()?;
|
||||
match arg {
|
||||
// SY ("full system scope")
|
||||
15 => {
|
||||
this.yield_active_thread();
|
||||
}
|
||||
_ => {
|
||||
throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Used to implement the vpmaxq_u8 function.
|
||||
// Computes the maximum of adjacent pairs; the first half of the output is produced from the
|
||||
// `left` input, the second half of the output from the `right` input.
|
||||
// https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8
|
||||
"neon.umaxp.v16i8" => {
|
||||
let [left, right] = this.check_shim(abi, Conv::C, link_name, args)?;
|
||||
|
||||
let (left, left_len) = this.project_to_simd(left)?;
|
||||
let (right, right_len) = this.project_to_simd(right)?;
|
||||
let (dest, lane_count) = this.project_to_simd(dest)?;
|
||||
assert_eq!(left_len, right_len);
|
||||
assert_eq!(lane_count, left_len);
|
||||
|
||||
for lane_idx in 0..lane_count {
|
||||
let src = if lane_idx < (lane_count / 2) { &left } else { &right };
|
||||
let src_idx = lane_idx.strict_rem(lane_count / 2);
|
||||
|
||||
let lhs_lane =
|
||||
this.read_immediate(&this.project_index(src, src_idx.strict_mul(2))?)?;
|
||||
let rhs_lane = this.read_immediate(
|
||||
&this.project_index(src, src_idx.strict_mul(2).strict_add(1))?,
|
||||
)?;
|
||||
|
||||
// Compute `if lhs > rhs { lhs } else { rhs }`, i.e., `max`.
|
||||
let res_lane = if this
|
||||
.binary_op(BinOp::Gt, &lhs_lane, &rhs_lane)?
|
||||
.to_scalar()
|
||||
.to_bool()?
|
||||
{
|
||||
lhs_lane
|
||||
} else {
|
||||
rhs_lane
|
||||
};
|
||||
|
||||
let dest = this.project_index(&dest, lane_idx)?;
|
||||
this.write_immediate(*res_lane, &dest)?;
|
||||
}
|
||||
}
|
||||
|
||||
_ => return interp_ok(EmulateItemResult::NotSupported),
|
||||
}
|
||||
interp_ok(EmulateItemResult::NeedsReturn)
|
||||
}
|
||||
}
|
||||
|
|
@ -981,20 +981,12 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
|
|||
this, link_name, abi, args, dest,
|
||||
);
|
||||
}
|
||||
// FIXME: Move these to an `arm` submodule.
|
||||
"llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => {
|
||||
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
|
||||
let arg = this.read_scalar(arg)?.to_i32()?;
|
||||
match arg {
|
||||
// SY ("full system scope")
|
||||
15 => {
|
||||
this.yield_active_thread();
|
||||
}
|
||||
_ => {
|
||||
throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", arg);
|
||||
}
|
||||
}
|
||||
name if name.starts_with("llvm.aarch64.") && this.tcx.sess.target.arch == "aarch64" => {
|
||||
return shims::aarch64::EvalContextExt::emulate_aarch64_intrinsic(
|
||||
this, link_name, abi, args, dest,
|
||||
);
|
||||
}
|
||||
// FIXME: Move this to an `arm` submodule.
|
||||
"llvm.arm.hint" if this.tcx.sess.target.arch == "arm" => {
|
||||
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
|
||||
let arg = this.read_scalar(arg)?.to_i32()?;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
#![warn(clippy::arithmetic_side_effects)]
|
||||
|
||||
mod aarch64;
|
||||
mod alloc;
|
||||
mod backtrace;
|
||||
mod files;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,40 @@
|
|||
// We're testing aarch64 target specific features
|
||||
//@only-target: aarch64
|
||||
//@compile-flags: -C target-feature=+neon
|
||||
|
||||
use std::arch::aarch64::*;
|
||||
use std::arch::is_aarch64_feature_detected;
|
||||
|
||||
fn main() {
|
||||
assert!(is_aarch64_feature_detected!("neon"));
|
||||
|
||||
unsafe {
|
||||
test_neon();
|
||||
}
|
||||
}
|
||||
|
||||
#[target_feature(enable = "neon")]
|
||||
unsafe fn test_neon() {
|
||||
// Adapted from library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
|
||||
unsafe fn test_vpmaxq_u8() {
|
||||
let a = vld1q_u8([1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8].as_ptr());
|
||||
let b = vld1q_u8([0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9].as_ptr());
|
||||
let e = [2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9];
|
||||
let mut r = [0; 16];
|
||||
vst1q_u8(r.as_mut_ptr(), vpmaxq_u8(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
test_vpmaxq_u8();
|
||||
|
||||
unsafe fn test_vpmaxq_u8_is_unsigned() {
|
||||
let a = vld1q_u8(
|
||||
[255, 0, 253, 252, 251, 250, 249, 248, 255, 254, 253, 252, 251, 250, 249, 248].as_ptr(),
|
||||
);
|
||||
let b = vld1q_u8([254, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9].as_ptr());
|
||||
let e = [255, 253, 251, 249, 255, 253, 251, 249, 254, 5, 7, 9, 3, 5, 7, 9];
|
||||
let mut r = [0; 16];
|
||||
vst1q_u8(r.as_mut_ptr(), vpmaxq_u8(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
test_vpmaxq_u8_is_unsigned();
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue