Implement arm64 vaddlvq_u8 and vld1q_u8_x4 vendor intrinsics
This is required for using the bytecount crate on arm64.
This commit is contained in:
parent
ed91b73179
commit
b004312ee4
1 changed file with 24 additions and 0 deletions
|
|
@@ -17,6 +17,14 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
|
|||
fx.bcx.ins().fence();
|
||||
}
|
||||
|
||||
"llvm.aarch64.neon.ld1x4.v16i8.p0i8" => {
|
||||
intrinsic_args!(fx, args => (ptr); intrinsic);
|
||||
|
||||
let ptr = ptr.load_scalar(fx);
|
||||
let val = CPlace::for_ptr(Pointer::new(ptr), ret.layout()).to_cvalue(fx);
|
||||
ret.write_cvalue(fx, val);
|
||||
}
|
||||
|
||||
_ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => {
|
||||
intrinsic_args!(fx, args => (a); intrinsic);
|
||||
|
||||
|
|
@@ -115,6 +123,22 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
|
|||
);
|
||||
}
|
||||
|
||||
"llvm.aarch64.neon.uaddlv.i32.v16i8" => {
|
||||
intrinsic_args!(fx, args => (v); intrinsic);
|
||||
|
||||
let mut res_val = fx.bcx.ins().iconst(types::I16, 0);
|
||||
for lane_idx in 0..16 {
|
||||
let lane = v.value_lane(fx, lane_idx).load_scalar(fx);
|
||||
let lane = fx.bcx.ins().uextend(types::I16, lane);
|
||||
res_val = fx.bcx.ins().iadd(res_val, lane);
|
||||
}
|
||||
let res = CValue::by_val(
|
||||
fx.bcx.ins().uextend(types::I32, res_val),
|
||||
fx.layout_of(fx.tcx.types.u32),
|
||||
);
|
||||
ret.write_cvalue(fx, res);
|
||||
}
|
||||
|
||||
_ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
|
||||
intrinsic_args!(fx, args => (v); intrinsic);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue