From b004312ee4c8418e5a42cc25b971fa5fc5ac88b7 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Fri, 7 Feb 2025 11:01:59 +0000 Subject: [PATCH] Implement arm64 vaddlvq_u8 and vld1q_u8_x4 vendor intrinsics This is required for using the bytecount crate on arm64. --- src/intrinsics/llvm_aarch64.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs index 39f6763d9f2c..4c59c81296ba 100644 --- a/src/intrinsics/llvm_aarch64.rs +++ b/src/intrinsics/llvm_aarch64.rs @@ -17,6 +17,14 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( fx.bcx.ins().fence(); } + "llvm.aarch64.neon.ld1x4.v16i8.p0i8" => { + intrinsic_args!(fx, args => (ptr); intrinsic); + + let ptr = ptr.load_scalar(fx); + let val = CPlace::for_ptr(Pointer::new(ptr), ret.layout()).to_cvalue(fx); + ret.write_cvalue(fx, val); + } + _ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => { intrinsic_args!(fx, args => (a); intrinsic); @@ -115,6 +123,22 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( ); } + "llvm.aarch64.neon.uaddlv.i32.v16i8" => { + intrinsic_args!(fx, args => (v); intrinsic); + + let mut res_val = fx.bcx.ins().iconst(types::I16, 0); + for lane_idx in 0..16 { + let lane = v.value_lane(fx, lane_idx).load_scalar(fx); + let lane = fx.bcx.ins().uextend(types::I16, lane); + res_val = fx.bcx.ins().iadd(res_val, lane); + } + let res = CValue::by_val( + fx.bcx.ins().uextend(types::I32, res_val), + fx.layout_of(fx.tcx.types.u32), + ); + ret.write_cvalue(fx, res); + } + _ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => { intrinsic_args!(fx, args => (v); intrinsic);