From 30ece8da06882b9cbca9f7e73f6804c669449a17 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Wed, 26 Feb 2025 13:06:01 +0000 Subject: [PATCH] Make tiny-skia work on arm64 --- src/intrinsics/llvm.rs | 8 +++ src/intrinsics/llvm_aarch64.rs | 119 +++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) diff --git a/src/intrinsics/llvm.rs b/src/intrinsics/llvm.rs index 720a0d8fbf59..eb0dfbb69c3b 100644 --- a/src/intrinsics/llvm.rs +++ b/src/intrinsics/llvm.rs @@ -54,6 +54,14 @@ pub(crate) fn codegen_llvm_intrinsic_call<'tcx>( ); } + "llvm.fptosi.sat.v4i32.v4f32" => { + intrinsic_args!(fx, args => (a); intrinsic); + + simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { + fx.bcx.ins().fcvt_to_sint_sat(types::I32, lane) + }); + } + _ => { fx.tcx .dcx() diff --git a/src/intrinsics/llvm_aarch64.rs b/src/intrinsics/llvm_aarch64.rs index 4c59c81296ba..b77c99fa2896 100644 --- a/src/intrinsics/llvm_aarch64.rs +++ b/src/intrinsics/llvm_aarch64.rs @@ -1,5 +1,9 @@ //! Emulate AArch64 LLVM intrinsics +use rustc_ast::ast::{InlineAsmOptions, InlineAsmTemplatePiece}; +use rustc_target::asm::*; + +use crate::inline_asm::{CInlineAsmOperand, codegen_inline_asm_inner}; use crate::intrinsics::*; use crate::prelude::*; @@ -49,6 +53,121 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>( }); } + "llvm.aarch64.neon.fcvtns.v4i32.v4f32" => { + intrinsic_args!(fx, args => (a); intrinsic); + + // Note: Using inline asm instead of fcvt_to_sint as the latter rounds to zero rather than to nearest + + let a_ptr = a.force_stack(fx).0.get_addr(fx); + let res_place = CPlace::new_stack_slot(fx, ret.layout()); + let res_ptr = res_place.to_ptr().get_addr(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String( + "ldr q0, [x0] + fcvtns v0.4s, v0.4s + str q0, [x1]" + .into(), + )], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x0, + )), + value: a_ptr, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x1, + )), + value: res_ptr, + }, + ], + InlineAsmOptions::NOSTACK, + ); + let res = res_place.to_cvalue(fx); + ret.write_cvalue_transmute(fx, res); + } + + "llvm.aarch64.neon.frecpe.v4f32" => { + intrinsic_args!(fx, args => (a); intrinsic); + + let a_ptr = a.force_stack(fx).0.get_addr(fx); + let res_place = CPlace::new_stack_slot(fx, ret.layout()); + let res_ptr = res_place.to_ptr().get_addr(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String( + "ldr q0, [x0] + frecpe v0.4s, v0.4s + str q0, [x1]" + .into(), + )], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x0, + )), + value: a_ptr, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x1, + )), + value: res_ptr, + }, + ], + InlineAsmOptions::NOSTACK, + ); + let res = res_place.to_cvalue(fx); + ret.write_cvalue_transmute(fx, res); + } + + "llvm.aarch64.neon.frecps.v4f32" => { + intrinsic_args!(fx, args => (a, b); intrinsic); + + let a_ptr = a.force_stack(fx).0.get_addr(fx); + let b_ptr = b.force_stack(fx).0.get_addr(fx); + let res_place = CPlace::new_stack_slot(fx, ret.layout()); + let res_ptr = res_place.to_ptr().get_addr(fx); + + codegen_inline_asm_inner( + fx, + &[InlineAsmTemplatePiece::String( + "ldr q0, [x0] + ldr q1, [x1] + frecps v0.4s, v0.4s, v1.4s + str q0, [x2]" + .into(), + )], + &[ + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x0, + )), + value: a_ptr, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x1, + )), + value: b_ptr, + }, + CInlineAsmOperand::In { + reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::AArch64( + AArch64InlineAsmReg::x2, + )), + value: res_ptr, + }, + ], + InlineAsmOptions::NOSTACK, + ); + let res = res_place.to_cvalue(fx); + ret.write_cvalue_transmute(fx, res); + } + _ if intrinsic.starts_with("llvm.aarch64.neon.sqadd.v") || intrinsic.starts_with("llvm.aarch64.neon.uqadd.v") => {