Rollup merge of #150511 - Sa4dUs:offload-inline, r=ZuseZ4

Allow inline calls to offload intrinsic

Removes the explicit insert-point save/restore handling and instead repositions the builder at the end of the saved basic block.

r? `@ZuseZ4`

fixes: https://github.com/rust-lang/rust/issues/150413
This commit is contained in:
Jonathan Brouwer 2025-12-31 14:30:48 +01:00 committed by GitHub
commit d898dccc21
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 39 additions and 37 deletions

View file

@ -430,7 +430,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
let fn_ty = offload_globals.mapper_fn_ty;
let num_args = types.len() as u64;
let ip = unsafe { llvm::LLVMRustGetInsertPoint(&builder.llbuilder) };
let bb = builder.llbb();
// FIXME(Sa4dUs): dummy loads are a temp workaround, we should find a proper way to prevent these
// variables from being optimized away
@ -468,7 +468,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
// Step 1)
unsafe {
llvm::LLVMRustRestoreInsertPoint(&builder.llbuilder, ip);
llvm::LLVMPositionBuilderAtEnd(&builder.llbuilder, bb);
}
builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT);

View file

@ -2443,8 +2443,6 @@ unsafe extern "C" {
pub(crate) fn LLVMRustPositionBuilderPastAllocas<'a>(B: &Builder<'a>, Fn: &'a Value);
pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
pub(crate) fn LLVMRustGetInsertPoint<'a>(B: &Builder<'a>) -> &'a Value;
pub(crate) fn LLVMRustRestoreInsertPoint<'a>(B: &Builder<'a>, IP: &'a Value);
pub(crate) fn LLVMRustSetModulePICLevel(M: &Module);
pub(crate) fn LLVMRustSetModulePIELevel(M: &Module);

View file

@ -1458,39 +1458,6 @@ extern "C" void LLVMRustPositionAfter(LLVMBuilderRef B, LLVMValueRef Instr) {
}
}
// Reports the instruction the builder `B` is currently positioned in front of.
//
// Returns nullptr in two situations: the builder has no insertion block, or
// its insertion point is the end of its block (so there is no instruction to
// hand back). Callers must be prepared for a null result.
extern "C" LLVMValueRef LLVMRustGetInsertPoint(LLVMBuilderRef B) {
  llvm::IRBuilderBase::InsertPoint Saved = unwrap(B)->saveIP();
  llvm::BasicBlock *Block = Saved.getBlock();
  // Short-circuit keeps the iterator comparison from running without a block.
  if (Block == nullptr || Saved.getPoint() == Block->end())
    return nullptr;
  llvm::Instruction *Next = &*Saved.getPoint();
  return wrap(Next);
}
// Repositions the builder `B` using a value previously obtained from
// LLVMRustGetInsertPoint.
//
// A non-null `Instr` is treated as an instruction and the builder is placed
// at it. A null `Instr` makes the builder target the block it is *currently*
// inserting into (if any); the block is taken from the builder itself, not
// from any saved state.
extern "C" void LLVMRustRestoreInsertPoint(LLVMBuilderRef B,
                                           LLVMValueRef Instr) {
  llvm::IRBuilderBase *Builder = unwrap(B);
  if (Instr) {
    Builder->SetInsertPoint(unwrap<llvm::Instruction>(Instr));
    return;
  }
  // Null marker: fall back to the builder's own current block, when it has one.
  if (llvm::BasicBlock *Current = Builder->GetInsertBlock())
    Builder->SetInsertPoint(Current);
}
extern "C" LLVMValueRef
LLVMRustGetFunctionCall(LLVMValueRef Fn, const char *Name, size_t NameLen) {
auto targetName = StringRef(Name, NameLen);

View file

@ -0,0 +1,37 @@
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
//@ no-prefer-dynamic
//@ needs-offload
// This test verifies that the offload intrinsic is correctly lowered even when the caller
// contains control flow.
#![feature(abi_gpu_kernel)]
#![feature(rustc_attrs)]
#![feature(core_intrinsics)]
#![no_main]
// CHECK: define{{( dso_local)?}} void @main()
// CHECK-NOT: define
// CHECK: %EmptyDesc = alloca %struct.__tgt_bin_desc, align 8
// CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
// CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
// CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
// CHECK: br label %bb3
// CHECK-NOT: define
// CHECK: bb3
// CHECK: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
// CHECK: %10 = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2097152, i32 256, ptr nonnull @.foo.region_id, ptr nonnull %kernel_args)
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
// Test entry point: calls the offload intrinsic on the external kernel `foo` from inside a
// `for` loop, so the intrinsic is lowered in a caller that contains control flow. The
// FileCheck directives above describe the IR expected for this body (allocas first, then the
// mapper/kernel calls after the `bb3` label).
#[unsafe(no_mangle)]
unsafe fn main() {
// NOTE(review): the literals carry no `f32` suffix and the `as` cast below does not constrain
// them, so `A` is presumably inferred as `[f64; 6]` rather than `[f32; 6]` — confirm intent.
let A = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
// The loop exists to introduce control flow around the intrinsic call; `i` is never read.
for i in 0..100 {
// The kernel's single argument is passed as a one-element tuple.
core::intrinsics::offload::<_, _, ()>(foo, (A.as_ptr() as *const [f32; 6],));
}
}
// Declaration of the kernel handed to the offload intrinsic in `main`. No definition is
// provided in this file; the expected IR above refers to it through `@.foo.region_id` and
// `@.offload_maptypes.foo`.
unsafe extern "C" {
pub fn foo(A: *const [f32; 6]) -> ();
}