Rollup merge of #150511 - Sa4dUs:offload-inline, r=ZuseZ4
Allow inline calls to the offload intrinsic. Removes the explicit insert-point save/restore handling and instead repositions the builder at the end of the saved basic block. r? `@ZuseZ4` Fixes: https://github.com/rust-lang/rust/issues/150413
This commit is contained in:
commit
d898dccc21
4 changed files with 39 additions and 37 deletions
|
|
@ -430,7 +430,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
|
|||
let fn_ty = offload_globals.mapper_fn_ty;
|
||||
|
||||
let num_args = types.len() as u64;
|
||||
let ip = unsafe { llvm::LLVMRustGetInsertPoint(&builder.llbuilder) };
|
||||
let bb = builder.llbb();
|
||||
|
||||
// FIXME(Sa4dUs): dummy loads are a temporary workaround; we should find a proper way to prevent these
|
||||
// variables from being optimized away
|
||||
|
|
@ -468,7 +468,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
|
|||
|
||||
// Step 1)
|
||||
unsafe {
|
||||
llvm::LLVMRustRestoreInsertPoint(&builder.llbuilder, ip);
|
||||
llvm::LLVMPositionBuilderAtEnd(&builder.llbuilder, bb);
|
||||
}
|
||||
builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT);
|
||||
|
||||
|
|
|
|||
|
|
@ -2443,8 +2443,6 @@ unsafe extern "C" {
|
|||
|
||||
pub(crate) fn LLVMRustPositionBuilderPastAllocas<'a>(B: &Builder<'a>, Fn: &'a Value);
|
||||
pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
|
||||
pub(crate) fn LLVMRustGetInsertPoint<'a>(B: &Builder<'a>) -> &'a Value;
|
||||
pub(crate) fn LLVMRustRestoreInsertPoint<'a>(B: &Builder<'a>, IP: &'a Value);
|
||||
|
||||
pub(crate) fn LLVMRustSetModulePICLevel(M: &Module);
|
||||
pub(crate) fn LLVMRustSetModulePIELevel(M: &Module);
|
||||
|
|
|
|||
|
|
@ -1458,39 +1458,6 @@ extern "C" void LLVMRustPositionAfter(LLVMBuilderRef B, LLVMValueRef Instr) {
|
|||
}
|
||||
}
|
||||
|
||||
extern "C" LLVMValueRef LLVMRustGetInsertPoint(LLVMBuilderRef B) {
|
||||
llvm::IRBuilderBase &IRB = *unwrap(B);
|
||||
|
||||
llvm::IRBuilderBase::InsertPoint ip = IRB.saveIP();
|
||||
llvm::BasicBlock *BB = ip.getBlock();
|
||||
|
||||
if (!BB)
|
||||
return nullptr;
|
||||
|
||||
auto it = ip.getPoint();
|
||||
|
||||
if (it == BB->end())
|
||||
return nullptr;
|
||||
|
||||
llvm::Instruction *I = &*it;
|
||||
return wrap(I);
|
||||
}
|
||||
|
||||
extern "C" void LLVMRustRestoreInsertPoint(LLVMBuilderRef B,
|
||||
LLVMValueRef Instr) {
|
||||
llvm::IRBuilderBase &IRB = *unwrap(B);
|
||||
|
||||
if (!Instr) {
|
||||
llvm::BasicBlock *BB = IRB.GetInsertBlock();
|
||||
if (BB)
|
||||
IRB.SetInsertPoint(BB);
|
||||
return;
|
||||
}
|
||||
|
||||
llvm::Instruction *I = unwrap<llvm::Instruction>(Instr);
|
||||
IRB.SetInsertPoint(I);
|
||||
}
|
||||
|
||||
extern "C" LLVMValueRef
|
||||
LLVMRustGetFunctionCall(LLVMValueRef Fn, const char *Name, size_t NameLen) {
|
||||
auto targetName = StringRef(Name, NameLen);
|
||||
|
|
|
|||
37
tests/codegen-llvm/gpu_offload/control_flow.rs
Normal file
37
tests/codegen-llvm/gpu_offload/control_flow.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
|
||||
//@ no-prefer-dynamic
|
||||
//@ needs-offload
|
||||
|
||||
// This test verifies that the offload intrinsic is correctly lowered even when the caller
|
||||
// contains control flow.
|
||||
|
||||
#![feature(abi_gpu_kernel)]
|
||||
#![feature(rustc_attrs)]
|
||||
#![feature(core_intrinsics)]
|
||||
#![no_main]
|
||||
|
||||
// CHECK: define{{( dso_local)?}} void @main()
|
||||
// CHECK-NOT: define
|
||||
// CHECK: %EmptyDesc = alloca %struct.__tgt_bin_desc, align 8
|
||||
// CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
|
||||
// CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
|
||||
// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
|
||||
// CHECK: br label %bb3
|
||||
// CHECK-NOT: define
|
||||
// CHECK: bb3
|
||||
// CHECK: call void @__tgt_target_data_begin_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
|
||||
// CHECK: %10 = call i32 @__tgt_target_kernel(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 2097152, i32 256, ptr nonnull @.foo.region_id, ptr nonnull %kernel_args)
|
||||
// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr nonnull @anon.{{.*}}.1, i64 -1, i32 1, ptr nonnull %.offload_baseptrs, ptr nonnull %.offload_ptrs, ptr nonnull %.offload_sizes, ptr nonnull @.offload_maptypes.foo, ptr null, ptr null)
|
||||
#[unsafe(no_mangle)]
|
||||
unsafe fn main() {
|
||||
let A = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0];
|
||||
|
||||
for i in 0..100 {
|
||||
core::intrinsics::offload::<_, _, ()>(foo, (A.as_ptr() as *const [f32; 6],));
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" {
|
||||
pub fn foo(A: *const [f32; 6]) -> ();
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue