Update test and verify that tgt_(un)register_lib have the right type
This commit is contained in:
parent
4c3310a6db
commit
fa584faca5
5 changed files with 40 additions and 28 deletions
|
|
@ -93,8 +93,13 @@ pub(crate) fn compile_codegen_unit(
|
|||
// They are necessary for correct offload execution. We do this here to simplify the
|
||||
// `offload` intrinsic, avoiding the need for tracking whether it's the first
|
||||
// intrinsic call or not.
|
||||
let has_host_offload =
|
||||
cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_)));
|
||||
let has_host_offload = cx
|
||||
.sess()
|
||||
.opts
|
||||
.unstable_opts
|
||||
.offload
|
||||
.iter()
|
||||
.any(|o| matches!(o, Offload::Host(_) | Offload::Test));
|
||||
if has_host_offload && !cx.sess().target.is_like_gpu {
|
||||
cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,8 +49,9 @@ impl<'ll> OffloadGlobals<'ll> {
|
|||
let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
|
||||
cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false);
|
||||
|
||||
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty);
|
||||
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty);
|
||||
let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
|
||||
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl);
|
||||
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
|
||||
let init_ty = cx.type_func(&[], cx.type_void());
|
||||
let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);
|
||||
|
||||
|
|
|
|||
|
|
@ -196,6 +196,8 @@ pub enum Offload {
|
|||
Device,
|
||||
/// Second step in the offload pipeline, generates the host code to call kernels.
|
||||
Host(String),
|
||||
/// Test is similar to Host, but allows testing without a device artifact.
|
||||
Test,
|
||||
}
|
||||
|
||||
/// The different settings that the `-Z autodiff` flag can have.
|
||||
|
|
|
|||
|
|
@ -794,7 +794,8 @@ mod desc {
|
|||
pub(crate) const parse_list_with_polarity: &str =
|
||||
"a comma-separated list of strings, with elements beginning with + or -";
|
||||
pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintTAFn`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `PrintModFinal`, `PrintPasses`, `NoPostopt`, `LooseTypes`, `Inline`, `NoTT`";
|
||||
pub(crate) const parse_offload: &str = "a comma separated list of settings: `Enable`";
|
||||
pub(crate) const parse_offload: &str =
|
||||
"a comma separated list of settings: `Host=<Absolute-Path>`, `Device`, `Test`";
|
||||
pub(crate) const parse_comma_list: &str = "a comma-separated list of strings";
|
||||
pub(crate) const parse_opt_comma_list: &str = parse_comma_list;
|
||||
pub(crate) const parse_number: &str = "a number";
|
||||
|
|
@ -1471,6 +1472,13 @@ pub mod parse {
|
|||
}
|
||||
Offload::Device
|
||||
}
|
||||
"Test" => {
|
||||
if let Some(_) = arg {
|
||||
// Test does not accept a value
|
||||
return false;
|
||||
}
|
||||
Offload::Test
|
||||
}
|
||||
_ => {
|
||||
// FIXME(ZuseZ4): print an error saying which value is not recognized
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -1,15 +1,10 @@
|
|||
//@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat
|
||||
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
|
||||
//@ no-prefer-dynamic
|
||||
//@ needs-enzyme
|
||||
//@ needs-offload
|
||||
|
||||
// This test is verifying that we generate __tgt_target_data_*_mapper before and after a call to the
|
||||
// kernel_1. Better documentation to what each global or variable means is available in the gpu
|
||||
// offload code, or the LLVM offload documentation. This code does not launch any GPU kernels yet,
|
||||
// and will be rewritten once a proper offload frontend has landed.
|
||||
//
|
||||
// We currently only handle memory transfer for specific calls to functions named `kernel_{num}`,
|
||||
// when inside of a function called main. This, too, is a temporary workaround for not having a
|
||||
// frontend.
|
||||
// offload code, or the LLVM offload documentation.
|
||||
|
||||
#![feature(rustc_attrs)]
|
||||
#![feature(core_intrinsics)]
|
||||
|
|
@ -22,6 +17,20 @@ fn main() {
|
|||
core::hint::black_box(&x);
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
#[inline(never)]
|
||||
pub fn kernel_1(x: &mut [f32; 256]) {
|
||||
core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,))
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
#[inline(never)]
|
||||
pub fn _kernel_1(x: &mut [f32; 256]) {
|
||||
for i in 0..256 {
|
||||
x[i] = 21.0;
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
|
||||
// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
|
||||
// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
|
||||
|
|
@ -36,8 +45,9 @@ fn main() {
|
|||
// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
|
||||
// CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
|
||||
|
||||
// CHECK: Function Attrs: nounwind
|
||||
// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
|
||||
// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
|
||||
// CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr
|
||||
|
||||
// CHECK: define{{( dso_local)?}} void @main()
|
||||
// CHECK-NEXT: start:
|
||||
|
|
@ -94,17 +104,3 @@ fn main() {
|
|||
// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
|
||||
// CHECK-NEXT: ret void
|
||||
// CHECK-NEXT: }
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
#[inline(never)]
|
||||
pub fn kernel_1(x: &mut [f32; 256]) {
|
||||
core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,))
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
#[inline(never)]
|
||||
pub fn _kernel_1(x: &mut [f32; 256]) {
|
||||
for i in 0..256 {
|
||||
x[i] = 21.0;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue