Update test and verify that tgt_(un)register_lib have the right type

This commit is contained in:
Manuel Drehwald 2026-01-04 06:56:48 -08:00
parent 4c3310a6db
commit fa584faca5
5 changed files with 40 additions and 28 deletions

View file

@ -93,8 +93,13 @@ pub(crate) fn compile_codegen_unit(
// They are necessary for correct offload execution. We do this here to simplify the
// `offload` intrinsic, avoiding the need for tracking whether it's the first
// intrinsic call or not.
let has_host_offload =
cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_)));
let has_host_offload = cx
.sess()
.opts
.unstable_opts
.offload
.iter()
.any(|o| matches!(o, Offload::Host(_) | Offload::Test));
if has_host_offload && !cx.sess().target.is_like_gpu {
cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx)));
}

View file

@ -49,8 +49,9 @@ impl<'ll> OffloadGlobals<'ll> {
let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false);
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty);
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty);
let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl);
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
let init_ty = cx.type_func(&[], cx.type_void());
let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);

View file

@ -196,6 +196,8 @@ pub enum Offload {
Device,
/// Second step in the offload pipeline, generates the host code to call kernels.
Host(String),
/// Test is similar to Host, but allows testing without a device artifact.
Test,
}
/// The different settings that the `-Z autodiff` flag can have.

View file

@ -794,7 +794,8 @@ mod desc {
pub(crate) const parse_list_with_polarity: &str =
"a comma-separated list of strings, with elements beginning with + or -";
pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintTAFn`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `PrintModFinal`, `PrintPasses`, `NoPostopt`, `LooseTypes`, `Inline`, `NoTT`";
pub(crate) const parse_offload: &str = "a comma separated list of settings: `Enable`";
pub(crate) const parse_offload: &str =
"a comma separated list of settings: `Host=<Absolute-Path>`, `Device`, `Test`";
pub(crate) const parse_comma_list: &str = "a comma-separated list of strings";
pub(crate) const parse_opt_comma_list: &str = parse_comma_list;
pub(crate) const parse_number: &str = "a number";
@ -1471,6 +1472,13 @@ pub mod parse {
}
Offload::Device
}
"Test" => {
if let Some(_) = arg {
// Test does not accept a value
return false;
}
Offload::Test
}
_ => {
// FIXME(ZuseZ4): print an error saying which value is not recognized
return false;

View file

@ -1,15 +1,10 @@
//@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat
//@ compile-flags: -Zoffload=Test -Zunstable-options -C opt-level=3 -Clto=fat
//@ no-prefer-dynamic
//@ needs-enzyme
//@ needs-offload
// This test is verifying that we generate __tgt_target_data_*_mapper before and after a call to the
// kernel_1. Better documentation to what each global or variable means is available in the gpu
// offload code, or the LLVM offload documentation. This code does not launch any GPU kernels yet,
// and will be rewritten once a proper offload frontend has landed.
//
// We currently only handle memory transfer for specific calls to functions named `kernel_{num}`,
// when inside of a function called main. This, too, is a temporary workaround for not having a
// frontend.
// offload code, or the LLVM offload documentation.
#![feature(rustc_attrs)]
#![feature(core_intrinsics)]
@ -22,6 +17,20 @@ fn main() {
core::hint::black_box(&x);
}
// Host-side wrapper used by this test: hands `x` to the `core::intrinsics::offload`
// intrinsic, naming `_kernel_1` as the function to offload.
// `no_mangle` keeps the symbol name unmangled and `inline(never)` asks the compiler to keep
// this function as a separate, non-inlined definition.
// NOTE(review): the two `[_, 1, 1]` arrays look like 3-D launch dimensions (e.g. number of
// blocks and threads per block) -- confirm against the intrinsic's documentation.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn kernel_1(x: &mut [f32; 256]) {
// The last argument is a one-element tuple holding the arguments forwarded to `_kernel_1`.
core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,))
}
// Function passed to the offload intrinsic by `kernel_1`: overwrites every element of `x`
// with 21.0.
// `no_mangle` keeps the `_kernel_1` symbol name unmangled; the expected-output patterns in
// this file reference globals whose names are derived from it.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn _kernel_1(x: &mut [f32; 256]) {
// The range matches the array length (256), so every index is in bounds.
for i in 0..256 {
x[i] = 21.0;
}
}
// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
@ -36,8 +45,9 @@ fn main() {
// CHECK: @.offloading.entry_name._kernel_1 = internal unnamed_addr constant [10 x i8] c"_kernel_1\00", section ".llvm.rodata.offloading", align 1
// CHECK: @.offloading.entry._kernel_1 = internal constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @._kernel_1.region_id, ptr @.offloading.entry_name._kernel_1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
// CHECK: Function Attrs: nounwind
// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
// CHECK: declare void @__tgt_register_lib(ptr) local_unnamed_addr
// CHECK: declare void @__tgt_unregister_lib(ptr) local_unnamed_addr
// CHECK: define{{( dso_local)?}} void @main()
// CHECK-NEXT: start:
@ -94,17 +104,3 @@ fn main() {
// CHECK-NEXT: call void @__tgt_unregister_lib(ptr nonnull %EmptyDesc)
// CHECK-NEXT: ret void
// CHECK-NEXT: }
// Host-side wrapper used by this test: hands `x` to the `core::intrinsics::offload`
// intrinsic, naming `_kernel_1` as the function to offload.
// `no_mangle` keeps the symbol name unmangled and `inline(never)` asks the compiler to keep
// this function as a separate, non-inlined definition.
// NOTE(review): the two `[_, 1, 1]` arrays look like 3-D launch dimensions (e.g. number of
// blocks and threads per block) -- confirm against the intrinsic's documentation.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn kernel_1(x: &mut [f32; 256]) {
// The last argument is a one-element tuple holding the arguments forwarded to `_kernel_1`.
core::intrinsics::offload(_kernel_1, [256, 1, 1], [32, 1, 1], (x,))
}
// Function passed to the offload intrinsic by `kernel_1`: overwrites every element of `x`
// with 21.0.
// `no_mangle` keeps the `_kernel_1` symbol name unmangled; the expected-output patterns in
// this file reference globals whose names are derived from it.
#[unsafe(no_mangle)]
#[inline(never)]
pub fn _kernel_1(x: &mut [f32; 256]) {
// The range matches the array length (256), so every index is in bounds.
for i in 0..256 {
x[i] = 21.0;
}
}