Add amdgpu_dispatch_ptr intrinsic

Add a rustc intrinsic `amdgpu_dispatch_ptr` to access the kernel
dispatch packet on amdgpu.
The HSA kernel dispatch packet contains important information like the
launch size and workgroup size.

The Rust intrinsic lowers to the `llvm.amdgcn.dispatch.ptr` LLVM
intrinsic, which returns a `ptr addrspace(4)`, plus an addrspacecast to
`addrspace(0)`, so it can be returned as a Rust reference.

The returned pointer/reference is valid for the whole program lifetime,
and is therefore `'static`.

The return type of the intrinsic (`*const ()`) does not mention the
struct so that rustc does not need to know the exact struct type.
An alternative would be to define the struct as lang item or add a
generic argument to the function.

Short version:
```rust
#[cfg(target_arch = "amdgpu")]
pub fn amdgpu_dispatch_ptr() -> *const ();
```
This commit is contained in:
Flakebi 2026-01-09 10:41:37 +01:00
parent fcd630976c
commit 91d4e40e02
No known key found for this signature in database
GPG key ID: 38E7ED984D7DCD02
7 changed files with 61 additions and 0 deletions

View file

@ -560,6 +560,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
return Ok(());
}
sym::amdgpu_dispatch_ptr => {
let val = self.call_intrinsic("llvm.amdgcn.dispatch.ptr", &[], &[]);
// Relying on `LLVMBuildPointerCast` to produce an addrspacecast
self.pointercast(val, self.type_ptr())
}
_ if name.as_str().starts_with("simd_") => {
// Unpack non-power-of-2 #[repr(packed, simd)] arguments.
// This gives them the expected layout of a regular #[repr(simd)] vector.

View file

@ -112,6 +112,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
| sym::unreachable
| sym::cold_path
| sym::breakpoint
| sym::amdgpu_dispatch_ptr
| sym::assert_zero_valid
| sym::assert_mem_uninitialized_valid
| sym::assert_inhabited

View file

@ -70,6 +70,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
| sym::add_with_overflow
| sym::aggregate_raw_ptr
| sym::align_of
| sym::amdgpu_dispatch_ptr
| sym::assert_inhabited
| sym::assert_mem_uninitialized_valid
| sym::assert_zero_valid
@ -285,6 +286,7 @@ pub(crate) fn check_intrinsic_type(
let (n_tps, n_cts, inputs, output) = match intrinsic_name {
sym::autodiff => (4, 0, vec![param(0), param(1), param(2)], param(3)),
sym::abort => (0, 0, vec![], tcx.types.never),
sym::amdgpu_dispatch_ptr => (0, 0, vec![], Ty::new_imm_ptr(tcx, tcx.types.unit)),
sym::unreachable => (0, 0, vec![], tcx.types.never),
sym::breakpoint => (0, 0, vec![], tcx.types.unit),
sym::size_of | sym::align_of | sym::variant_count => (1, 0, vec![], tcx.types.usize),

View file

@ -454,6 +454,7 @@ symbols! {
alu32,
always,
amdgpu,
amdgpu_dispatch_ptr,
analysis,
and,
and_then,

View file

@ -0,0 +1,23 @@
//! Intrinsics for GPU targets.
//!
//! Intrinsics in this module are intended for use on GPU targets.
//! They can be target specific but in general GPU targets are similar.
#![unstable(feature = "gpu_intrinsics", issue = "none")]
/// Returns a pointer to the HSA kernel dispatch packet.
///
/// A `gpu-kernel` on amdgpu is always launched through a kernel dispatch packet.
/// The dispatch packet contains the workgroup size, launch size and other data.
/// The content is defined by the [HSA Platform System Architecture Specification],
/// which is implemented e.g. in AMD's [hsa.h].
/// The intrinsic returns a unit pointer so that rustc does not need to know the packet struct.
/// The pointer is valid for the whole lifetime of the program.
///
/// [HSA Platform System Architecture Specification]: https://hsafoundation.com/wp-content/uploads/2021/02/HSA-SysArch-1.2.pdf
/// [hsa.h]: https://github.com/ROCm/rocm-systems/blob/rocm-7.1.0/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h#L2959
#[rustc_nounwind]
#[rustc_intrinsic]
#[cfg(target_arch = "amdgpu")]
#[must_use = "returns a pointer that does nothing unless used"]
pub fn amdgpu_dispatch_ptr() -> *const ();

View file

@ -60,6 +60,7 @@ use crate::{mem, ptr};
mod bounds;
pub mod fallback;
pub mod gpu;
pub mod mir;
pub mod simd;

View file

@ -0,0 +1,27 @@
// Tests the amdgpu_dispatch_ptr intrinsic.
//@ compile-flags: --crate-type=rlib --target amdgcn-amd-amdhsa -Ctarget-cpu=gfx900
//@ needs-llvm-components: amdgpu
//@ add-minicore
#![feature(intrinsics, no_core, rustc_attrs)]
#![no_core]
extern crate minicore;
pub struct DispatchPacket {
pub header: u16,
pub setup: u16,
pub workgroup_size_x: u16, // and more
}
#[rustc_intrinsic]
#[rustc_nounwind]
fn amdgpu_dispatch_ptr() -> *const ();
// CHECK-LABEL: @get_dispatch_data
// CHECK: %[[ORIG_PTR:[^ ]+]] = {{(tail )?}}call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK-NEXT: %[[PTR:[^ ]+]] = addrspacecast ptr addrspace(4) %[[ORIG_PTR]] to ptr
#[unsafe(no_mangle)]
pub fn get_dispatch_data() -> &'static DispatchPacket {
unsafe { &*(amdgpu_dispatch_ptr() as *const _) }
}