Add amdgpu_dispatch_ptr intrinsic
Add a rustc intrinsic `amdgpu_dispatch_ptr` to access the kernel dispatch packet on amdgpu. The HSA kernel dispatch packet contains important information like the launch size and workgroup size. The Rust intrinsic lowers to the `llvm.amdgcn.dispatch.ptr` LLVM intrinsic, which returns a `ptr addrspace(4)`, plus an addrspacecast to `addrspace(0)`, so it can be returned as a Rust reference. The returned pointer/reference is valid for the whole program lifetime, and is therefore `'static`. The return type of the intrinsic (`*const ()`) does not mention the struct so that rustc does not need to know the exact struct type. An alternative would be to define the struct as lang item or add a generic argument to the function. Short version: ```rust #[cfg(target_arch = "amdgpu")] pub fn amdgpu_dispatch_ptr() -> *const (); ```
This commit is contained in:
parent
fcd630976c
commit
91d4e40e02
7 changed files with 61 additions and 0 deletions
|
|
@ -560,6 +560,12 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
sym::amdgpu_dispatch_ptr => {
|
||||
let val = self.call_intrinsic("llvm.amdgcn.dispatch.ptr", &[], &[]);
|
||||
// Relying on `LLVMBuildPointerCast` to produce an addrspacecast
|
||||
self.pointercast(val, self.type_ptr())
|
||||
}
|
||||
|
||||
_ if name.as_str().starts_with("simd_") => {
|
||||
// Unpack non-power-of-2 #[repr(packed, simd)] arguments.
|
||||
// This gives them the expected layout of a regular #[repr(simd)] vector.
|
||||
|
|
|
|||
|
|
@ -112,6 +112,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
|
|||
| sym::unreachable
|
||||
| sym::cold_path
|
||||
| sym::breakpoint
|
||||
| sym::amdgpu_dispatch_ptr
|
||||
| sym::assert_zero_valid
|
||||
| sym::assert_mem_uninitialized_valid
|
||||
| sym::assert_inhabited
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
|
|||
| sym::add_with_overflow
|
||||
| sym::aggregate_raw_ptr
|
||||
| sym::align_of
|
||||
| sym::amdgpu_dispatch_ptr
|
||||
| sym::assert_inhabited
|
||||
| sym::assert_mem_uninitialized_valid
|
||||
| sym::assert_zero_valid
|
||||
|
|
@ -285,6 +286,7 @@ pub(crate) fn check_intrinsic_type(
|
|||
let (n_tps, n_cts, inputs, output) = match intrinsic_name {
|
||||
sym::autodiff => (4, 0, vec![param(0), param(1), param(2)], param(3)),
|
||||
sym::abort => (0, 0, vec![], tcx.types.never),
|
||||
sym::amdgpu_dispatch_ptr => (0, 0, vec![], Ty::new_imm_ptr(tcx, tcx.types.unit)),
|
||||
sym::unreachable => (0, 0, vec![], tcx.types.never),
|
||||
sym::breakpoint => (0, 0, vec![], tcx.types.unit),
|
||||
sym::size_of | sym::align_of | sym::variant_count => (1, 0, vec![], tcx.types.usize),
|
||||
|
|
|
|||
|
|
@ -454,6 +454,7 @@ symbols! {
|
|||
alu32,
|
||||
always,
|
||||
amdgpu,
|
||||
amdgpu_dispatch_ptr,
|
||||
analysis,
|
||||
and,
|
||||
and_then,
|
||||
|
|
|
|||
23
library/core/src/intrinsics/gpu.rs
Normal file
23
library/core/src/intrinsics/gpu.rs
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
//! Intrinsics for GPU targets.
|
||||
//!
|
||||
//! Intrinsics in this module are intended for use on GPU targets.
|
||||
//! They can be target specific but in general GPU targets are similar.
|
||||
|
||||
#![unstable(feature = "gpu_intrinsics", issue = "none")]
|
||||
|
||||
/// Returns a pointer to the HSA kernel dispatch packet.
|
||||
///
|
||||
/// A `gpu-kernel` on amdgpu is always launched through a kernel dispatch packet.
|
||||
/// The dispatch packet contains the workgroup size, launch size and other data.
|
||||
/// The content is defined by the [HSA Platform System Architecture Specification],
|
||||
/// which is implemented e.g. in AMD's [hsa.h].
|
||||
/// The intrinsic returns a unit pointer so that rustc does not need to know the packet struct.
|
||||
/// The pointer is valid for the whole lifetime of the program.
|
||||
///
|
||||
/// [HSA Platform System Architecture Specification]: https://hsafoundation.com/wp-content/uploads/2021/02/HSA-SysArch-1.2.pdf
|
||||
/// [hsa.h]: https://github.com/ROCm/rocm-systems/blob/rocm-7.1.0/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa.h#L2959
|
||||
#[rustc_nounwind]
|
||||
#[rustc_intrinsic]
|
||||
#[cfg(target_arch = "amdgpu")]
|
||||
#[must_use = "returns a pointer that does nothing unless used"]
|
||||
pub fn amdgpu_dispatch_ptr() -> *const ();
|
||||
|
|
@ -60,6 +60,7 @@ use crate::{mem, ptr};
|
|||
|
||||
mod bounds;
|
||||
pub mod fallback;
|
||||
pub mod gpu;
|
||||
pub mod mir;
|
||||
pub mod simd;
|
||||
|
||||
|
|
|
|||
27
tests/codegen-llvm/amdgpu-dispatch-ptr.rs
Normal file
27
tests/codegen-llvm/amdgpu-dispatch-ptr.rs
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
// Tests the amdgpu_dispatch_ptr intrinsic.
|
||||
|
||||
//@ compile-flags: --crate-type=rlib --target amdgcn-amd-amdhsa -Ctarget-cpu=gfx900
|
||||
//@ needs-llvm-components: amdgpu
|
||||
//@ add-minicore
|
||||
#![feature(intrinsics, no_core, rustc_attrs)]
|
||||
#![no_core]
|
||||
|
||||
extern crate minicore;
|
||||
|
||||
pub struct DispatchPacket {
|
||||
pub header: u16,
|
||||
pub setup: u16,
|
||||
pub workgroup_size_x: u16, // and more
|
||||
}
|
||||
|
||||
#[rustc_intrinsic]
|
||||
#[rustc_nounwind]
|
||||
fn amdgpu_dispatch_ptr() -> *const ();
|
||||
|
||||
// CHECK-LABEL: @get_dispatch_data
|
||||
// CHECK: %[[ORIG_PTR:[^ ]+]] = {{(tail )?}}call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
|
||||
// CHECK-NEXT: %[[PTR:[^ ]+]] = addrspacecast ptr addrspace(4) %[[ORIG_PTR]] to ptr
|
||||
#[unsafe(no_mangle)]
|
||||
pub fn get_dispatch_data() -> &'static DispatchPacket {
|
||||
unsafe { &*(amdgpu_dispatch_ptr() as *const _) }
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue