diff --git a/src/librustc_codegen_llvm/asm.rs b/src/librustc_codegen_llvm/asm.rs index 8986ab322c07..fe4cd16c1f5e 100644 --- a/src/librustc_codegen_llvm/asm.rs +++ b/src/librustc_codegen_llvm/asm.rs @@ -254,6 +254,7 @@ impl AsmBuilderMethods<'tcx> for Builder<'a, 'll, 'tcx> { ]); } InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => {} + InlineAsmArch::Nvptx64 => {} } } if !options.contains(InlineAsmOptions::NOMEM) { @@ -410,6 +411,11 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass) -> String { | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => "x", InlineAsmRegClass::Arm(ArmInlineAsmRegClass::dreg) | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg) => "w", + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => "h", + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => "r", + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => "l", + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg32) => "f", + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg64) => "d", InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => "r", InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => "f", InlineAsmRegClass::X86(X86InlineAsmRegClass::reg) => "r", @@ -452,6 +458,7 @@ fn modifier_to_llvm( modifier } } + InlineAsmRegClass::Nvptx(_) => None, InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) | InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => None, InlineAsmRegClass::X86(X86InlineAsmRegClass::reg) @@ -502,6 +509,11 @@ fn dummy_output_type(cx: &CodegenCx<'ll, 'tcx>, reg: InlineAsmRegClass) -> &'ll | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => { cx.type_vector(cx.type_i64(), 2) } + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => cx.type_i16(), + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => cx.type_i32(), + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => cx.type_i64(), + InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg32) => cx.type_f32(), + 
InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::freg64) => cx.type_f64(), InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(), InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(), InlineAsmRegClass::X86(X86InlineAsmRegClass::reg) diff --git a/src/librustc_target/asm/mod.rs b/src/librustc_target/asm/mod.rs index 774146a679ab..ffca742e9ab8 100644 --- a/src/librustc_target/asm/mod.rs +++ b/src/librustc_target/asm/mod.rs @@ -146,11 +146,13 @@ macro_rules! types { mod aarch64; mod arm; +mod nvptx; mod riscv; mod x86; pub use aarch64::{AArch64InlineAsmReg, AArch64InlineAsmRegClass}; pub use arm::{ArmInlineAsmReg, ArmInlineAsmRegClass}; +pub use nvptx::{NvptxInlineAsmReg, NvptxInlineAsmRegClass}; pub use riscv::{RiscVInlineAsmReg, RiscVInlineAsmRegClass}; pub use x86::{X86InlineAsmReg, X86InlineAsmRegClass}; @@ -162,6 +164,7 @@ pub enum InlineAsmArch { AArch64, RiscV32, RiscV64, + Nvptx64, } impl FromStr for InlineAsmArch { @@ -175,6 +178,7 @@ impl FromStr for InlineAsmArch { "aarch64" => Ok(Self::AArch64), "riscv32" => Ok(Self::RiscV32), "riscv64" => Ok(Self::RiscV64), + "nvptx64" => Ok(Self::Nvptx64), _ => Err(()), } } @@ -196,6 +200,7 @@ pub enum InlineAsmReg { Arm(ArmInlineAsmReg), AArch64(AArch64InlineAsmReg), RiscV(RiscVInlineAsmReg), + Nvptx(NvptxInlineAsmReg), } impl InlineAsmReg { @@ -205,6 +210,7 @@ impl InlineAsmReg { Self::Arm(r) => r.name(), Self::AArch64(r) => r.name(), Self::RiscV(r) => r.name(), + Self::Nvptx(r) => r.name(), } } @@ -214,6 +220,7 @@ impl InlineAsmReg { Self::Arm(r) => InlineAsmRegClass::Arm(r.reg_class()), Self::AArch64(r) => InlineAsmRegClass::AArch64(r.reg_class()), Self::RiscV(r) => InlineAsmRegClass::RiscV(r.reg_class()), + Self::Nvptx(r) => InlineAsmRegClass::Nvptx(r.reg_class()), } } @@ -236,6 +243,9 @@ impl InlineAsmReg { InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => { Self::RiscV(RiscVInlineAsmReg::parse(arch, has_feature, &name)?) 
} + InlineAsmArch::Nvptx64 => { + Self::Nvptx(NvptxInlineAsmReg::parse(arch, has_feature, &name)?) + } }) } @@ -252,6 +262,7 @@ impl InlineAsmReg { Self::Arm(r) => r.emit(out, arch, modifier), Self::AArch64(r) => r.emit(out, arch, modifier), Self::RiscV(r) => r.emit(out, arch, modifier), + Self::Nvptx(r) => r.emit(out, arch, modifier), } } @@ -261,6 +272,7 @@ impl InlineAsmReg { Self::Arm(r) => r.overlapping_regs(|r| cb(Self::Arm(r))), Self::AArch64(_) => cb(self), Self::RiscV(_) => cb(self), + Self::Nvptx(_) => cb(self), } } } @@ -281,6 +293,7 @@ pub enum InlineAsmRegClass { Arm(ArmInlineAsmRegClass), AArch64(AArch64InlineAsmRegClass), RiscV(RiscVInlineAsmRegClass), + Nvptx(NvptxInlineAsmRegClass), } impl InlineAsmRegClass { @@ -290,6 +303,7 @@ impl InlineAsmRegClass { Self::Arm(r) => r.name(), Self::AArch64(r) => r.name(), Self::RiscV(r) => r.name(), + Self::Nvptx(r) => r.name(), } } @@ -302,6 +316,7 @@ impl InlineAsmRegClass { Self::Arm(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Arm), Self::AArch64(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::AArch64), Self::RiscV(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::RiscV), + Self::Nvptx(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Nvptx), } } @@ -321,6 +336,7 @@ impl InlineAsmRegClass { Self::Arm(r) => r.suggest_modifier(arch, ty), Self::AArch64(r) => r.suggest_modifier(arch, ty), Self::RiscV(r) => r.suggest_modifier(arch, ty), + Self::Nvptx(r) => r.suggest_modifier(arch, ty), } } @@ -336,6 +352,7 @@ impl InlineAsmRegClass { Self::Arm(r) => r.default_modifier(arch), Self::AArch64(r) => r.default_modifier(arch), Self::RiscV(r) => r.default_modifier(arch), + Self::Nvptx(r) => r.default_modifier(arch), } } @@ -350,6 +367,7 @@ impl InlineAsmRegClass { Self::Arm(r) => r.supported_types(arch), Self::AArch64(r) => r.supported_types(arch), Self::RiscV(r) => r.supported_types(arch), + Self::Nvptx(r) => r.supported_types(arch), } } @@ -367,6 +385,9 @@ impl InlineAsmRegClass { 
InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => { Self::RiscV(RiscVInlineAsmRegClass::parse(arch, name)?) } + InlineAsmArch::Nvptx64 => { + Self::Nvptx(NvptxInlineAsmRegClass::parse(arch, name)?) + } }) }) } @@ -379,6 +400,7 @@ impl InlineAsmRegClass { Self::Arm(r) => r.valid_modifiers(arch), Self::AArch64(r) => r.valid_modifiers(arch), Self::RiscV(r) => r.valid_modifiers(arch), + Self::Nvptx(r) => r.valid_modifiers(arch), } } } @@ -518,5 +540,10 @@ pub fn allocatable_registers( riscv::fill_reg_map(arch, has_feature, &mut map); map } + InlineAsmArch::Nvptx64 => { + let mut map = nvptx::regclass_map(); + nvptx::fill_reg_map(arch, has_feature, &mut map); + map + } } } diff --git a/src/librustc_target/asm/nvptx.rs b/src/librustc_target/asm/nvptx.rs new file mode 100644 index 000000000000..b574e921718a --- /dev/null +++ b/src/librustc_target/asm/nvptx.rs @@ -0,0 +1,90 @@ +use super::{InlineAsmArch, InlineAsmType}; +use rustc_macros::HashStable_Generic; +use std::fmt; + +def_reg_class! { + Nvptx NvptxInlineAsmRegClass { + reg16, + reg32, + reg64, + freg32, + freg64, + } +} + +impl NvptxInlineAsmRegClass { + pub fn valid_modifiers(self, _arch: InlineAsmArch) -> &'static [char] { + &[] + } + + pub fn suggest_class(self, _arch: InlineAsmArch, _ty: InlineAsmType) -> Option<Self> { + None + } + + pub fn suggest_modifier( + self, + _arch: InlineAsmArch, + _ty: InlineAsmType, + ) -> Option<(char, &'static str)> { + None + } + + pub fn default_modifier(self, _arch: InlineAsmArch) -> Option<(char, &'static str)> { + None + } + + pub fn supported_types( + self, + _arch: InlineAsmArch, + ) -> &'static [(InlineAsmType, Option<&'static str>)] { + match self { + Self::reg16 => types! { _: I8, I16; }, + Self::reg32 => types! { _: I8, I16, I32; }, + Self::reg64 => types! { _: I8, I16, I32, I64; }, + Self::freg32 => types! { _: F32; }, + Self::freg64 => types! { _: F32, F64; }, + } + } +} + +def_regs!
{ + Nvptx NvptxInlineAsmReg NvptxInlineAsmRegClass { + // We have to define a register, otherwise we get warnings/errors about unused imports and + // unreachable code. Do what clang does and define r0. + r0: reg32 = ["r0"], + #error = ["tid", "tid.x", "tid.y", "tid.z"] => "tid not supported for inline asm", + #error = ["ntid", "ntid.x", "ntid.y", "ntid.z"] => "ntid not supported for inline asm", + #error = ["laneid"] => "laneid not supported for inline asm", + #error = ["warpid"] => "warpid not supported for inline asm", + #error = ["nwarpid"] => "nwarpid not supported for inline asm", + #error = ["ctaid", "ctaid.x", "ctaid.y", "ctaid.z"] => "ctaid not supported for inline asm", + #error = ["nctaid", "nctaid.x", "nctaid.y", "nctaid.z"] => "nctaid not supported for inline asm", + #error = ["smid"] => "smid not supported for inline asm", + #error = ["nsmid"] => "nsmid not supported for inline asm", + #error = ["gridid"] => "gridid not supported for inline asm", + #error = ["lanemask_eq"] => "lanemask_eq not supported for inline asm", + #error = ["lanemask_le"] => "lanemask_le not supported for inline asm", + #error = ["lanemask_lt"] => "lanemask_lt not supported for inline asm", + #error = ["lanemask_ge"] => "lanemask_ge not supported for inline asm", + #error = ["lanemask_gt"] => "lanemask_gt not supported for inline asm", + #error = ["clock", "clock_hi"] => "clock not supported for inline asm", + #error = ["clock64"] => "clock64 not supported for inline asm", + #error = ["pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7"] => "pm not supported for inline asm", + #error = ["pm0_64", "pm1_64", "pm2_64", "pm3_64", "pm4_64", "pm5_64", "pm6_64", "pm7_64"] => "pm_64 not supported for inline asm", + #error = ["envreg0", "envreg1", "envreg2", "envreg3", "envreg4", "envreg5", "envreg6", "envreg7", "envreg8", "envreg9", "envreg10", "envreg11", "envreg12", "envreg13", "envreg14", "envreg15", "envreg16", "envreg17", "envreg18", "envreg19", "envreg20", "envreg21",
"envreg22", "envreg23", "envreg24", "envreg25", "envreg26", "envreg27", "envreg28", "envreg29", "envreg30", "envreg31"] => "envreg not supported for inline asm", + #error = ["globaltimer", "globaltimer_lo", "globaltimer_hi"] => "globaltimer not supported for inline asm", + #error = ["total_mem_size"] => "total_mem_size not supported for inline asm", + #error = ["dynamic_mem_size"] => "dynamic_mem_size not supported for inline asm", + } +} + +impl NvptxInlineAsmReg { + pub fn emit( + self, + out: &mut dyn fmt::Write, + _arch: InlineAsmArch, + _modifier: Option<char>, + ) -> fmt::Result { + out.write_str(self.name()) + } +} diff --git a/src/test/assembly/asm/nvptx-types.rs b/src/test/assembly/asm/nvptx-types.rs new file mode 100644 index 000000000000..cfaab2351245 --- /dev/null +++ b/src/test/assembly/asm/nvptx-types.rs @@ -0,0 +1,109 @@ +// no-system-llvm +// assembly-output: emit-asm +// compile-flags: --target nvptx64-nvidia-cuda +// only-nvptx64 +// ignore-nvptx64 + +#![feature(no_core, lang_items, rustc_attrs)] +#![crate_type = "rlib"] +#![no_core] +#![allow(asm_sub_register, non_camel_case_types)] + +#[rustc_builtin_macro] +macro_rules! asm { + () => {}; +} +#[rustc_builtin_macro] +macro_rules! concat { + () => {}; +} +#[rustc_builtin_macro] +macro_rules! stringify { + () => {}; +} + +#[lang = "sized"] +trait Sized {} +#[lang = "copy"] +trait Copy {} + +type ptr = *mut u8; + +impl Copy for i8 {} +impl Copy for i16 {} +impl Copy for i32 {} +impl Copy for f32 {} +impl Copy for i64 {} +impl Copy for f64 {} +impl Copy for ptr {} + +#[no_mangle] +fn extern_func(); + +// CHECK-LABEL: sym_fn +// CHECK: #APP +// CHECK: call extern_func; +// CHECK: #NO_APP +#[no_mangle] +pub unsafe fn sym_fn() { + asm!("call {}", sym extern_func); +} + +macro_rules!
check { + ($func:ident $ty:ident $class:ident $mov:literal) => { + #[no_mangle] + pub unsafe fn $func(x: $ty) -> $ty { + // Hack to avoid function merging + extern "Rust" { + fn dont_merge(s: &str); + } + dont_merge(stringify!($func)); + + let y; + asm!(concat!($mov, " {}, {};"), out($class) y, in($class) x); + y + } + }; +} + +// CHECK-LABEL: reg_i8 +// CHECK: #APP +// CHECK: mov.i16 {{[a-z0-9]+}}, {{[a-z0-9]+}}; +// CHECK: #NO_APP +check!(reg_i8 i8 reg16 "mov.i16"); + +// CHECK-LABEL: reg_i16 +// CHECK: #APP +// CHECK: mov.i16 {{[a-z0-9]+}}, {{[a-z0-9]+}}; +// CHECK: #NO_APP +check!(reg_i16 i16 reg16 "mov.i16"); + +// CHECK-LABEL: reg_i32 +// CHECK: #APP +// CHECK: mov.i32 {{[a-z0-9]+}}, {{[a-z0-9]+}}; +// CHECK: #NO_APP +check!(reg_i32 i32 reg32 "mov.i32"); + +// CHECK-LABEL: reg_f32 +// CHECK: #APP +// CHECK: mov.f32 {{[a-z0-9]+}}, {{[a-z0-9]+}}; +// CHECK: #NO_APP +check!(reg_f32 f32 freg32 "mov.f32"); + +// CHECK-LABEL: reg_i64 +// CHECK: #APP +// CHECK: mov.i64 {{[a-z0-9]+}}, {{[a-z0-9]+}}; +// CHECK: #NO_APP +check!(reg_i64 i64 reg64 "mov.i64"); + +// CHECK-LABEL: reg_f64 +// CHECK: #APP +// CHECK: mov.f64 {{[a-z0-9]+}}, {{[a-z0-9]+}}; +// CHECK: #NO_APP +check!(reg_f64 f64 freg64 "mov.f64"); + +// CHECK-LABEL: reg_ptr +// CHECK: #APP +// CHECK: mov.i64 {{[a-z0-9]+}}, {{[a-z0-9]+}}; +// CHECK: #NO_APP +check!(reg_ptr ptr reg64 "mov.i64");