Merge pull request #172 from rust-lang/feature/more-simd

Implement more SIMD intrinsics
This commit is contained in:
antoyo 2022-05-04 23:39:04 -04:00 committed by GitHub
commit e062c37125
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 580 additions and 35 deletions

View file

@ -595,7 +595,7 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister {
InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg)
| InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg) => "x",
InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => "v",
InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => unimplemented!(),
InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => "Yk",
InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => unimplemented!(),
InlineAsmRegClass::X86(
X86InlineAsmRegClass::x87_reg | X86InlineAsmRegClass::mmx_reg,

View file

@ -7,6 +7,7 @@ use gccjit::{
BinaryOp,
Block,
ComparisonOp,
Context,
Function,
LValue,
RValue,
@ -47,6 +48,7 @@ use rustc_target::spec::{HasTargetSpec, Target};
use crate::common::{SignType, TypeReflection, type_is_pointer};
use crate::context::CodegenCx;
use crate::intrinsic::llvm;
use crate::type_of::LayoutGccExt;
// TODO(antoyo)
@ -216,11 +218,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
return Cow::Borrowed(args);
}
let func_name = format!("{:?}", func_ptr);
let casted_args: Vec<_> = param_types
.into_iter()
.zip(args.iter())
.enumerate()
.map(|(index, (expected_ty, &actual_val))| {
if llvm::ignore_arg_cast(&func_name, index, args.len()) {
return actual_val;
}
let actual_ty = actual_val.get_type();
if expected_ty != actual_ty {
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
@ -297,6 +305,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
if return_type != void_type {
unsafe { RETURN_VALUE_COUNT += 1 };
let result = current_func.new_local(None, return_type, &format!("ptrReturnValue{}", unsafe { RETURN_VALUE_COUNT }));
let func_name = format!("{:?}", func_ptr);
let args = llvm::adjust_intrinsic_arguments(&self, gcc_func, args, &func_name);
self.block.add_assignment(None, result, self.cx.context.new_call_through_ptr(None, func_ptr, &args));
result.to_rvalue()
}
@ -1316,6 +1326,11 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
element_type
}
else {
#[cfg(feature="master")]
{
self.cx.type_ix(element_type.get_size() as u64 * 8)
}
#[cfg(not(feature="master"))]
self.int_type
};
for i in 0..mask_num_units {
@ -1343,7 +1358,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
}
// TODO(antoyo): switch to using new_vector_access.
let array = self.context.new_bitcast(None, v2, array_type);
for i in 0..vec_num_units {
for i in 0..(mask_num_units - vec_num_units) {
elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue());
}
let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements);
@ -1380,6 +1395,98 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
pub fn shuffle_vector(&mut self, _v1: RValue<'gcc>, _v2: RValue<'gcc>, _mask: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}
#[cfg(feature="master")]
pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
{
let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
let element_count = vector_type.get_num_units();
let mut vector_elements = vec![];
for i in 0..element_count {
vector_elements.push(i);
}
let mask_type = self.context.new_vector_type(self.int_type, element_count as u64);
let mut shift = 1;
let mut res = src;
while shift < element_count {
let vector_elements: Vec<_> =
vector_elements.iter()
.map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32))
.collect();
let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask);
shift *= 2;
res = op(res, shifted, &self.context);
}
self.context.new_vector_access(None, res, self.context.new_rvalue_zero(self.int_type))
.to_rvalue()
}
#[cfg(not(feature="master"))]
pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
{
unimplemented!();
}
pub fn vector_reduce_op(&mut self, src: RValue<'gcc>, op: BinaryOp) -> RValue<'gcc> {
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
}
pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}
pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
unimplemented!();
}
// Inspired by Hacker's Delight min implementation.
pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
self.vector_reduce(src, |a, b, context| {
let differences_or_zeros = difference_or_zero(a, b, context);
context.new_binary_op(None, BinaryOp::Minus, a.get_type(), a, differences_or_zeros)
})
}
// Inspired by Hacker's Delight max implementation.
pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> {
self.vector_reduce(src, |a, b, context| {
let differences_or_zeros = difference_or_zero(a, b, context);
context.new_binary_op(None, BinaryOp::Plus, b.get_type(), b, differences_or_zeros)
})
}
pub fn vector_select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, else_val: RValue<'gcc>) -> RValue<'gcc> {
// cond is a vector of integers, not of bools.
let cond_type = cond.get_type();
let vector_type = cond_type.unqualified().dyncast_vector().expect("vector type");
let num_units = vector_type.get_num_units();
let element_type = vector_type.get_element_type();
let zeros = vec![self.context.new_rvalue_zero(element_type); num_units];
let zeros = self.context.new_rvalue_from_vector(None, cond_type, &zeros);
let masks = self.context.new_comparison(None, ComparisonOp::NotEquals, cond, zeros);
let then_vals = masks & then_val;
let ones = vec![self.context.new_rvalue_one(element_type); num_units];
let ones = self.context.new_rvalue_from_vector(None, cond_type, &ones);
let inverted_masks = masks + ones;
// NOTE: sometimes, the type of else_val can be different than the type of then_val in
// libgccjit (vector of int vs vector of int32_t), but they should be the same for the AND
// operation to work.
let else_val = self.context.new_bitcast(None, else_val, then_val.get_type());
let else_vals = inverted_masks & else_val;
then_vals | else_vals
}
}
fn difference_or_zero<'gcc>(a: RValue<'gcc>, b: RValue<'gcc>, context: &'gcc Context<'gcc>) -> RValue<'gcc> {
let difference = a - b;
let masks = context.new_comparison(None, ComparisonOp::GreaterThanEquals, b, a);
difference & masks
}
impl<'a, 'gcc, 'tcx> StaticBuilderMethods for Builder<'a, 'gcc, 'tcx> {

View file

@ -117,8 +117,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
unimplemented!();
}
fn const_real(&self, _t: Type<'gcc>, _val: f64) -> RValue<'gcc> {
unimplemented!();
fn const_real(&self, typ: Type<'gcc>, val: f64) -> RValue<'gcc> {
self.context.new_rvalue_from_double(typ, val)
}
fn const_str(&self, s: Symbol) -> (RValue<'gcc>, RValue<'gcc>) {

View file

@ -27,12 +27,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
}
// NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some
// SIMD builtins require a constant value.
if value.get_type() != typ {
self.context.new_bitcast(None, value, typ)
}
else {
value
}
self.bitcast_if_needed(value, typ)
}
}
@ -86,13 +81,7 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> {
// TODO(antoyo): set alignment.
let value =
if value.get_type() != gcc_type {
self.context.new_bitcast(None, value, gcc_type)
}
else {
value
};
let value = self.bitcast_if_needed(value, gcc_type);
global.global_set_initializer_rvalue(value);
// As an optimization, all shared statics which do not have interior

View file

@ -35,6 +35,7 @@ pub struct CodegenCx<'gcc, 'tcx> {
pub normal_function_addresses: RefCell<FxHashSet<RValue<'gcc>>>,
pub functions: RefCell<FxHashMap<String, Function<'gcc>>>,
pub intrinsics: RefCell<FxHashMap<String, Function<'gcc>>>,
pub tls_model: gccjit::TlsModel,
@ -184,6 +185,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
current_func: RefCell::new(None),
normal_function_addresses: Default::default(),
functions: RefCell::new(functions),
intrinsics: RefCell::new(FxHashMap::default()),
tls_model,
@ -279,6 +281,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
pub fn sess(&self) -> &Session {
&self.tcx.sess
}
pub fn bitcast_if_needed(&self, value: RValue<'gcc>, expected_type: Type<'gcc>) -> RValue<'gcc> {
if value.get_type() != expected_type {
self.context.new_bitcast(None, value, expected_type)
}
else {
value
}
}
}
impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {
@ -306,8 +317,16 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
}
fn get_fn_addr(&self, instance: Instance<'tcx>) -> RValue<'gcc> {
let func = get_fn(self, instance);
let func = self.rvalue_as_function(func);
let func_name = self.tcx.symbol_name(instance).name;
let func =
if self.intrinsics.borrow().contains_key(func_name) {
self.intrinsics.borrow()[func_name].clone()
}
else {
let func = get_fn(self, instance);
self.rvalue_as_function(func)
};
let ptr = func.get_address(None);
// TODO(antoyo): don't do this twice: i.e. in declare_fn and here.

View file

@ -11,7 +11,7 @@ use crate::intrinsic::llvm;
impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
pub fn get_or_insert_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {
if self.globals.borrow().contains_key(name) {
let typ = self.globals.borrow().get(name).expect("global").get_type();
let typ = self.globals.borrow()[name].get_type();
let global = self.context.new_global(None, GlobalKind::Imported, typ, name);
if is_tls {
global.set_tls_model(self.tls_model);
@ -103,11 +103,13 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
/// update the declaration and return existing Value instead.
fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*llvm::CallConv*/, return_type: Type<'gcc>, param_types: &[Type<'gcc>], variadic: bool) -> Function<'gcc> {
if name.starts_with("llvm.") {
return llvm::intrinsic(name, cx);
let intrinsic = llvm::intrinsic(name, cx);
cx.intrinsics.borrow_mut().insert(name.to_string(), intrinsic);
return intrinsic;
}
let func =
if cx.functions.borrow().contains_key(name) {
*cx.functions.borrow().get(name).expect("function")
cx.functions.borrow()[name]
}
else {
let params: Vec<_> = param_types.into_iter().enumerate()

View file

@ -1,6 +1,172 @@
use gccjit::Function;
use std::borrow::Cow;
use crate::context::CodegenCx;
use gccjit::{Function, FunctionPtrType, RValue, ToRValue};
use crate::{context::CodegenCx, builder::Builder};
pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, gcc_func: FunctionPtrType<'gcc>, mut args: Cow<'b, [RValue<'gcc>]>, func_name: &str) -> Cow<'b, [RValue<'gcc>]> {
// Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing
// arguments here.
if gcc_func.get_param_count() != args.len() {
match &*func_name {
"__builtin_ia32_pmuldq512_mask" | "__builtin_ia32_pmuludq512_mask"
// FIXME(antoyo): the following intrinsics has 4 (or 5) arguments according to the doc, but is defined with 2 (or 3) arguments in library/stdarch/crates/core_arch/src/x86/avx512f.rs.
| "__builtin_ia32_pmaxsd512_mask" | "__builtin_ia32_pmaxsq512_mask" | "__builtin_ia32_pmaxsq256_mask"
| "__builtin_ia32_pmaxsq128_mask" | "__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_pmaxud512_mask" | "__builtin_ia32_pmaxuq512_mask" | "__builtin_ia32_pmaxuq256_mask"
| "__builtin_ia32_pmaxuq128_mask"
| "__builtin_ia32_pminsd512_mask" | "__builtin_ia32_pminsq512_mask" | "__builtin_ia32_pminsq256_mask"
| "__builtin_ia32_pminsq128_mask" | "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
| "__builtin_ia32_pminud512_mask" | "__builtin_ia32_pminuq512_mask" | "__builtin_ia32_pminuq256_mask"
| "__builtin_ia32_pminuq128_mask" | "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask"
=> {
// TODO: refactor by separating those intrinsics outside of this branch.
let add_before_last_arg =
match &*func_name {
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask"
| "__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => true,
_ => false,
};
let new_first_arg_is_zero =
match &*func_name {
"__builtin_ia32_pmaxuq256_mask" | "__builtin_ia32_pmaxuq128_mask"
| "__builtin_ia32_pminuq256_mask" | "__builtin_ia32_pminuq128_mask" => true,
_ => false
};
let arg3_index =
match &*func_name {
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 1,
_ => 2,
};
let mut new_args = args.to_vec();
let arg3_type = gcc_func.get_param_type(arg3_index);
let first_arg =
if new_first_arg_is_zero {
let vector_type = arg3_type.dyncast_vector().expect("vector type");
let zero = builder.context.new_rvalue_zero(vector_type.get_element_type());
let num_units = vector_type.get_num_units();
builder.context.new_rvalue_from_vector(None, arg3_type, &vec![zero; num_units])
}
else {
builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue()
};
if add_before_last_arg {
new_args.insert(new_args.len() - 1, first_arg);
}
else {
new_args.push(first_arg);
}
let arg4_index =
match &*func_name {
"__builtin_ia32_sqrtps512_mask" | "__builtin_ia32_sqrtpd512_mask" => 2,
_ => 3,
};
let arg4_type = gcc_func.get_param_type(arg4_index);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
if add_before_last_arg {
new_args.insert(new_args.len() - 1, minus_one);
}
else {
new_args.push(minus_one);
}
args = new_args.into();
},
"__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
| "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
| "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
let mut new_args = args.to_vec();
let arg5_type = gcc_func.get_param_type(4);
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
new_args.push(minus_one);
args = new_args.into();
},
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
let mut new_args = args.to_vec();
let mut last_arg = None;
if args.len() == 4 {
last_arg = new_args.pop();
}
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
if args.len() == 3 {
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
// the same GCC intrinsic, but the former has 3 parameters and the
// latter has 4 so it doesn't require this additional argument.
let arg5_type = gcc_func.get_param_type(4);
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
}
if let Some(last_arg) = last_arg {
new_args.push(last_arg);
}
args = new_args.into();
},
"__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" => {
let mut new_args = args.to_vec();
let last_arg = new_args.pop().expect("last arg");
let arg3_type = gcc_func.get_param_type(2);
let undefined = builder.current_func().new_local(None, arg3_type, "undefined_for_intrinsic").to_rvalue();
new_args.push(undefined);
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
new_args.push(last_arg);
args = new_args.into();
},
"__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
let mut new_args = args.to_vec();
let last_arg = new_args.pop().expect("last arg");
let arg4_type = gcc_func.get_param_type(3);
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
new_args.push(minus_one);
new_args.push(last_arg);
args = new_args.into();
},
_ => (),
}
}
args
}
pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
// NOTE: these intrinsics have missing parameters before the last one, so ignore the
// last argument type check.
// FIXME(antoyo): find a way to refactor in order to avoid this hack.
match func_name {
"__builtin_ia32_maxps512_mask" | "__builtin_ia32_maxpd512_mask"
| "__builtin_ia32_minps512_mask" | "__builtin_ia32_minpd512_mask" | "__builtin_ia32_sqrtps512_mask"
| "__builtin_ia32_sqrtpd512_mask" | "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
if index == args_len - 1 {
return true;
}
},
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
// Since there are two LLVM intrinsics that map to each of these GCC builtins and only
// one of them has a missing parameter before the last one, we check the number of
// arguments to distinguish those cases.
if args_len == 4 && index == args_len - 1 {
return true;
}
},
_ => (),
}
false
}
#[cfg(not(feature="master"))]
pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> {
@ -21,6 +187,59 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
"llvm.x86.xgetbv" => "__builtin_ia32_xgetbv",
// NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html
"llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd",
"llvm.x86.avx512.pmul.dq.512" => "__builtin_ia32_pmuldq512_mask",
"llvm.x86.avx512.pmulu.dq.512" => "__builtin_ia32_pmuludq512_mask",
"llvm.x86.avx512.mask.pmaxs.q.256" => "__builtin_ia32_pmaxsq256_mask",
"llvm.x86.avx512.mask.pmaxs.q.128" => "__builtin_ia32_pmaxsq128_mask",
"llvm.x86.avx512.max.ps.512" => "__builtin_ia32_maxps512_mask",
"llvm.x86.avx512.max.pd.512" => "__builtin_ia32_maxpd512_mask",
"llvm.x86.avx512.mask.pmaxu.q.256" => "__builtin_ia32_pmaxuq256_mask",
"llvm.x86.avx512.mask.pmaxu.q.128" => "__builtin_ia32_pmaxuq128_mask",
"llvm.x86.avx512.mask.pmins.q.256" => "__builtin_ia32_pminsq256_mask",
"llvm.x86.avx512.mask.pmins.q.128" => "__builtin_ia32_pminsq128_mask",
"llvm.x86.avx512.min.ps.512" => "__builtin_ia32_minps512_mask",
"llvm.x86.avx512.min.pd.512" => "__builtin_ia32_minpd512_mask",
"llvm.x86.avx512.mask.pminu.q.256" => "__builtin_ia32_pminuq256_mask",
"llvm.x86.avx512.mask.pminu.q.128" => "__builtin_ia32_pminuq128_mask",
"llvm.fma.v16f32" => "__builtin_ia32_vfmaddps512_mask",
"llvm.fma.v8f64" => "__builtin_ia32_vfmaddpd512_mask",
"llvm.x86.avx512.vfmaddsub.ps.512" => "__builtin_ia32_vfmaddsubps512_mask",
"llvm.x86.avx512.vfmaddsub.pd.512" => "__builtin_ia32_vfmaddsubpd512_mask",
"llvm.x86.avx512.pternlog.d.512" => "__builtin_ia32_pternlogd512_mask",
"llvm.x86.avx512.pternlog.d.256" => "__builtin_ia32_pternlogd256_mask",
"llvm.x86.avx512.pternlog.d.128" => "__builtin_ia32_pternlogd128_mask",
"llvm.x86.avx512.pternlog.q.512" => "__builtin_ia32_pternlogq512_mask",
"llvm.x86.avx512.pternlog.q.256" => "__builtin_ia32_pternlogq256_mask",
"llvm.x86.avx512.pternlog.q.128" => "__builtin_ia32_pternlogq128_mask",
"llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512_mask",
"llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512_mask",
"llvm.x86.avx512.sub.ps.512" => "__builtin_ia32_subps512_mask",
"llvm.x86.avx512.sub.pd.512" => "__builtin_ia32_subpd512_mask",
"llvm.x86.avx512.mul.ps.512" => "__builtin_ia32_mulps512_mask",
"llvm.x86.avx512.mul.pd.512" => "__builtin_ia32_mulpd512_mask",
"llvm.x86.avx512.div.ps.512" => "__builtin_ia32_divps512_mask",
"llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask",
"llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask",
"llvm.x86.avx512.vfmadd.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
// The above doc points to unknown builtins for the following, so override them:
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
"llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gathersiv8si",
"llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gathersiv4sf",
"llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gathersiv8sf",
"llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gathersiv2di",
"llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gathersiv4di",
"llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gathersiv2df",
"llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gathersiv4df",
"llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherdiv4si",
"llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherdiv4si256",
"llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherdiv4sf",
"llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherdiv4sf256",
"llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherdiv2di",
"llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di",
"llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df",
"llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df",
"" => "",
// NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py
_ => include!("archs.rs"),
};

View file

@ -1,15 +1,17 @@
use std::cmp::Ordering;
use gccjit::{RValue, Type, ToRValue};
use gccjit::{BinaryOp, RValue, Type, ToRValue};
use rustc_codegen_ssa::base::compare_simd_types;
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
use rustc_codegen_ssa::mir::operand::OperandRef;
use rustc_codegen_ssa::mir::place::PlaceRef;
use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods};
use rustc_hir as hir;
use rustc_middle::span_bug;
use rustc_middle::ty::layout::HasTyCtxt;
use rustc_middle::ty::{self, Ty};
use rustc_span::{Span, Symbol, sym};
use rustc_target::abi::Align;
use crate::builder::Builder;
use crate::intrinsic;
@ -55,7 +57,53 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
let sig =
tcx.normalize_erasing_late_bound_regions(ty::ParamEnv::reveal_all(), callee_ty.fn_sig(tcx));
let arg_tys = sig.inputs();
let name_str = name.as_str();
if name == sym::simd_select_bitmask {
require_simd!(arg_tys[1], "argument");
let (len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
let expected_int_bits = (len.max(8) - 1).next_power_of_two();
let expected_bytes = len / 8 + ((len % 8 > 0) as u64);
let mask_ty = arg_tys[0];
let mut mask = match mask_ty.kind() {
ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(),
ty::Array(elem, len)
if matches!(elem.kind(), ty::Uint(ty::UintTy::U8))
&& len.try_eval_usize(bx.tcx, ty::ParamEnv::reveal_all())
== Some(expected_bytes) =>
{
let place = PlaceRef::alloca(bx, args[0].layout);
args[0].val.store(bx, place);
let int_ty = bx.type_ix(expected_bytes * 8);
let ptr = bx.pointercast(place.llval, bx.cx.type_ptr_to(int_ty));
bx.load(int_ty, ptr, Align::ONE)
}
_ => return_error!(
"invalid bitmask `{}`, expected `u{}` or `[u8; {}]`",
mask_ty,
expected_int_bits,
expected_bytes
),
};
let arg1 = args[1].immediate();
let arg1_type = arg1.get_type();
let arg1_vector_type = arg1_type.unqualified().dyncast_vector().expect("vector type");
let arg1_element_type = arg1_vector_type.get_element_type();
let mut elements = vec![];
let one = bx.context.new_rvalue_one(mask.get_type());
for _ in 0..len {
let element = bx.context.new_cast(None, mask & one, arg1_element_type);
elements.push(element);
mask = mask >> one;
}
let vector_mask = bx.context.new_rvalue_from_vector(None, arg1_type, &elements);
return Ok(bx.vector_select(vector_mask, arg1, args[2].immediate()));
}
// every intrinsic below takes a SIMD vector as its first argument
require_simd!(arg_tys[0], "input");
@ -102,7 +150,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
));
}
if let Some(stripped) = name_str.strip_prefix("simd_shuffle") {
if let Some(stripped) = name.as_str().strip_prefix("simd_shuffle") {
let n: u64 =
if stripped.is_empty() {
// Make sure this is actually an array, since typeck only checks the length-suffixed
@ -172,6 +220,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
// NOTE: we cannot cast to an array and assign to its element here because the value might
// not be an l-value. So, call a builtin to set the element.
// TODO(antoyo): perhaps we could create a new vector or maybe there's a GIMPLE instruction for that?
// TODO(antoyo): don't use target specific builtins here.
let func_name =
match in_len {
2 => {
@ -202,14 +251,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
};
let builtin = bx.context.get_target_builtin_function(func_name);
let param1_type = builtin.get_param(0).to_rvalue().get_type();
let vector =
if vector.get_type() != param1_type {
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
bx.context.new_bitcast(None, vector, param1_type)
}
else {
vector
};
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
let vector = bx.cx.bitcast_if_needed(vector, param1_type);
let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
return Ok(bx.context.new_bitcast(None, result, vector.get_type()));
@ -228,6 +271,24 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
return Ok(bx.context.new_vector_access(None, vector, args[1].immediate()).to_rvalue());
}
if name == sym::simd_select {
let m_elem_ty = in_elem;
let m_len = in_len;
require_simd!(arg_tys[1], "argument");
let (v_len, _) = arg_tys[1].simd_size_and_type(bx.tcx());
require!(
m_len == v_len,
"mismatched lengths: mask length `{}` != other vector length `{}`",
m_len,
v_len
);
match m_elem_ty.kind() {
ty::Int(_) => {}
_ => return_error!("mask element type is `{}`, expected `i_`", m_elem_ty),
}
return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
}
if name == sym::simd_cast {
require_simd!(ret_ty, "return");
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
@ -336,6 +397,10 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
});
}
(Style::Int(_), Style::Float) => {
// TODO: add support for internal functions in libgccjit to get access to IFN_VEC_CONVERT which is
// doing like __builtin_convertvector?
// Or maybe provide convert_vector as an API since it might not easy to get the
// types of internal functions.
unimplemented!();
}
(Style::Float, Style::Int(_)) => {
@ -539,10 +604,150 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64);
let func = bx.context.get_target_builtin_function(builtin_name);
let param1_type = func.get_param(0).to_rvalue().get_type();
let param2_type = func.get_param(1).to_rvalue().get_type();
let lhs = bx.cx.bitcast_if_needed(lhs, param1_type);
let rhs = bx.cx.bitcast_if_needed(rhs, param2_type);
let result = bx.context.new_call(None, func, &[lhs, rhs]);
// TODO(antoyo): perhaps use __builtin_convertvector for vector casting.
return Ok(bx.context.new_bitcast(None, result, vec_ty));
}
macro_rules! arith_red {
($name:ident : $vec_op:expr, $float_reduce:ident, $ordered:expr, $op:ident,
$identity:expr) => {
if name == sym::$name {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
return match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => {
let r = bx.vector_reduce_op(args[0].immediate(), $vec_op);
if $ordered {
// if overflow occurs, the result is the
// mathematical result modulo 2^n:
Ok(bx.$op(args[1].immediate(), r))
}
else {
Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
}
}
ty::Float(_) => {
if $ordered {
// ordered arithmetic reductions take an accumulator
let acc = args[1].immediate();
Ok(bx.$float_reduce(acc, args[0].immediate()))
}
else {
Ok(bx.vector_reduce_op(args[0].immediate(), $vec_op))
}
}
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
};
}
};
}
arith_red!(
simd_reduce_add_unordered: BinaryOp::Plus,
vector_reduce_fadd_fast,
false,
add,
0.0 // TODO: Use this argument.
);
arith_red!(
simd_reduce_mul_unordered: BinaryOp::Mult,
vector_reduce_fmul_fast,
false,
mul,
1.0
);
macro_rules! minmax_red {
($name:ident: $reduction:ident) => {
if name == sym::$name {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
return match in_elem.kind() {
ty::Int(_) | ty::Uint(_) | ty::Float(_) => Ok(bx.$reduction(args[0].immediate())),
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
};
}
};
}
minmax_red!(simd_reduce_min: vector_reduce_min);
minmax_red!(simd_reduce_max: vector_reduce_max);
macro_rules! bitwise_red {
($name:ident : $op:expr, $boolean:expr) => {
if name == sym::$name {
let input = if !$boolean {
require!(
ret_ty == in_elem,
"expected return type `{}` (element of input `{}`), found `{}`",
in_elem,
in_ty,
ret_ty
);
args[0].immediate()
} else {
match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => {}
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
}
// boolean reductions operate on vectors of i1s:
let i1 = bx.type_i1();
let i1xn = bx.type_vector(i1, in_len as u64);
bx.trunc(args[0].immediate(), i1xn)
};
return match in_elem.kind() {
ty::Int(_) | ty::Uint(_) => {
let r = bx.vector_reduce_op(input, $op);
Ok(if !$boolean { r } else { bx.zext(r, bx.type_bool()) })
}
_ => return_error!(
"unsupported {} from `{}` with element `{}` to `{}`",
sym::$name,
in_ty,
in_elem,
ret_ty
),
};
}
};
}
bitwise_red!(simd_reduce_and: BinaryOp::BitwiseAnd, false);
bitwise_red!(simd_reduce_or: BinaryOp::BitwiseOr, false);
unimplemented!("simd {}", name);
}

View file

@ -247,6 +247,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
self.context.new_array_type(None, ty, len)
}
pub fn type_bool(&self) -> Type<'gcc> {
self.context.new_type::<bool>()
}
}
pub fn struct_fields<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>) -> (Vec<Type<'gcc>>, bool) {