Implement more SIMD
This commit is contained in:
parent
e8dca3e87d
commit
3b35940443
4 changed files with 113 additions and 138 deletions
|
|
@ -22,10 +22,10 @@ default = ["master"]
|
|||
master = ["gccjit/master"]
|
||||
|
||||
[dependencies]
|
||||
gccjit = { git = "https://github.com/antoyo/gccjit.rs" }
|
||||
#gccjit = { git = "https://github.com/antoyo/gccjit.rs" }
|
||||
|
||||
# Local copy.
|
||||
#gccjit = { path = "../gccjit.rs" }
|
||||
gccjit = { path = "../gccjit.rs" }
|
||||
|
||||
target-lexicon = "0.10.0"
|
||||
|
||||
|
|
|
|||
|
|
@ -1409,7 +1409,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
|
|||
}
|
||||
|
||||
#[cfg(not(feature="master"))]
|
||||
pub fn vector_reduce<F>(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc>
|
||||
pub fn vector_reduce<F>(&mut self, _src: RValue<'gcc>, _op: F) -> RValue<'gcc>
|
||||
where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
|
||||
{
|
||||
unimplemented!();
|
||||
|
|
|
|||
|
|
@ -75,38 +75,38 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
|
|||
"__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask"
|
||||
| "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask"
|
||||
| "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let mut new_args = args.to_vec();
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
|
||||
let mut last_arg = None;
|
||||
if args.len() == 4 {
|
||||
last_arg = new_args.pop();
|
||||
}
|
||||
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
|
||||
if args.len() == 3 {
|
||||
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
|
||||
// the same GCC intrinsic, but the former has 3 parameters and the
|
||||
// latter has 4 so it doesn't require this additional argument.
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1);
|
||||
new_args.push(minus_one);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
|
||||
}
|
||||
|
||||
let mut last_arg = None;
|
||||
if args.len() == 4 {
|
||||
last_arg = new_args.pop();
|
||||
}
|
||||
if let Some(last_arg) = last_arg {
|
||||
new_args.push(last_arg);
|
||||
}
|
||||
|
||||
let arg4_type = gcc_func.get_param_type(3);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1);
|
||||
new_args.push(minus_one);
|
||||
|
||||
if args.len() == 3 {
|
||||
// Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to
|
||||
// the same GCC intrinsic, but the former has 3 parameters and the
|
||||
// latter has 4 so it doesn't require this additional argument.
|
||||
let arg5_type = gcc_func.get_param_type(4);
|
||||
new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4));
|
||||
}
|
||||
|
||||
if let Some(last_arg) = last_arg {
|
||||
new_args.push(last_arg);
|
||||
}
|
||||
|
||||
args = new_args.into();
|
||||
},
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask"
|
||||
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
|
||||
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
|
||||
|
|
@ -131,6 +131,18 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
|
|||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
"__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => {
|
||||
let mut new_args = args.to_vec();
|
||||
let last_arg = new_args.pop().expect("last arg");
|
||||
let arg2_type = gcc_func.get_param_type(1);
|
||||
let undefined = builder.current_func().new_local(None, arg2_type, "undefined_for_intrinsic").to_rvalue();
|
||||
new_args.push(undefined);
|
||||
let arg3_type = gcc_func.get_param_type(2);
|
||||
let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1);
|
||||
new_args.push(minus_one);
|
||||
new_args.push(last_arg);
|
||||
args = new_args.into();
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
|
@ -149,7 +161,8 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool {
|
|||
| "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask"
|
||||
| "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask"
|
||||
| "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask"
|
||||
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => {
|
||||
| "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask"
|
||||
| "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => {
|
||||
if index == args_len - 1 {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -221,6 +234,48 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function
|
|||
"llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask",
|
||||
"llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask",
|
||||
"llvm.x86.avx512.vfmadd.pd.512" => "__builtin_ia32_vfmaddpd512_mask",
|
||||
"llvm.x86.avx512.sitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtdq2ps512_mask",
|
||||
"llvm.x86.avx512.uitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtudq2ps512_mask",
|
||||
"llvm.x86.avx512.mask.cvttps2dq.256" => "__builtin_ia32_cvttps2dq256_mask",
|
||||
"llvm.x86.avx512.mask.cvttps2dq.128" => "__builtin_ia32_cvttps2dq128_mask",
|
||||
"llvm.x86.avx512.mask.cvttpd2dq.256" => "__builtin_ia32_cvttpd2dq256_mask",
|
||||
"llvm.x86.avx512.mask.compress.d.512" => "__builtin_ia32_compresssi512_mask",
|
||||
"llvm.x86.avx512.mask.compress.d.256" => "__builtin_ia32_compresssi256_mask",
|
||||
"llvm.x86.avx512.mask.compress.d.128" => "__builtin_ia32_compresssi128_mask",
|
||||
"llvm.x86.avx512.mask.compress.q.512" => "__builtin_ia32_compressdi512_mask",
|
||||
"llvm.x86.avx512.mask.compress.q.256" => "__builtin_ia32_compressdi256_mask",
|
||||
"llvm.x86.avx512.mask.compress.q.128" => "__builtin_ia32_compressdi128_mask",
|
||||
"llvm.x86.avx512.mask.compress.ps.512" => "__builtin_ia32_compresssf512_mask",
|
||||
"llvm.x86.avx512.mask.compress.ps.256" => "__builtin_ia32_compresssf256_mask",
|
||||
"llvm.x86.avx512.mask.compress.ps.128" => "__builtin_ia32_compresssf128_mask",
|
||||
"llvm.x86.avx512.mask.compress.pd.512" => "__builtin_ia32_compressdf512_mask",
|
||||
"llvm.x86.avx512.mask.compress.pd.256" => "__builtin_ia32_compressdf256_mask",
|
||||
"llvm.x86.avx512.mask.compress.pd.128" => "__builtin_ia32_compressdf128_mask",
|
||||
"llvm.x86.avx512.mask.compress.store.d.512" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.d.256" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.d.128" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.q.512" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.q.256" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.q.128" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.ps.512" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.ps.256" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.ps.128" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.pd.512" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.pd.256" => "",
|
||||
"llvm.x86.avx512.mask.compress.store.pd.128" => "",
|
||||
"llvm.x86.avx512.mask.expand.d.512" => "",
|
||||
"llvm.x86.avx512.mask.expand.d.256" => "",
|
||||
"llvm.x86.avx512.mask.expand.d.128" => "",
|
||||
"llvm.x86.avx512.mask.expand.q.512" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
"" => "",
|
||||
|
||||
// The above doc points to unknown builtins for the following, so override them:
|
||||
"llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,3 @@
|
|||
use std::cmp::Ordering;
|
||||
|
||||
use gccjit::{BinaryOp, RValue, Type, ToRValue};
|
||||
use rustc_codegen_ssa::base::compare_simd_types;
|
||||
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
|
||||
|
|
@ -309,117 +307,37 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
|
||||
enum Style {
|
||||
Float,
|
||||
Int(/* is signed? */ bool),
|
||||
Int,
|
||||
Unsupported,
|
||||
}
|
||||
|
||||
let (in_style, in_width) = match in_elem.kind() {
|
||||
// vectors of pointer-sized integers should've been
|
||||
// disallowed before here, so this unwrap is safe.
|
||||
ty::Int(i) => (
|
||||
Style::Int(true),
|
||||
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Uint(u) => (
|
||||
Style::Int(false),
|
||||
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Float(f) => (Style::Float, f.bit_width()),
|
||||
_ => (Style::Unsupported, 0),
|
||||
};
|
||||
let (out_style, out_width) = match out_elem.kind() {
|
||||
ty::Int(i) => (
|
||||
Style::Int(true),
|
||||
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Uint(u) => (
|
||||
Style::Int(false),
|
||||
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
|
||||
),
|
||||
ty::Float(f) => (Style::Float, f.bit_width()),
|
||||
_ => (Style::Unsupported, 0),
|
||||
};
|
||||
|
||||
let extend = |in_type, out_type| {
|
||||
let vector_type = bx.context.new_vector_type(out_type, 8);
|
||||
let vector = args[0].immediate();
|
||||
let array_type = bx.context.new_array_type(None, in_type, 8);
|
||||
// TODO(antoyo): switch to using new_vector_access or __builtin_convertvector for vector casting.
|
||||
let array = bx.context.new_bitcast(None, vector, array_type);
|
||||
|
||||
let cast_vec_element = |index| {
|
||||
let index = bx.context.new_rvalue_from_int(bx.int_type, index);
|
||||
bx.context.new_cast(None, bx.context.new_array_access(None, array, index).to_rvalue(), out_type)
|
||||
let in_style =
|
||||
match in_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => Style::Int,
|
||||
ty::Float(_) => Style::Float,
|
||||
_ => Style::Unsupported,
|
||||
};
|
||||
|
||||
bx.context.new_rvalue_from_vector(None, vector_type, &[
|
||||
cast_vec_element(0),
|
||||
cast_vec_element(1),
|
||||
cast_vec_element(2),
|
||||
cast_vec_element(3),
|
||||
cast_vec_element(4),
|
||||
cast_vec_element(5),
|
||||
cast_vec_element(6),
|
||||
cast_vec_element(7),
|
||||
])
|
||||
};
|
||||
let out_style =
|
||||
match out_elem.kind() {
|
||||
ty::Int(_) | ty::Uint(_) => Style::Int,
|
||||
ty::Float(_) => Style::Float,
|
||||
_ => Style::Unsupported,
|
||||
};
|
||||
|
||||
match (in_style, out_style) {
|
||||
(Style::Int(in_is_signed), Style::Int(_)) => {
|
||||
return Ok(match in_width.cmp(&out_width) {
|
||||
Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty),
|
||||
Ordering::Equal => args[0].immediate(),
|
||||
Ordering::Less => {
|
||||
if in_is_signed {
|
||||
match (in_width, out_width) {
|
||||
// FIXME(antoyo): the function _mm_cvtepi8_epi16 should directly
|
||||
// call an intrinsic equivalent to __builtin_ia32_pmovsxbw128 so that
|
||||
// we can generate a call to it.
|
||||
(8, 16) => extend(bx.i8_type, bx.i16_type),
|
||||
(8, 32) => extend(bx.i8_type, bx.i32_type),
|
||||
(8, 64) => extend(bx.i8_type, bx.i64_type),
|
||||
(16, 32) => extend(bx.i16_type, bx.i32_type),
|
||||
(32, 64) => extend(bx.i32_type, bx.i64_type),
|
||||
(16, 64) => extend(bx.i16_type, bx.i64_type),
|
||||
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
|
||||
}
|
||||
} else {
|
||||
match (in_width, out_width) {
|
||||
(8, 16) => extend(bx.u8_type, bx.u16_type),
|
||||
(8, 32) => extend(bx.u8_type, bx.u32_type),
|
||||
(8, 64) => extend(bx.u8_type, bx.u64_type),
|
||||
(16, 32) => extend(bx.u16_type, bx.u32_type),
|
||||
(16, 64) => extend(bx.u16_type, bx.u64_type),
|
||||
(32, 64) => extend(bx.u32_type, bx.u64_type),
|
||||
_ => unimplemented!("in: {}, out: {}", in_width, out_width),
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
(Style::Int(_), Style::Float) => {
|
||||
// TODO: add support for internal functions in libgccjit to get access to IFN_VEC_CONVERT which is
|
||||
// doing like __builtin_convertvector?
|
||||
// Or maybe provide convert_vector as an API since it might not easy to get the
|
||||
// types of internal functions.
|
||||
unimplemented!();
|
||||
}
|
||||
(Style::Float, Style::Int(_)) => {
|
||||
unimplemented!();
|
||||
}
|
||||
(Style::Float, Style::Float) => {
|
||||
unimplemented!();
|
||||
}
|
||||
_ => { /* Unsupported. Fallthrough. */ }
|
||||
(Style::Unsupported, Style::Unsupported) => {
|
||||
require!(
|
||||
false,
|
||||
"unsupported cast from `{}` with element `{}` to `{}` with element `{}`",
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty,
|
||||
out_elem
|
||||
);
|
||||
},
|
||||
_ => return Ok(bx.context.convert_vector(None, args[0].immediate(), llret_ty)),
|
||||
}
|
||||
require!(
|
||||
false,
|
||||
"unsupported cast from `{}` with element `{}` to `{}` with element `{}`",
|
||||
in_ty,
|
||||
in_elem,
|
||||
ret_ty,
|
||||
out_elem
|
||||
);
|
||||
}
|
||||
|
||||
macro_rules! arith_binary {
|
||||
|
|
@ -590,6 +508,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
|
|||
);
|
||||
}
|
||||
};
|
||||
// TODO(antoyo): don't use target specific builtins here.
|
||||
// Not sure how easy it would be to avoid theme here.
|
||||
let builtin_name =
|
||||
match (signed, is_add, in_len, elem_width) {
|
||||
(true, true, 32, 8) => "__builtin_ia32_paddsb256", // TODO(antoyo): cast arguments to unsigned.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue