diff --git a/Cargo.toml b/Cargo.toml index 211d19a8dc89..0e41bec8b762 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,10 +22,10 @@ default = ["master"] master = ["gccjit/master"] [dependencies] -gccjit = { git = "https://github.com/antoyo/gccjit.rs" } +#gccjit = { git = "https://github.com/antoyo/gccjit.rs" } # Local copy. -#gccjit = { path = "../gccjit.rs" } +gccjit = { path = "../gccjit.rs" } target-lexicon = "0.10.0" diff --git a/src/builder.rs b/src/builder.rs index fa490fe3f222..e7adf29fed8e 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -1409,7 +1409,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } #[cfg(not(feature="master"))] - pub fn vector_reduce(&mut self, src: RValue<'gcc>, op: F) -> RValue<'gcc> + pub fn vector_reduce(&mut self, _src: RValue<'gcc>, _op: F) -> RValue<'gcc> where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc> { unimplemented!(); diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index 1b089f08f764..6b78157410b0 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -75,38 +75,38 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc "__builtin_ia32_pternlogd512_mask" | "__builtin_ia32_pternlogd256_mask" | "__builtin_ia32_pternlogd128_mask" | "__builtin_ia32_pternlogq512_mask" | "__builtin_ia32_pternlogq256_mask" | "__builtin_ia32_pternlogq128_mask" => { - let mut new_args = args.to_vec(); + let mut new_args = args.to_vec(); + let arg5_type = gcc_func.get_param_type(4); + let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1); + new_args.push(minus_one); + args = new_args.into(); + }, + "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { + let mut new_args = args.to_vec(); + + let mut last_arg = None; + if args.len() == 4 { + last_arg = new_args.pop(); + } + + let arg4_type = gcc_func.get_param_type(3); + let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); + new_args.push(minus_one); + + if args.len() == 3 { + // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to + // the same GCC intrinsic, but the former has 3 parameters and the + // latter has 4 so it doesn't require this additional argument. let arg5_type = gcc_func.get_param_type(4); - let minus_one = builder.context.new_rvalue_from_int(arg5_type, -1); - new_args.push(minus_one); - args = new_args.into(); - }, - "__builtin_ia32_vfmaddps512_mask" | "__builtin_ia32_vfmaddpd512_mask" => { - let mut new_args = args.to_vec(); + new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4)); + } - let mut last_arg = None; - if args.len() == 4 { - last_arg = new_args.pop(); - } + if let Some(last_arg) = last_arg { + new_args.push(last_arg); + } - let arg4_type = gcc_func.get_param_type(3); - let minus_one = builder.context.new_rvalue_from_int(arg4_type, -1); - new_args.push(minus_one); - - if args.len() == 3 { - // Both llvm.fma.v16f32 and llvm.x86.avx512.vfmadd.ps.512 maps to - // the same GCC intrinsic, but the former has 3 parameters and the - // latter has 4 so it doesn't require this additional argument. - let arg5_type = gcc_func.get_param_type(4); - new_args.push(builder.context.new_rvalue_from_int(arg5_type, 4)); - } - - if let Some(last_arg) = last_arg { - new_args.push(last_arg); - } - - args = new_args.into(); - }, + args = new_args.into(); + }, "__builtin_ia32_addps512_mask" | "__builtin_ia32_addpd512_mask" | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" @@ -131,6 +131,18 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc new_args.push(last_arg); args = new_args.into(); }, + "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => { + let mut new_args = args.to_vec(); + let last_arg = new_args.pop().expect("last arg"); + let arg2_type = gcc_func.get_param_type(1); + let undefined = builder.current_func().new_local(None, arg2_type, "undefined_for_intrinsic").to_rvalue(); + new_args.push(undefined); + let arg3_type = gcc_func.get_param_type(2); + let minus_one = builder.context.new_rvalue_from_int(arg3_type, -1); + new_args.push(minus_one); + new_args.push(last_arg); + args = new_args.into(); + }, _ => (), } } @@ -149,7 +161,8 @@ pub fn ignore_arg_cast(func_name: &str, index: usize, args_len: usize) -> bool { | "__builtin_ia32_subps512_mask" | "__builtin_ia32_subpd512_mask" | "__builtin_ia32_mulps512_mask" | "__builtin_ia32_mulpd512_mask" | "__builtin_ia32_divps512_mask" | "__builtin_ia32_divpd512_mask" - | "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" => { + | "__builtin_ia32_vfmaddsubps512_mask" | "__builtin_ia32_vfmaddsubpd512_mask" + | "__builtin_ia32_cvtdq2ps512_mask" | "__builtin_ia32_cvtudq2ps512_mask" => { if index == args_len - 1 { return true; } @@ -221,6 +234,48 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function "llvm.x86.avx512.div.pd.512" => "__builtin_ia32_divpd512_mask", "llvm.x86.avx512.vfmadd.ps.512" => "__builtin_ia32_vfmaddps512_mask", "llvm.x86.avx512.vfmadd.pd.512" => "__builtin_ia32_vfmaddpd512_mask", + "llvm.x86.avx512.sitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtdq2ps512_mask", + "llvm.x86.avx512.uitofp.round.v16f32.v16i32" => "__builtin_ia32_cvtudq2ps512_mask", + "llvm.x86.avx512.mask.cvttps2dq.256" => "__builtin_ia32_cvttps2dq256_mask", + "llvm.x86.avx512.mask.cvttps2dq.128" => "__builtin_ia32_cvttps2dq128_mask", + "llvm.x86.avx512.mask.cvttpd2dq.256" => "__builtin_ia32_cvttpd2dq256_mask", + "llvm.x86.avx512.mask.compress.d.512" => "__builtin_ia32_compresssi512_mask", + "llvm.x86.avx512.mask.compress.d.256" => "__builtin_ia32_compresssi256_mask", + "llvm.x86.avx512.mask.compress.d.128" => "__builtin_ia32_compresssi128_mask", + "llvm.x86.avx512.mask.compress.q.512" => "__builtin_ia32_compressdi512_mask", + "llvm.x86.avx512.mask.compress.q.256" => "__builtin_ia32_compressdi256_mask", + "llvm.x86.avx512.mask.compress.q.128" => "__builtin_ia32_compressdi128_mask", + "llvm.x86.avx512.mask.compress.ps.512" => "__builtin_ia32_compresssf512_mask", + "llvm.x86.avx512.mask.compress.ps.256" => "__builtin_ia32_compresssf256_mask", + "llvm.x86.avx512.mask.compress.ps.128" => "__builtin_ia32_compresssf128_mask", + "llvm.x86.avx512.mask.compress.pd.512" => "__builtin_ia32_compressdf512_mask", + "llvm.x86.avx512.mask.compress.pd.256" => "__builtin_ia32_compressdf256_mask", + "llvm.x86.avx512.mask.compress.pd.128" => "__builtin_ia32_compressdf128_mask", + "llvm.x86.avx512.mask.compress.store.d.512" => "", + "llvm.x86.avx512.mask.compress.store.d.256" => "", + "llvm.x86.avx512.mask.compress.store.d.128" => "", + "llvm.x86.avx512.mask.compress.store.q.512" => "", + "llvm.x86.avx512.mask.compress.store.q.256" => "", + "llvm.x86.avx512.mask.compress.store.q.128" => "", + "llvm.x86.avx512.mask.compress.store.ps.512" => "", + "llvm.x86.avx512.mask.compress.store.ps.256" => "", + "llvm.x86.avx512.mask.compress.store.ps.128" => "", + "llvm.x86.avx512.mask.compress.store.pd.512" => "", + "llvm.x86.avx512.mask.compress.store.pd.256" => "", + "llvm.x86.avx512.mask.compress.store.pd.128" => "", + "llvm.x86.avx512.mask.expand.d.512" => "", + "llvm.x86.avx512.mask.expand.d.256" => "", + "llvm.x86.avx512.mask.expand.d.128" => "", + "llvm.x86.avx512.mask.expand.q.512" => "", + "" => "", + "" => "", + "" => "", + "" => "", + "" => "", + "" => "", + "" => "", + "" => "", + "" => "", // The above doc points to unknown builtins for the following, so override them: "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si", diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index 870e9f776a4f..a6cf99c62fff 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -1,5 +1,3 @@ -use std::cmp::Ordering; - use gccjit::{BinaryOp, RValue, Type, ToRValue}; use rustc_codegen_ssa::base::compare_simd_types; use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error}; @@ -309,117 +307,37 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, enum Style { Float, - Int(/* is signed? */ bool), + Int, Unsupported, } - let (in_style, in_width) = match in_elem.kind() { - // vectors of pointer-sized integers should've been - // disallowed before here, so this unwrap is safe. - ty::Int(i) => ( - Style::Int(true), - i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), - ), - ty::Uint(u) => ( - Style::Int(false), - u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), - ), - ty::Float(f) => (Style::Float, f.bit_width()), - _ => (Style::Unsupported, 0), - }; - let (out_style, out_width) = match out_elem.kind() { - ty::Int(i) => ( - Style::Int(true), - i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), - ), - ty::Uint(u) => ( - Style::Int(false), - u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(), - ), - ty::Float(f) => (Style::Float, f.bit_width()), - _ => (Style::Unsupported, 0), - }; - - let extend = |in_type, out_type| { - let vector_type = bx.context.new_vector_type(out_type, 8); - let vector = args[0].immediate(); - let array_type = bx.context.new_array_type(None, in_type, 8); - // TODO(antoyo): switch to using new_vector_access or __builtin_convertvector for vector casting. - let array = bx.context.new_bitcast(None, vector, array_type); - - let cast_vec_element = |index| { - let index = bx.context.new_rvalue_from_int(bx.int_type, index); - bx.context.new_cast(None, bx.context.new_array_access(None, array, index).to_rvalue(), out_type) + let in_style = + match in_elem.kind() { + ty::Int(_) | ty::Uint(_) => Style::Int, + ty::Float(_) => Style::Float, + _ => Style::Unsupported, }; - bx.context.new_rvalue_from_vector(None, vector_type, &[ - cast_vec_element(0), - cast_vec_element(1), - cast_vec_element(2), - cast_vec_element(3), - cast_vec_element(4), - cast_vec_element(5), - cast_vec_element(6), - cast_vec_element(7), - ]) - }; + let out_style = + match out_elem.kind() { + ty::Int(_) | ty::Uint(_) => Style::Int, + ty::Float(_) => Style::Float, + _ => Style::Unsupported, + }; match (in_style, out_style) { - (Style::Int(in_is_signed), Style::Int(_)) => { - return Ok(match in_width.cmp(&out_width) { - Ordering::Greater => bx.trunc(args[0].immediate(), llret_ty), - Ordering::Equal => args[0].immediate(), - Ordering::Less => { - if in_is_signed { - match (in_width, out_width) { - // FIXME(antoyo): the function _mm_cvtepi8_epi16 should directly - // call an intrinsic equivalent to __builtin_ia32_pmovsxbw128 so that - // we can generate a call to it. - (8, 16) => extend(bx.i8_type, bx.i16_type), - (8, 32) => extend(bx.i8_type, bx.i32_type), - (8, 64) => extend(bx.i8_type, bx.i64_type), - (16, 32) => extend(bx.i16_type, bx.i32_type), - (32, 64) => extend(bx.i32_type, bx.i64_type), - (16, 64) => extend(bx.i16_type, bx.i64_type), - _ => unimplemented!("in: {}, out: {}", in_width, out_width), - } - } else { - match (in_width, out_width) { - (8, 16) => extend(bx.u8_type, bx.u16_type), - (8, 32) => extend(bx.u8_type, bx.u32_type), - (8, 64) => extend(bx.u8_type, bx.u64_type), - (16, 32) => extend(bx.u16_type, bx.u32_type), - (16, 64) => extend(bx.u16_type, bx.u64_type), - (32, 64) => extend(bx.u32_type, bx.u64_type), - _ => unimplemented!("in: {}, out: {}", in_width, out_width), - } - } - } - }); - } - (Style::Int(_), Style::Float) => { - // TODO: add support for internal functions in libgccjit to get access to IFN_VEC_CONVERT which is - // doing like __builtin_convertvector? - // Or maybe provide convert_vector as an API since it might not easy to get the - // types of internal functions. - unimplemented!(); - } - (Style::Float, Style::Int(_)) => { - unimplemented!(); - } - (Style::Float, Style::Float) => { - unimplemented!(); - } - _ => { /* Unsupported. Fallthrough. */ } + (Style::Unsupported, Style::Unsupported) => { + require!( + false, + "unsupported cast from `{}` with element `{}` to `{}` with element `{}`", + in_ty, + in_elem, + ret_ty, + out_elem + ); + }, + _ => return Ok(bx.context.convert_vector(None, args[0].immediate(), llret_ty)), } - require!( - false, - "unsupported cast from `{}` with element `{}` to `{}` with element `{}`", - in_ty, - in_elem, - ret_ty, - out_elem - ); } macro_rules! arith_binary { @@ -590,6 +508,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, ); } }; + // TODO(antoyo): don't use target specific builtins here. + // Not sure how easy it would be to avoid theme here. let builtin_name = match (signed, is_add, in_len, elem_width) { (true, true, 32, 8) => "__builtin_ia32_paddsb256", // TODO(antoyo): cast arguments to unsigned.