From 4636c59df5a7be4e47758588ad188bcb1f666f7c Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Fri, 29 Apr 2022 23:14:26 -0400 Subject: [PATCH] Add more SIMD --- src/builder.rs | 2 +- src/consts.rs | 15 +------- src/context.rs | 9 +++++ src/intrinsic/llvm.rs | 19 ++++++++++ src/intrinsic/simd.rs | 87 +++++++++++++++++++++++++++++++++++-------- 5 files changed, 102 insertions(+), 30 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index 9a5cf785a1f5..f0b93c3d5170 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -1343,7 +1343,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } // TODO(antoyo): switch to using new_vector_access. let array = self.context.new_bitcast(None, v2, array_type); - for i in 0..vec_num_units { + for i in 0..(mask_num_units - vec_num_units) { elements.push(self.context.new_array_access(None, array, self.context.new_rvalue_from_int(self.int_type, i as i32)).to_rvalue()); } let v1 = self.context.new_rvalue_from_vector(None, result_type, &elements); diff --git a/src/consts.rs b/src/consts.rs index 4350c00e94a7..4b517fd85f05 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -27,12 +27,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } // NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some // SIMD builtins require a constant value. - if value.get_type() != typ { - self.context.new_bitcast(None, value, typ) - } - else { - value - } + self.bitcast_if_needed(value, typ) } } @@ -86,13 +81,7 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> { // TODO(antoyo): set alignment. - let value = - if value.get_type() != gcc_type { - self.context.new_bitcast(None, value, gcc_type) - } - else { - value - }; + let value = self.bitcast_if_needed(value, gcc_type); global.global_set_initializer_rvalue(value); // As an optimization, all shared statics which do not have interior diff --git a/src/context.rs b/src/context.rs index 83c4683a6683..92b30ef9b4d8 100644 --- a/src/context.rs +++ b/src/context.rs @@ -279,6 +279,15 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { pub fn sess(&self) -> &Session { &self.tcx.sess } + + pub fn bitcast_if_needed(&self, value: RValue<'gcc>, expected_type: Type<'gcc>) -> RValue<'gcc> { + if value.get_type() != expected_type { + self.context.new_bitcast(None, value, expected_type) + } + else { + value + } + } } impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> { diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index 4b41b0ba6e78..aab93b927558 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -21,6 +21,25 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv", // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd", + + // The above doc points to unknown builtins for the following, so override them: + "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si", + "llvm.x86.avx2.gather.d.d.256" => "__builtin_ia32_gathersiv8si", + "llvm.x86.avx2.gather.d.ps" => "__builtin_ia32_gathersiv4sf", + "llvm.x86.avx2.gather.d.ps.256" => "__builtin_ia32_gathersiv8sf", + "llvm.x86.avx2.gather.d.q" => "__builtin_ia32_gathersiv2di", + "llvm.x86.avx2.gather.d.q.256" => "__builtin_ia32_gathersiv4di", + "llvm.x86.avx2.gather.d.pd" => "__builtin_ia32_gathersiv2df", + "llvm.x86.avx2.gather.d.pd.256" => "__builtin_ia32_gathersiv4df", + "llvm.x86.avx2.gather.q.d" => "__builtin_ia32_gatherdiv4si", + "llvm.x86.avx2.gather.q.d.256" => "__builtin_ia32_gatherdiv4si256", + "llvm.x86.avx2.gather.q.ps" => "__builtin_ia32_gatherdiv4sf", + "llvm.x86.avx2.gather.q.ps.256" => "__builtin_ia32_gatherdiv4sf256", + "llvm.x86.avx2.gather.q.q" => "__builtin_ia32_gatherdiv2di", + "llvm.x86.avx2.gather.q.q.256" => "__builtin_ia32_gatherdiv4di", + "llvm.x86.avx2.gather.q.pd" => "__builtin_ia32_gatherdiv2df", + "llvm.x86.avx2.gather.q.pd.256" => "__builtin_ia32_gatherdiv4df", + "" => "", // NOTE: this file is generated by https://github.com/GuillaumeGomez/llvmint/blob/master/generate_list.py _ => include!("archs.rs"), }; diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index e5753e318c7f..9204fbdfaba7 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -202,14 +202,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, }; let builtin = bx.context.get_target_builtin_function(func_name); let param1_type = builtin.get_param(0).to_rvalue().get_type(); - let vector = - if vector.get_type() != param1_type { - // TODO(antoyo): perhaps use __builtin_convertvector for vector casting. - bx.context.new_bitcast(None, vector, param1_type) - } - else { - vector - }; + // TODO(antoyo): perhaps use __builtin_convertvector for vector casting. + let vector = bx.cx.bitcast_if_needed(vector, param1_type); let result = bx.context.new_call(None, builtin, &[vector, value, bx.context.new_cast(None, index, bx.int_type)]); // TODO(antoyo): perhaps use __builtin_convertvector for vector casting. return Ok(bx.context.new_bitcast(None, result, vector.get_type())); @@ -539,18 +533,79 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>, let vec_ty = bx.cx.type_vector(elem_ty, in_len as u64); let func = bx.context.get_target_builtin_function(builtin_name); - let param1_type = func.get_parameter(0).get_type(); - let lhs = - if lhs.get_type() != param1_type { - bx.context.new_bitcast(None, lhs, param1_type) - } - else { - lhs - }; + let param1_type = func.get_param(0).to_rvalue().get_type(); + let param2_type = func.get_param(1).to_rvalue().get_type(); + let lhs = bx.cx.bitcast_if_needed(lhs, param1_type); + let rhs = bx.cx.bitcast_if_needed(rhs, param2_type); let result = bx.context.new_call(None, func, &[lhs, rhs]); // TODO(antoyo): perhaps use __builtin_convertvector for vector casting. return Ok(bx.context.new_bitcast(None, result, vec_ty)); } + macro_rules! arith_red { + ($name:ident : $integer_reduce:ident, $float_reduce:ident, $ordered:expr, $op:ident, + $identity:expr) => { + if name == sym::$name { + require!( + ret_ty == in_elem, + "expected return type `{}` (element of input `{}`), found `{}`", + in_elem, + in_ty, + ret_ty + ); + return match in_elem.kind() { + ty::Int(_) | ty::Uint(_) => { + let r = bx.$integer_reduce(args[0].immediate()); + if $ordered { + // if overflow occurs, the result is the + // mathematical result modulo 2^n: + Ok(bx.$op(args[1].immediate(), r)) + } else { + Ok(bx.$integer_reduce(args[0].immediate())) + } + } + ty::Float(f) => { + let acc = if $ordered { + // ordered arithmetic reductions take an accumulator + args[1].immediate() + } else { + // unordered arithmetic reductions use the identity accumulator + match f.bit_width() { + 32 => bx.const_real(bx.type_f32(), $identity), + 64 => bx.const_real(bx.type_f64(), $identity), + v => return_error!( + r#" +unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#, + sym::$name, + in_ty, + in_elem, + v, + ret_ty + ), + } + }; + Ok(bx.$float_reduce(acc, args[0].immediate())) + } + _ => return_error!( + "unsupported {} from `{}` with element `{}` to `{}`", + sym::$name, + in_ty, + in_elem, + ret_ty + ), + }; + } + }; + } + + // TODO: use a recursive algorithm a-la Hacker's Delight. + arith_red!( + simd_reduce_add_unordered: vector_reduce_add, + vector_reduce_fadd_fast, + false, + add, + 0.0 + ); + unimplemented!("simd {}", name); }