diff --git a/Readme.md b/Readme.md
index fe23a2676966..5bbd29fceba2 100644
--- a/Readme.md
+++ b/Readme.md
@@ -127,6 +127,10 @@ To get the `rustc` command to run in `gdb`, add the `--verbose` flag to `cargo b
 * Build the stage2 compiler (`rustup toolchain link debug-current build/x86_64-unknown-linux-gnu/stage2`).
 * Clean and rebuild the codegen with `debug-current` in the file `rust-toolchain`.
 
+### How to use [mem-trace](https://github.com/antoyo/mem-trace)
+
+`rustc` needs to be built without `jemalloc` so that `mem-trace` can overload `malloc`: since `jemalloc` is linked statically, an `LD_PRELOAD`-ed library won't have a chance to intercept the calls to `malloc`.
+
 ### How to build a cross-compiling libgccjit
 
 #### Building libgccjit
diff --git a/src/base.rs b/src/base.rs
index e4ecbd46f0c4..19c981309d75 100644
--- a/src/base.rs
+++ b/src/base.rs
@@ -81,11 +81,17 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_
         // TODO(antoyo): only add the following cli argument if the feature is supported.
         context.add_command_line_option("-msse2");
         context.add_command_line_option("-mavx2");
-        context.add_command_line_option("-msha");
-        context.add_command_line_option("-mpclmul");
         // FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
         // Only add if the CPU supports it.
-        //context.add_command_line_option("-mavx512f");
+        /*context.add_command_line_option("-mavx512f");
+        context.add_command_line_option("-msha");
+        context.add_command_line_option("-mpclmul");
+        context.add_command_line_option("-mfma");
+        context.add_command_line_option("-mfma4");
+        context.add_command_line_option("-mavx512vpopcntdq");
+        context.add_command_line_option("-mavx512vl");
+        context.add_command_line_option("-m64");
+        context.add_command_line_option("-mbmi");*/
         for arg in &tcx.sess.opts.cg.llvm_args {
             context.add_command_line_option(arg);
         }
diff --git a/src/builder.rs b/src/builder.rs
index 3e1f56c183ac..3804a958e691 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -213,7 +213,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                 let actual_ty = actual_val.get_type();
                 if expected_ty != actual_ty {
-                    if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() && actual_ty.get_size() != expected_ty.get_size() {
+                    if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() {
                         self.context.new_cast(None, actual_val, expected_ty)
                     }
                     else if on_stack_param_indices.contains(&index) {
@@ -1390,18 +1390,20 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     where F: Fn(RValue<'gcc>, RValue<'gcc>, &'gcc Context<'gcc>) -> RValue<'gcc>
     {
         let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type");
+        let element_type = vector_type.get_element_type();
+        let mask_element_type = self.type_ix(element_type.get_size() as u64 * 8);
         let element_count = vector_type.get_num_units();
         let mut vector_elements = vec![];
         for i in 0..element_count {
             vector_elements.push(i);
         }
-        let mask_type = self.context.new_vector_type(self.int_type, element_count as u64);
+        let mask_type = self.context.new_vector_type(mask_element_type, element_count as u64);
         let mut shift = 1;
         let mut res = src;
         while shift < element_count {
             let vector_elements: Vec<_> = vector_elements.iter()
-                .map(|i| self.context.new_rvalue_from_int(self.int_type, ((i + shift) % element_count) as i32))
+                .map(|i| self.context.new_rvalue_from_int(mask_element_type, ((i + shift) % element_count) as i32))
                 .collect();
             let mask = self.context.new_rvalue_from_vector(None, mask_type, &vector_elements);
             let shifted = self.context.new_rvalue_vector_perm(None, res, res, mask);
diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index 42cf06c8c7ab..f2faae070284 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -288,7 +288,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc,
     match func_name {
         "__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
             let zero = builder.context.new_rvalue_zero(builder.int_type);
-            return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
+            #[cfg(feature="master")]
+            {
+                return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
+            }
         },
         "__builtin_ia32_addcarryx_u64" | "__builtin_ia32_sbb_u64" | "__builtin_ia32_addcarryx_u32" | "__builtin_ia32_sbb_u32" => {
             // Both llvm.x86.addcarry.32 and llvm.x86.addcarryx.u32 points to the same GCC builtin,
diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs
index bf5d555736ae..8f9862414e60 100644
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@@ -216,7 +216,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
         let variable = bx.current_func().new_local(None, vector.get_type(), "new_vector");
         bx.llbb().add_assignment(None, variable, vector);
         let lvalue = bx.context.new_vector_access(None, variable.to_rvalue(), index);
-        // TODO: si simd_insert est constant, utiliser BIT_REF…
+        // TODO: if simd_insert is constant, use BIT_REF.
        bx.llbb().add_assignment(None, lvalue, value);
         return Ok(variable.to_rvalue());
     }
@@ -252,6 +252,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
         return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate()));
     }
 
+    #[cfg(feature="master")]
     if name == sym::simd_cast {
         require_simd!(ret_ty, "return");
         let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());