From 45bca0052098ee2005857dcdeeed9fe00d554a51 Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Thu, 15 Jun 2023 20:51:29 -0400 Subject: [PATCH] Handle alignment of the load instruction --- src/builder.rs | 19 ++++++++++++++----- src/intrinsic/simd.rs | 9 ++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/builder.rs b/src/builder.rs index f2775421ccd0..bb23524d8af4 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -729,17 +729,25 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { unimplemented!(); } - fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, _align: Align) -> RValue<'gcc> { + fn load(&mut self, pointee_ty: Type<'gcc>, ptr: RValue<'gcc>, align: Align) -> RValue<'gcc> { let block = self.llbb(); let function = block.get_function(); // NOTE: instead of returning the dereference here, we have to assign it to a variable in // the current basic block. Otherwise, it could be used in another basic block, causing a // dereference after a drop, for instance. - // TODO(antoyo): handle align of the load instruction. - let ptr = self.context.new_cast(None, ptr, pointee_ty.make_pointer()); + // FIXME(antoyo): this check that we don't call get_aligned() a second time on a type. + // Ideally, we shouldn't need to do this check. + let aligned_type = + if pointee_ty == self.cx.u128_type || pointee_ty == self.cx.i128_type { + pointee_ty + } + else { + pointee_ty.get_aligned(align.bytes()) + }; + let ptr = self.context.new_cast(None, ptr, aligned_type.make_pointer()); let deref = ptr.dereference(None).to_rvalue(); unsafe { RETURN_VALUE_COUNT += 1 }; - let loaded_value = function.new_local(None, pointee_ty, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT })); + let loaded_value = function.new_local(None, aligned_type, &format!("loadedValue{}", unsafe { RETURN_VALUE_COUNT })); block.add_assignment(None, loaded_value, deref); loaded_value.to_rvalue() } @@ -1857,7 +1865,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { #[cfg(feature="master")] let (cond, element_type) = { - let then_val_vector_type = then_val.get_type().dyncast_vector().expect("vector type"); + // TODO(antoyo): dyncast_vector should not require a call to unqualified. + let then_val_vector_type = then_val.get_type().unqualified().dyncast_vector().expect("vector type"); let then_val_element_type = then_val_vector_type.get_element_type(); let then_val_element_size = then_val_element_type.get_size(); diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index 36b9c9b63641..9115cf971196 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -346,7 +346,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( // endian and MSB-first for big endian. let vector = args[0].immediate(); - let vector_type = vector.get_type().dyncast_vector().expect("vector type"); + // TODO(antoyo): dyncast_vector should not require a call to unqualified. + let vector_type = vector.get_type().unqualified().dyncast_vector().expect("vector type"); let elem_type = vector_type.get_element_type(); let expected_int_bits = in_len.max(8); @@ -853,7 +854,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( (true, true) => { // Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition // TODO(antoyo): improve using conditional operators if possible. - let arg_type = lhs.get_type(); + // TODO(antoyo): dyncast_vector should not require a call to unqualified. + let arg_type = lhs.get_type().unqualified(); // TODO(antoyo): convert lhs and rhs to unsigned. let sum = lhs + rhs; let vector_type = arg_type.dyncast_vector().expect("vector type"); @@ -883,7 +885,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( res & cmp }, (true, false) => { - let arg_type = lhs.get_type(); + // TODO(antoyo): dyncast_vector should not require a call to unqualified. + let arg_type = lhs.get_type().unqualified(); // TODO(antoyo): this uses the same algorithm from saturating add, but add the // negative of the right operand. Find a proper subtraction algorithm. let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);