From 2100b10c4c61ac7c0fe67ae4c9a390eaf3708ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Steinbrink?= Date: Fri, 9 Jan 2015 17:40:13 +0100 Subject: [PATCH 1/5] Treat `struct(T)` the same as `struct S { x: T }` WRT being immediate args Currently we pass a `struct S(u64)` as an immediate value on i686, but a `struct S { x: u64 }` is passed indirectly. This seems pretty wrong, as they both have the same underlying LLVM type `{ i64 }`, no sense in treating them differently. --- src/librustc_trans/trans/common.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/librustc_trans/trans/common.rs b/src/librustc_trans/trans/common.rs index 3afd33d324dd..fb58d9359223 100644 --- a/src/librustc_trans/trans/common.rs +++ b/src/librustc_trans/trans/common.rs @@ -222,10 +222,7 @@ fn type_is_newtype_immediate<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, match ty.sty { ty::ty_struct(def_id, substs) => { let fields = ty::struct_fields(ccx.tcx(), def_id, substs); - fields.len() == 1 && - fields[0].name == - token::special_idents::unnamed_field.name && - type_is_immediate(ccx, fields[0].mt.ty) + fields.len() == 1 && type_is_immediate(ccx, fields[0].mt.ty) } _ => false } From a03ae681d3f52ca6a6aaf7ebfd32a51af6d4e4da Mon Sep 17 00:00:00 2001 From: Julian Orth Date: Sat, 10 Jan 2015 11:07:14 +0100 Subject: [PATCH 2/5] add inline to every String function --- src/libcollections/string.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 5991fc832e96..e30e7e8600dc 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -302,6 +302,7 @@ impl String { /// assert_eq!(String::from_utf16_lossy(v), /// "𝄞mus\u{FFFD}ic\u{FFFD}".to_string()); /// ``` + #[inline] #[stable] pub fn from_utf16_lossy(v: &[u16]) -> String { unicode_str::utf16_items(v).map(|c| c.to_char_lossy()).collect() @@ -556,6 +557,7 @@ impl String { /// assert_eq!(s.remove(1), 'o'); /// assert_eq!(s.remove(0), 'o'); /// ``` + #[inline] #[stable] pub fn remove(&mut self, idx: uint) -> char { let len = self.len(); @@ -582,6 +584,7 @@ impl String { /// /// If `idx` does not lie on a character boundary or is out of bounds, then /// this function will panic. + #[inline] #[stable] pub fn insert(&mut self, idx: uint, ch: char) { let len = self.len(); @@ -618,6 +621,7 @@ impl String { /// } /// assert_eq!(s.as_slice(), "olleh"); /// ``` + #[inline] #[stable] pub unsafe fn as_mut_vec<'a>(&'a mut self) -> &'a mut Vec { &mut self.vec @@ -645,6 +649,7 @@ impl String { /// v.push('a'); /// assert!(!v.is_empty()); /// ``` + #[inline] #[stable] pub fn is_empty(&self) -> bool { self.len() == 0 } @@ -801,6 +806,7 @@ impl Str for String { #[stable] impl Default for String { + #[inline] #[stable] fn default() -> String { String::new() @@ -809,6 +815,7 @@ impl Default for String { #[stable] impl fmt::String for String { + #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::String::fmt(&**self, f) } @@ -816,6 +823,7 @@ impl fmt::String for String { #[unstable = "waiting on fmt stabilization"] impl fmt::Show for String { + #[inline] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Show::fmt(&**self, f) } @@ -842,6 +850,7 @@ impl hash::Hash for String { impl<'a> Add<&'a str> for String { type Output = String; + #[inline] fn add(mut self, other: &str) -> String { self.push_str(other); self @@ -881,6 +890,7 @@ impl ops::Index for String { impl ops::Deref for String { type Target = str; + #[inline] fn deref<'a>(&'a self) -> &'a str { unsafe { mem::transmute(&self.vec[]) } } @@ -895,6 +905,7 @@ pub struct DerefString<'a> { impl<'a> Deref for DerefString<'a> { type Target = String; + #[inline] fn deref<'b>(&'b self) -> &'b String { unsafe { mem::transmute(&*self.x) } } @@ -933,6 +944,7 @@ pub trait ToString { } impl ToString for T { + #[inline] fn to_string(&self) -> String { use core::fmt::Writer; let mut buf = String::new(); @@ -943,12 +955,14 @@ impl ToString for T { } impl IntoCow<'static, String, str> for String { + #[inline] fn into_cow(self) -> CowString<'static> { Cow::Owned(self) } } impl<'a> IntoCow<'a, String, str> for &'a str { + #[inline] fn into_cow(self) -> CowString<'a> { Cow::Borrowed(self) } @@ -966,6 +980,7 @@ impl<'a> Str for CowString<'a> { } impl fmt::Writer for String { + #[inline] fn write_str(&mut self, s: &str) -> fmt::Result { self.push_str(s); Ok(()) From 6ef08406dccdeeb828a6cfc7658ac149900e73ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Steinbrink?= Date: Thu, 8 Jan 2015 16:28:07 +0100 Subject: [PATCH 3/5] Avoid loads/stores of first class aggregates Currently, small aggregates are passed to functions as immediate values as is. This has two consequences. One is that aggregates are passed component-wise by LLVM, so e.g. a struct containing four u8 values (e.g. an RGBA struct) will be passed as four individual values. The other is that LLVM isn't very good at optimizing loads/stores of first class attributes. What clang does is converting the aggregate to an appropriately sized integer type (e.g. i32 for the four u8 values), and using that for the function argument. This allows LLVM to create code that is a lot better. Fixes #20450 #20149 #16506 #13927 --- src/librustc_trans/trans/base.rs | 12 +++++++++++- src/librustc_trans/trans/foreign.rs | 11 +++++++++-- src/librustc_trans/trans/glue.rs | 2 +- src/librustc_trans/trans/intrinsic.rs | 13 +++++++++---- src/librustc_trans/trans/type_.rs | 12 ++++++++++++ src/librustc_trans/trans/type_of.rs | 15 +++++++++++++++ 6 files changed, 57 insertions(+), 8 deletions(-) diff --git a/src/librustc_trans/trans/base.rs b/src/librustc_trans/trans/base.rs index 88ce36a710a0..ea98d6bb74e9 100644 --- a/src/librustc_trans/trans/base.rs +++ b/src/librustc_trans/trans/base.rs @@ -1054,6 +1054,11 @@ pub fn load_ty<'blk, 'tcx>(cx: Block<'blk, 'tcx>, C_undef(type_of::type_of(cx.ccx(), t)) } else if ty::type_is_bool(t) { Trunc(cx, LoadRangeAssert(cx, ptr, 0, 2, llvm::False), Type::i1(cx.ccx())) + } else if type_is_immediate(cx.ccx(), t) && type_of::type_of(cx.ccx(), t).is_aggregate() { + // We want to pass small aggregates as immediate values, but using an aggregate LLVM type + // for this leads to bad optimizations, so its arg type is an appropriately sized integer + // and we have to convert it + Load(cx, BitCast(cx, ptr, type_of::arg_type_of(cx.ccx(), t).ptr_to())) } else if ty::type_is_char(t) { // a char is a Unicode codepoint, and so takes values from 0 // to 0x10FFFF inclusive only. @@ -1065,9 +1070,14 @@ pub fn load_ty<'blk, 'tcx>(cx: Block<'blk, 'tcx>, /// Helper for storing values in memory. Does the necessary conversion if the in-memory type /// differs from the type used for SSA values. -pub fn store_ty(cx: Block, v: ValueRef, dst: ValueRef, t: Ty) { +pub fn store_ty<'blk, 'tcx>(cx: Block<'blk, 'tcx>, v: ValueRef, dst: ValueRef, t: Ty<'tcx>) { if ty::type_is_bool(t) { Store(cx, ZExt(cx, v, Type::i8(cx.ccx())), dst); + } else if type_is_immediate(cx.ccx(), t) && type_of::type_of(cx.ccx(), t).is_aggregate() { + // We want to pass small aggregates as immediate values, but using an aggregate LLVM type + // for this leads to bad optimizations, so its arg type is an appropriately sized integer + // and we have to convert it + Store(cx, v, BitCast(cx, dst, type_of::arg_type_of(cx.ccx(), t).ptr_to())); } else { Store(cx, v, dst); }; diff --git a/src/librustc_trans/trans/foreign.rs b/src/librustc_trans/trans/foreign.rs index 3dfb36c854b7..fb61bab6ade2 100644 --- a/src/librustc_trans/trans/foreign.rs +++ b/src/librustc_trans/trans/foreign.rs @@ -736,6 +736,13 @@ pub fn trans_rust_fn_with_foreign_abi<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, if ty::type_is_bool(rust_ty) { let tmp = builder.load_range_assert(llforeign_arg, 0, 2, llvm::False); builder.trunc(tmp, Type::i1(ccx)) + } else if type_of::type_of(ccx, rust_ty).is_aggregate() { + // We want to pass small aggregates as immediate values, but using an aggregate + // LLVM type for this leads to bad optimizations, so its arg type is an + // appropriately sized integer and we have to convert it + let tmp = builder.bitcast(llforeign_arg, + type_of::arg_type_of(ccx, rust_ty).ptr_to()); + builder.load(tmp) } else { builder.load(llforeign_arg) } @@ -834,10 +841,10 @@ fn foreign_signature<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_sig: &ty::FnSig<'tcx>, arg_tys: &[Ty<'tcx>]) -> LlvmSignature { - let llarg_tys = arg_tys.iter().map(|&arg| arg_type_of(ccx, arg)).collect(); + let llarg_tys = arg_tys.iter().map(|&arg| foreign_arg_type_of(ccx, arg)).collect(); let (llret_ty, ret_def) = match fn_sig.output { ty::FnConverging(ret_ty) => - (type_of::arg_type_of(ccx, ret_ty), !return_type_is_void(ccx, ret_ty)), + (type_of::foreign_arg_type_of(ccx, ret_ty), !return_type_is_void(ccx, ret_ty)), ty::FnDiverging => (Type::nil(ccx), false) }; diff --git a/src/librustc_trans/trans/glue.rs b/src/librustc_trans/trans/glue.rs index 2219cd59263c..a79bb6ca1647 100644 --- a/src/librustc_trans/trans/glue.rs +++ b/src/librustc_trans/trans/glue.rs @@ -138,7 +138,7 @@ pub fn drop_ty_immediate<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, -> Block<'blk, 'tcx> { let _icx = push_ctxt("drop_ty_immediate"); let vp = alloca(bcx, type_of(bcx.ccx(), t), ""); - Store(bcx, v, vp); + store_ty(bcx, v, vp, t); drop_ty(bcx, vp, t, source_location) } diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs index b22c7f763f03..91c7409182df 100644 --- a/src/librustc_trans/trans/intrinsic.rs +++ b/src/librustc_trans/trans/intrinsic.rs @@ -357,11 +357,11 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, &ccx.link_meta().crate_hash); // NB: This needs to be kept in lockstep with the TypeId struct in // the intrinsic module - C_named_struct(llret_ty, &[C_u64(ccx, hash)]) + C_u64(ccx, hash) } (_, "init") => { let tp_ty = *substs.types.get(FnSpace, 0); - let lltp_ty = type_of::type_of(ccx, tp_ty); + let lltp_ty = type_of::arg_type_of(ccx, tp_ty); if return_type_is_void(ccx, tp_ty) { C_nil(ccx) } else { @@ -686,6 +686,11 @@ fn with_overflow_intrinsic<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, name: &'static st let ret = C_undef(type_of::type_of(bcx.ccx(), t)); let ret = InsertValue(bcx, ret, result, 0); let ret = InsertValue(bcx, ret, overflow, 1); - - ret + if type_is_immediate(bcx.ccx(), t) { + let tmp = alloc_ty(bcx, t, "tmp"); + Store(bcx, ret, tmp); + load_ty(bcx, tmp, t) + } else { + ret + } } diff --git a/src/librustc_trans/trans/type_.rs b/src/librustc_trans/trans/type_.rs index 71a7789eb393..9cae142c03ac 100644 --- a/src/librustc_trans/trans/type_.rs +++ b/src/librustc_trans/trans/type_.rs @@ -82,6 +82,11 @@ impl Type { ty!(llvm::LLVMInt64TypeInContext(ccx.llcx())) } + // Creates an integer type with the given number of bits, e.g. i24 + pub fn ix(ccx: &CrateContext, num_bits: u64) -> Type { + ty!(llvm::LLVMIntTypeInContext(ccx.llcx(), num_bits as c_uint)) + } + pub fn f32(ccx: &CrateContext) -> Type { ty!(llvm::LLVMFloatTypeInContext(ccx.llcx())) } @@ -260,6 +265,13 @@ impl Type { ty!(llvm::LLVMPointerType(self.to_ref(), 0)) } + pub fn is_aggregate(&self) -> bool { + match self.kind() { + TypeKind::Struct | TypeKind::Array => true, + _ => false + } + } + pub fn is_packed(&self) -> bool { unsafe { llvm::LLVMIsPackedStruct(self.to_ref()) == True diff --git a/src/librustc_trans/trans/type_of.rs b/src/librustc_trans/trans/type_of.rs index 993307974222..76e0e0d05455 100644 --- a/src/librustc_trans/trans/type_of.rs +++ b/src/librustc_trans/trans/type_of.rs @@ -243,9 +243,24 @@ pub fn sizing_type_of<'a, 'tcx>(cx: &CrateContext<'a, 'tcx>, t: Ty<'tcx>) -> Typ llsizingty } +pub fn foreign_arg_type_of<'a, 'tcx>(cx: &CrateContext<'a, 'tcx>, t: Ty<'tcx>) -> Type { + if ty::type_is_bool(t) { + Type::i1(cx) + } else { + type_of(cx, t) + } +} + pub fn arg_type_of<'a, 'tcx>(cx: &CrateContext<'a, 'tcx>, t: Ty<'tcx>) -> Type { if ty::type_is_bool(t) { Type::i1(cx) + } else if type_is_immediate(cx, t) && type_of(cx, t).is_aggregate() { + // We want to pass small aggregates as immediate values, but using an aggregate LLVM type + // for this leads to bad optimizations, so its arg type is an appropriately sized integer + match machine::llsize_of_alloc(cx, sizing_type_of(cx, t)) { + 0 => type_of(cx, t), + n => Type::ix(cx, n * 8), + } } else { type_of(cx, t) } From 6f1b06eb6546ab4808614dfe7807277419b485aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Steinbrink?= Date: Thu, 8 Jan 2015 16:34:09 +0100 Subject: [PATCH 4/5] Pass small fixed-size arrays as immediates Currently even small fixed-size arrays are passed as indirect parameters, which seems to be just an oversight. Let's handle them the same as structs of the same size, passing them as immediate values. --- src/librustc_trans/trans/common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc_trans/trans/common.rs b/src/librustc_trans/trans/common.rs index fb58d9359223..f59e70d099a6 100644 --- a/src/librustc_trans/trans/common.rs +++ b/src/librustc_trans/trans/common.rs @@ -244,7 +244,7 @@ pub fn type_is_immediate<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, ty: Ty<'tcx>) - return false; } match ty.sty { - ty::ty_struct(..) | ty::ty_enum(..) | ty::ty_tup(..) | + ty::ty_struct(..) | ty::ty_enum(..) | ty::ty_tup(..) | ty::ty_vec(_, Some(_)) | ty::ty_unboxed_closure(..) => { let llty = sizing_type_of(ccx, ty); llsize_of_alloc(ccx, llty) <= llsize_of_alloc(ccx, ccx.int_type()) From b8304e540423b86bbe9789ed0088f29a61f46459 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Sat, 10 Jan 2015 22:56:01 -0800 Subject: [PATCH 5/5] test: Use Thread::scoped in two more tests I saw these hanging on a windows bot, and the previous ones seem to have calmed down after switching from Thread::spawn to Thread::scoped, so try that here as well! --- src/test/run-pass/unwind-unique.rs | 2 +- src/test/run-pass/weak-lang-item.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/run-pass/unwind-unique.rs b/src/test/run-pass/unwind-unique.rs index 371fd677bd98..74802c156a26 100644 --- a/src/test/run-pass/unwind-unique.rs +++ b/src/test/run-pass/unwind-unique.rs @@ -19,5 +19,5 @@ fn f() { } pub fn main() { - let _t = Thread::spawn(f); + let _t = Thread::scoped(f); } diff --git a/src/test/run-pass/weak-lang-item.rs b/src/test/run-pass/weak-lang-item.rs index 08dac5c7c82d..b1c65d322ab2 100644 --- a/src/test/run-pass/weak-lang-item.rs +++ b/src/test/run-pass/weak-lang-item.rs @@ -15,7 +15,7 @@ extern crate "weak-lang-items" as other; use std::thread::Thread; fn main() { - let _ = Thread::spawn(move|| { + let _ = Thread::scoped(move|| { other::foo() }); }