rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In anther mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!
This commit is contained in:
Alex Crichton 2017-07-23 08:14:38 -07:00
parent abef7e1fd2
commit 4ca1b19fde
24 changed files with 1288 additions and 182 deletions

View file

@ -11,7 +11,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<drop_in_place_intrinsic::StructWithDtor[0]> @@ drop_in_place_intrinsic.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<drop_in_place_intrinsic::StructWithDtor[0]> @@ drop_in_place_intrinsic0[Internal]
struct StructWithDtor(u32);
impl Drop for StructWithDtor {
@ -22,7 +22,7 @@ impl Drop for StructWithDtor {
//~ TRANS_ITEM fn drop_in_place_intrinsic::main[0]
fn main() {
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]; 2]> @@ drop_in_place_intrinsic.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]; 2]> @@ drop_in_place_intrinsic0[Internal]
let x = [StructWithDtor(0), StructWithDtor(1)];
drop_slice_in_place(&x);
@ -34,7 +34,7 @@ fn drop_slice_in_place(x: &[StructWithDtor]) {
// This is the interesting thing in this test case: Normally we would
// not have drop-glue for the unsized [StructWithDtor]. This has to be
// generated though when the drop_in_place() intrinsic is used.
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]]> @@ drop_in_place_intrinsic.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<[drop_in_place_intrinsic::StructWithDtor[0]]> @@ drop_in_place_intrinsic0[Internal]
::std::ptr::drop_in_place(x as *const _ as *mut [StructWithDtor]);
}
}

View file

@ -45,7 +45,7 @@ enum EnumNoDrop<T1, T2> {
struct NonGenericNoDrop(i32);
struct NonGenericWithDrop(i32);
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::NonGenericWithDrop[0]> @@ generic_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::NonGenericWithDrop[0]> @@ generic_drop_glue0[Internal]
impl Drop for NonGenericWithDrop {
//~ TRANS_ITEM fn generic_drop_glue::{{impl}}[2]::drop[0]
@ -54,11 +54,11 @@ impl Drop for NonGenericWithDrop {
//~ TRANS_ITEM fn generic_drop_glue::main[0]
fn main() {
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<i8, char>> @@ generic_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<i8, char>> @@ generic_drop_glue0[Internal]
//~ TRANS_ITEM fn generic_drop_glue::{{impl}}[0]::drop[0]<i8, char>
let _ = StructWithDrop { x: 0i8, y: 'a' }.x;
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<&str, generic_drop_glue::NonGenericNoDrop[0]>> @@ generic_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructWithDrop[0]<&str, generic_drop_glue::NonGenericNoDrop[0]>> @@ generic_drop_glue0[Internal]
//~ TRANS_ITEM fn generic_drop_glue::{{impl}}[0]::drop[0]<&str, generic_drop_glue::NonGenericNoDrop[0]>
let _ = StructWithDrop { x: "&str", y: NonGenericNoDrop(0) }.y;
@ -67,17 +67,17 @@ fn main() {
// This is supposed to generate drop-glue because it contains a field that
// needs to be dropped.
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructNoDrop[0]<generic_drop_glue::NonGenericWithDrop[0], f64>> @@ generic_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::StructNoDrop[0]<generic_drop_glue::NonGenericWithDrop[0], f64>> @@ generic_drop_glue0[Internal]
let _ = StructNoDrop { x: NonGenericWithDrop(0), y: 0f64 }.y;
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<i32, i64>> @@ generic_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<i32, i64>> @@ generic_drop_glue0[Internal]
//~ TRANS_ITEM fn generic_drop_glue::{{impl}}[1]::drop[0]<i32, i64>
let _ = match EnumWithDrop::A::<i32, i64>(0) {
EnumWithDrop::A(x) => x,
EnumWithDrop::B(x) => x as i32
};
//~TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<f64, f32>> @@ generic_drop_glue.cgu-0[Internal]
//~TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<generic_drop_glue::EnumWithDrop[0]<f64, f32>> @@ generic_drop_glue0[Internal]
//~ TRANS_ITEM fn generic_drop_glue::{{impl}}[1]::drop[0]<f64, f32>
let _ = match EnumWithDrop::B::<f64, f32>(1.0) {
EnumWithDrop::A(x) => x,

View file

@ -31,13 +31,13 @@ impl<T> Trait for Struct<T> {
fn main() {
let s1 = Struct { _a: 0u32 };
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u32>> @@ instantiation_through_vtable.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u32>> @@ instantiation_through_vtable0[Internal]
//~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::foo[0]<u32>
//~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::bar[0]<u32>
let _ = &s1 as &Trait;
let s1 = Struct { _a: 0u64 };
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u64>> @@ instantiation_through_vtable.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<instantiation_through_vtable::Struct[0]<u64>> @@ instantiation_through_vtable0[Internal]
//~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::foo[0]<u64>
//~ TRANS_ITEM fn instantiation_through_vtable::{{impl}}[0]::bar[0]<u64>
let _ = &s1 as &Trait;

View file

@ -13,7 +13,7 @@
#![deny(dead_code)]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::StructWithDrop[0]> @@ non_generic_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::StructWithDrop[0]> @@ non_generic_drop_glue0[Internal]
struct StructWithDrop {
x: i32
}
@ -27,7 +27,7 @@ struct StructNoDrop {
x: i32
}
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::EnumWithDrop[0]> @@ non_generic_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<non_generic_drop_glue::EnumWithDrop[0]> @@ non_generic_drop_glue0[Internal]
enum EnumWithDrop {
A(i32)
}

View file

@ -13,11 +13,11 @@
#![deny(dead_code)]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Root[0]> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Root[0]> @@ transitive_drop_glue0[Internal]
struct Root(Intermediate);
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Intermediate[0]> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Intermediate[0]> @@ transitive_drop_glue0[Internal]
struct Intermediate(Leaf);
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Leaf[0]> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::Leaf[0]> @@ transitive_drop_glue0[Internal]
struct Leaf;
impl Drop for Leaf {
@ -38,15 +38,15 @@ fn main() {
let _ = Root(Intermediate(Leaf));
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<u32>> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<u32>> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<u32>> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<u32>> @@ transitive_drop_glue0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<u32>> @@ transitive_drop_glue0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<u32>> @@ transitive_drop_glue0[Internal]
//~ TRANS_ITEM fn transitive_drop_glue::{{impl}}[1]::drop[0]<u32>
let _ = RootGen(IntermediateGen(LeafGen(0u32)));
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<i16>> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<i16>> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<i16>> @@ transitive_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::RootGen[0]<i16>> @@ transitive_drop_glue0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::IntermediateGen[0]<i16>> @@ transitive_drop_glue0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<transitive_drop_glue::LeafGen[0]<i16>> @@ transitive_drop_glue0[Internal]
//~ TRANS_ITEM fn transitive_drop_glue::{{impl}}[1]::drop[0]<i16>
let _ = RootGen(IntermediateGen(LeafGen(0i16)));
}

View file

@ -13,7 +13,7 @@
#![deny(dead_code)]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<tuple_drop_glue::Dropped[0]> @@ tuple_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<tuple_drop_glue::Dropped[0]> @@ tuple_drop_glue0[Internal]
struct Dropped;
impl Drop for Dropped {
@ -23,10 +23,10 @@ impl Drop for Dropped {
//~ TRANS_ITEM fn tuple_drop_glue::main[0]
fn main() {
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(u32, tuple_drop_glue::Dropped[0])> @@ tuple_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(u32, tuple_drop_glue::Dropped[0])> @@ tuple_drop_glue0[Internal]
let x = (0u32, Dropped);
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(i16, (tuple_drop_glue::Dropped[0], bool))> @@ tuple_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(tuple_drop_glue::Dropped[0], bool)> @@ tuple_drop_glue.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(i16, (tuple_drop_glue::Dropped[0], bool))> @@ tuple_drop_glue0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<(tuple_drop_glue::Dropped[0], bool)> @@ tuple_drop_glue0[Internal]
let x = (0i16, (Dropped, true));
}

View file

@ -57,13 +57,13 @@ fn main()
{
// simple case
let bool_sized = &true;
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<bool> @@ unsizing.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<bool> @@ unsizing0[Internal]
//~ TRANS_ITEM fn unsizing::{{impl}}[0]::foo[0]
let _bool_unsized = bool_sized as &Trait;
let char_sized = &'a';
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<char> @@ unsizing.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<char> @@ unsizing0[Internal]
//~ TRANS_ITEM fn unsizing::{{impl}}[1]::foo[0]
let _char_unsized = char_sized as &Trait;
@ -73,13 +73,13 @@ fn main()
_b: 2,
_c: 3.0f64
};
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<f64> @@ unsizing.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<f64> @@ unsizing0[Internal]
//~ TRANS_ITEM fn unsizing::{{impl}}[2]::foo[0]
let _struct_unsized = struct_sized as &Struct<Trait>;
// custom coercion
let wrapper_sized = Wrapper(&0u32);
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<u32> @@ unsizing.cgu-0[Internal]
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<u32> @@ unsizing0[Internal]
//~ TRANS_ITEM fn unsizing::{{impl}}[3]::foo[0]
let _wrapper_sized = wrapper_sized as Wrapper<Trait>;
}

View file

@ -2,5 +2,5 @@
all:
$(RUSTC) -C extra-filename=bar foo.rs -C save-temps
rm $(TMPDIR)/foobar.0.o
rm $(TMPDIR)/foobar.foo0.rust-cgu.o
rm $(TMPDIR)/$(call BIN,foobar)

View file

@ -6,4 +6,4 @@
all:
$(RUSTC) cci_lib.rs
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ .*cci_fn)" -eq "2" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ .*cci_fn)" -eq "2" ]

View file

@ -8,7 +8,7 @@
all:
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*inlined)" -eq "0" ]
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ]
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ hidden\ i32\ .*normal)" -eq "1" ]
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c declare\ hidden\ i32\ .*normal)" -eq "2" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ i32\ .*inlined)" -eq "0" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ hidden\ i32\ .*normal)" -eq "1" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c declare\ hidden\ i32\ .*normal)" -eq "2" ]

View file

@ -6,4 +6,4 @@
all:
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ .*magic_fn)" -eq "3" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ .*magic_fn)" -eq "3" ]

View file

@ -0,0 +1,17 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// no-prefer-dynamic
#![crate_type = "rlib"]
pub fn bar() -> u32 {
3
}

View file

@ -0,0 +1,39 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// compile-flags: -Z thinlto -C codegen-units=8 -O
// min-llvm-version 4.0
// ignore-emscripten
// We want to assert here that ThinLTO will inline across codegen units. There's
// not really a great way to do that in general so we sort of hack around it by
// praying two functions go into separate codegen units and then assuming that
// if inlining *doesn't* happen the first byte of the functions will differ.
pub fn foo() -> u32 {
bar::bar()
}
mod bar {
pub fn bar() -> u32 {
3
}
}
fn main() {
println!("{} {}", foo(), bar::bar());
unsafe {
let foo = foo as usize as *const u8;
let bar = bar::bar as usize as *const u8;
assert_eq!(*foo, *bar);
}
}

View file

@ -0,0 +1,38 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// compile-flags: -Z thinlto -C codegen-units=8 -O -C lto
// aux-build:thin-lto-inlines-aux.rs
// min-llvm-version 4.0
// no-prefer-dynamic
// ignore-emscripten
// We want to assert here that ThinLTO will inline across codegen units. There's
// not really a great way to do that in general so we sort of hack around it by
// praying two functions go into separate codegen units and then assuming that
// if inlining *doesn't* happen the first byte of the functions will differ.
extern crate thin_lto_inlines_aux as bar;
pub fn foo() -> u32 {
bar::bar()
}
fn main() {
println!("{} {}", foo(), bar::bar());
unsafe {
let foo = foo as usize as *const u8;
let bar = bar::bar as usize as *const u8;
assert_eq!(*foo, *bar);
}
}