Fix multi-CGU debug builds that use autodiff by delaying autodiff until LTO
This commit is contained in:
parent
8340622e14
commit
c89a89bb14
2 changed files with 13 additions and 5 deletions
|
|
@ -812,12 +812,12 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
||||||
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
|
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
|
||||||
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
|
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
|
||||||
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
|
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
|
||||||
|
bool IsLTO = OptStage == LLVMRustOptStage::ThinLTO ||
|
||||||
|
OptStage == LLVMRustOptStage::FatLTO;
|
||||||
if (!NoPrepopulatePasses) {
|
if (!NoPrepopulatePasses) {
|
||||||
// The pre-link pipelines don't support O0 and require using
|
// The pre-link pipelines don't support O0 and require using
|
||||||
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
|
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
|
||||||
// support O0 and using them is required.
|
// support O0 and using them is required.
|
||||||
bool IsLTO = OptStage == LLVMRustOptStage::ThinLTO ||
|
|
||||||
OptStage == LLVMRustOptStage::FatLTO;
|
|
||||||
if (OptLevel == OptimizationLevel::O0 && !IsLTO) {
|
if (OptLevel == OptimizationLevel::O0 && !IsLTO) {
|
||||||
for (const auto &C : PipelineStartEPCallbacks)
|
for (const auto &C : PipelineStartEPCallbacks)
|
||||||
PB.registerPipelineStartEPCallback(C);
|
PB.registerPipelineStartEPCallback(C);
|
||||||
|
|
@ -908,7 +908,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
||||||
|
|
||||||
// now load "-enzyme" pass:
|
// now load "-enzyme" pass:
|
||||||
// With dlopen, ENZYME macro may not be defined, so check EnzymePtr directly
|
// With dlopen, ENZYME macro may not be defined, so check EnzymePtr directly
|
||||||
if (EnzymePtr) {
|
// In the case of debug builds with multiple codegen units, we might not
|
||||||
|
// have all function definitions available during the early compiler
|
||||||
|
// invocations. We therefore wait for the final LTO step to run Enzyme.
|
||||||
|
if (EnzymePtr && IsLTO) {
|
||||||
|
|
||||||
if (PrintBeforeEnzyme) {
|
if (PrintBeforeEnzyme) {
|
||||||
// Handle the Rust flag `-Zautodiff=PrintModBefore`.
|
// Handle the Rust flag `-Zautodiff=PrintModBefore`.
|
||||||
|
|
@ -929,6 +932,7 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
||||||
MPM.addPass(PrintModulePass(outs(), Banner, true, false));
|
MPM.addPass(PrintModulePass(outs(), Banner, true, false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PrintPasses) {
|
if (PrintPasses) {
|
||||||
// Print all passes from the PM:
|
// Print all passes from the PM:
|
||||||
std::string Pipeline;
|
std::string Pipeline;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
//@ revisions: DEBUG RELEASE
|
//@ revisions: DEBUG RELEASE
|
||||||
//@[RELEASE] compile-flags: -Zautodiff=Enable,NoTT -C opt-level=3 -Clto=fat
|
//@[RELEASE] compile-flags: -Zautodiff=Enable,NoTT -C opt-level=3 -Clto=fat
|
||||||
//@[DEBUG] compile-flags: -Zautodiff=Enable,NoTT -C opt-level=0 -Clto=fat -C debuginfo=2
|
//@[DEBUG] compile-flags: -Zautodiff=Enable,NoTT -Copt-level=0 -Clto=fat -Cdebuginfo=2 -Ccodegen-units=8
|
||||||
//@ needs-enzyme
|
//@ needs-enzyme
|
||||||
//@ incremental
|
//@ incremental
|
||||||
//@ no-prefer-dynamic
|
//@ no-prefer-dynamic
|
||||||
|
|
@ -13,6 +13,10 @@
|
||||||
// dropped. We now use globals instead and add this test to verify that incremental
|
// dropped. We now use globals instead and add this test to verify that incremental
|
||||||
// keeps working. Also testing debug mode while at it.
|
// keeps working. Also testing debug mode while at it.
|
||||||
|
|
||||||
|
// We extended this test to use 8 codegen-units in debug mode and call an intrinsic like powi,
|
||||||
|
// rather than just simple arithmetic. This caused a compilation failure, since the definition of
|
||||||
|
// the intrinsic was not available in the same cgu as the function being differentiated.
|
||||||
|
|
||||||
use std::autodiff::autodiff_reverse;
|
use std::autodiff::autodiff_reverse;
|
||||||
|
|
||||||
#[autodiff_reverse(bar, Duplicated, Duplicated)]
|
#[autodiff_reverse(bar, Duplicated, Duplicated)]
|
||||||
|
|
@ -20,7 +24,7 @@ pub fn foo(r: &[f64; 10], res: &mut f64) {
|
||||||
let mut output = [0.0; 10];
|
let mut output = [0.0; 10];
|
||||||
output[0] = r[0];
|
output[0] = r[0];
|
||||||
output[1] = r[1] * r[2];
|
output[1] = r[1] * r[2];
|
||||||
output[2] = r[4] * r[5];
|
output[2] = r[4] * r[5].powi(2);
|
||||||
output[3] = r[2] * r[6];
|
output[3] = r[2] * r[6];
|
||||||
output[4] = r[1] * r[7];
|
output[4] = r[1] * r[7];
|
||||||
output[5] = r[2] * r[8];
|
output[5] = r[2] * r[8];
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue