Fix multi-CGU debug builds using autodiff by delaying autodiff until LTO
This commit is contained in:
parent
8340622e14
commit
c89a89bb14
2 changed files with 13 additions and 5 deletions
|
|
@ -812,12 +812,12 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
|||
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
|
||||
raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
|
||||
raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
|
||||
bool IsLTO = OptStage == LLVMRustOptStage::ThinLTO ||
|
||||
OptStage == LLVMRustOptStage::FatLTO;
|
||||
if (!NoPrepopulatePasses) {
|
||||
// The pre-link pipelines don't support O0 and require using
|
||||
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
|
||||
// support O0 and using them is required.
|
||||
bool IsLTO = OptStage == LLVMRustOptStage::ThinLTO ||
|
||||
OptStage == LLVMRustOptStage::FatLTO;
|
||||
if (OptLevel == OptimizationLevel::O0 && !IsLTO) {
|
||||
for (const auto &C : PipelineStartEPCallbacks)
|
||||
PB.registerPipelineStartEPCallback(C);
|
||||
|
|
@ -908,7 +908,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
|||
|
||||
// now load "-enzyme" pass:
|
||||
// With dlopen, the ENZYME macro may not be defined, so check EnzymePtr directly
|
||||
if (EnzymePtr) {
|
||||
// In the case of debug builds with multiple codegen units, we might not
|
||||
// have all function definitions available during the early compiler
|
||||
// invocations. We therefore wait for the final LTO step to run Enzyme.
|
||||
if (EnzymePtr && IsLTO) {
|
||||
|
||||
if (PrintBeforeEnzyme) {
|
||||
// Handle the Rust flag `-Zautodiff=PrintModBefore`.
|
||||
|
|
@ -929,6 +932,7 @@ extern "C" LLVMRustResult LLVMRustOptimize(
|
|||
MPM.addPass(PrintModulePass(outs(), Banner, true, false));
|
||||
}
|
||||
}
|
||||
|
||||
if (PrintPasses) {
|
||||
// Print all passes from the PM:
|
||||
std::string Pipeline;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
//@ revisions: DEBUG RELEASE
|
||||
//@[RELEASE] compile-flags: -Zautodiff=Enable,NoTT -C opt-level=3 -Clto=fat
|
||||
//@[DEBUG] compile-flags: -Zautodiff=Enable,NoTT -C opt-level=0 -Clto=fat -C debuginfo=2
|
||||
//@[DEBUG] compile-flags: -Zautodiff=Enable,NoTT -Copt-level=0 -Clto=fat -Cdebuginfo=2 -Ccodegen-units=8
|
||||
//@ needs-enzyme
|
||||
//@ incremental
|
||||
//@ no-prefer-dynamic
|
||||
|
|
@ -13,6 +13,10 @@
|
|||
// dropped. We now use globals instead and add this test to verify that incremental
|
||||
// keeps working. Also testing debug mode while at it.
|
||||
|
||||
// We extended this test to use 8 codegen-units in debug mode and call an intrinsic like powi,
|
||||
// rather than just simple arithmetic. This caused a compilation failure, since the definition of
|
||||
// the intrinsic was not available in the same codegen unit as the function being differentiated.
|
||||
|
||||
use std::autodiff::autodiff_reverse;
|
||||
|
||||
#[autodiff_reverse(bar, Duplicated, Duplicated)]
|
||||
|
|
@ -20,7 +24,7 @@ pub fn foo(r: &[f64; 10], res: &mut f64) {
|
|||
let mut output = [0.0; 10];
|
||||
output[0] = r[0];
|
||||
output[1] = r[1] * r[2];
|
||||
output[2] = r[4] * r[5];
|
||||
output[2] = r[4] * r[5].powi(2);
|
||||
output[3] = r[2] * r[6];
|
||||
output[4] = r[1] * r[7];
|
||||
output[5] = r[2] * r[8];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue