@@ -77,6 +77,11 @@ FilterPID("pid",
                             cl::Optional,
                             cl::cat(AggregatorCategory));
 
+static cl::opt<bool> ImputeTraceFallthrough(
+    "impute-trace-fall-through",
+    cl::desc("impute missing fall-throughs for branch-only traces"),
+    cl::Optional, cl::cat(AggregatorCategory));
+
 static cl::opt<bool>
     IgnoreBuildID("ignore-build-id",
                   cl::desc("continue even if build-ids in input binary and perf.data mismatch"),
@@ -513,6 +518,69 @@ void DataAggregator::parsePerfData(BinaryContext &BC) {
   deleteTempFiles();
 }
 
+void DataAggregator::imputeFallThroughs() {
+  if (Traces.empty())
+    return;
+
+  std::pair PrevBranch(Trace::EXTERNAL, Trace::EXTERNAL);
+  uint64_t AggregateCount = 0;
+  uint64_t AggregateFallthroughSize = 0;
+  uint64_t InferredTraces = 0;
+
+  // Helper map recording whether the instruction at an address is a
+  // call/ret/unconditional branch.
+  std::unordered_map<uint64_t, bool> IsUncondCTMap;
+  auto checkUnconditionalControlTransfer = [&](const uint64_t Addr) {
+    auto isUncondCT = [&](const MCInst &MI) -> bool {
+      return BC->MIB->isUnconditionalControlTransfer(MI);
+    };
+    return testAndSet<bool>(Addr, isUncondCT, IsUncondCTMap).value_or(true);
+  };
+
+  // Traces are sorted by their component addresses (Branch, From, To).
+  // assert(is_sorted(Traces));
+
+  // Traces corresponding to the top-of-stack branch entry with a missing
+  // fall-through have BR_ONLY (-1ULL/UINT64_MAX) in the To field, meaning
+  // that for fixed values of Branch and From, branch-only traces are stored
+  // after all traces with a valid fall-through.
+  //
+  // Group traces by (Branch, From) and compute the weighted average
+  // fall-through length for the top-of-stack trace (closing the group) by
+  // accumulating the fall-through lengths of traces with valid fall-throughs
+  // earlier in the group.
+  for (auto &[Trace, Info] : Traces) {
+    // Skip fall-throughs in external code.
+    if (Trace.From == Trace::EXTERNAL)
+      continue;
+    std::pair CurrentBranch(Trace.Branch, Trace.From);
+    // BR_ONLY must be the last trace in the group.
+    if (Trace.To == Trace::BR_ONLY) {
+      // If the group is not empty, use the aggregate values; otherwise impute
+      // 0 bytes for unconditional transfers (call/ret/unconditional branch)
+      // and 1 byte for everything else.
+      uint64_t InferredBytes =
+          PrevBranch == CurrentBranch
+              ? AggregateFallthroughSize / AggregateCount
+              : !checkUnconditionalControlTransfer(Trace.From);
+      Trace.To = Trace.From + InferredBytes;
+      LLVM_DEBUG(dbgs() << "imputed " << Trace << " (" << InferredBytes
+                        << " bytes)\n");
+      ++InferredTraces;
+    } else {
+      // Trace with a valid fall-through.
+      // New group: reset aggregates.
+      if (CurrentBranch != PrevBranch)
+        AggregateCount = AggregateFallthroughSize = 0;
+      // Only use valid fall-through lengths.
+      if (Trace.To != Trace::EXTERNAL)
+        AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
+      AggregateCount += Info.TakenCount;
+    }
+    PrevBranch = CurrentBranch;
+  }
+  if (opts::Verbosity >= 1)
+    outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";
+}
+
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
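To make the grouping concrete, here is a minimal standalone model of the weighted-average imputation for a single (Branch, From) group. The toy types are assumptions for illustration, not BOLT's Trace/TakenCount structures, and the group-reset logic is omitted:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Toy stand-ins for the patch's sorted trace records; BR_ONLY marks a
    // top-of-stack branch entry whose fall-through end is unknown.
    constexpr uint64_t BR_ONLY = UINT64_MAX;
    struct ToyTrace { uint64_t Branch, From, To, Count; };

    int main() {
      // Sorted by (Branch, From, To): the BR_ONLY record closes its group.
      std::vector<ToyTrace> Traces = {
          {0x10, 0x40, 0x48, 3},    // fall-through of 8 bytes, weight 3
          {0x10, 0x40, 0x50, 1},    // fall-through of 16 bytes, weight 1
          {0x10, 0x40, BR_ONLY, 5}, // missing fall-through, to be imputed
      };
      uint64_t Sum = 0, Cnt = 0;
      for (ToyTrace &T : Traces) {
        if (T.To == BR_ONLY) {
          // Weighted average over the group: (8*3 + 16*1) / (3+1) = 10 bytes.
          T.To = T.From + Sum / Cnt;
          std::cout << "imputed To = 0x" << std::hex << T.To << "\n"; // 0x4a
        } else {
          Sum += (T.To - T.From) * T.Count;
          Cnt += T.Count;
        }
      }
    }

Because traces are sorted and the BR_ONLY record compares greater than any valid To, a single forward pass sees every valid fall-through in a group before the entry that needs imputation.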
@@ -525,6 +593,9 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   // Sort parsed traces for faster processing.
   llvm::sort(Traces, llvm::less_first());
 
+  if (opts::ImputeTraceFallthrough)
+    imputeFallThroughs();
+
   if (opts::HeatmapMode) {
     if (std::error_code EC = printLBRHeatMap())
       return errorCodeToError(EC);
@@ -726,22 +797,10 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
 }
 
 bool DataAggregator::checkReturn(uint64_t Addr) {
-  auto isReturn = [&](auto MI) { return MI && BC->MIB->isReturn(*MI); };
-  if (llvm::is_contained(Returns, Addr))
-    return true;
-
-  BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr);
-  if (!Func)
-    return false;
-
-  const uint64_t Offset = Addr - Func->getAddress();
-  if (Func->hasInstructions()
-          ? isReturn(Func->getInstructionAtOffset(Offset))
-          : isReturn(Func->disassembleInstructionAtOffset(Offset))) {
-    Returns.emplace(Addr);
-    return true;
-  }
-  return false;
+  auto isReturn = [&](const MCInst &MI) -> bool {
+    return BC->MIB->isReturn(MI);
+  };
+  return testAndSet<bool>(Addr, isReturn, Returns).value_or(false);
 }
 
 bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
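The new body delegates to a testAndSet<T> helper that is not shown in these hunks. A plausible sketch of its contract, inferred from the call sites here and in imputeFallThroughs (the free-function form, the exact signature, and the resolveInstructionAt name are all assumptions):

    #include <cstdint>
    #include <functional>
    #include <optional>
    #include <unordered_map>

    struct MCInst {}; // stand-in for llvm::MCInst

    // Hypothetical address-to-instruction lookup; in BOLT this would
    // presumably go through getBinaryFunctionContainingAddress() and
    // (dis)assembly, as the removed checkReturn body did.
    const MCInst *resolveInstructionAt(uint64_t Addr);

    template <typename T>
    std::optional<T> testAndSet(uint64_t Addr,
                                std::function<T(const MCInst &)> Callback,
                                std::unordered_map<uint64_t, T> &Map) {
      // Serve repeated queries for the same address from the cache.
      if (auto It = Map.find(Addr); It != Map.end())
        return It->second;
      // Cache miss: resolve the instruction, evaluate, and memoize.
      const MCInst *MI = resolveInstructionAt(Addr);
      if (!MI)
        return std::nullopt;
      T Result = Callback(*MI);
      Map.emplace(Addr, Result);
      return Result;
    }

Note the differing defaults at the two call sites: checkReturn uses .value_or(false), so an unresolvable address is not treated as a return, while checkUnconditionalControlTransfer uses .value_or(true), so an unknown instruction is conservatively imputed a 0-byte fall-through. The Returns.emplace(..., true) changes in the hunks below likewise suggest Returns is now a std::unordered_map<uint64_t, bool> cache of this form, pre-seeded with known returns.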
@@ -1331,7 +1390,7 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
     if (!Addr[0]->Offset)
       Addr[0]->Offset = Trace::FT_EXTERNAL_RETURN;
     else
-      Returns.emplace(Addr[0]->Offset);
+      Returns.emplace(Addr[0]->Offset, true);
   }
 
 /// Record a trace.
@@ -1592,7 +1651,7 @@ void DataAggregator::processBranchEvents() {
   NamedRegionTimer T("processBranch", "Processing branch events",
                      TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
 
-  Returns.emplace(Trace::FT_EXTERNAL_RETURN);
+  Returns.emplace(Trace::FT_EXTERNAL_RETURN, true);
   for (const auto &[Trace, Info] : Traces) {
     bool IsReturn = checkReturn(Trace.Branch);
     // Ignore returns.