@@ -62,8 +62,6 @@ STATISTIC(NumUnscaledPairCreated,
62
62
" Number of load/store from unscaled generated" );
63
63
STATISTIC (NumZeroStoresPromoted, " Number of narrow zero stores promoted" );
64
64
STATISTIC (NumLoadsFromStoresPromoted, " Number of loads from stores promoted" );
65
- STATISTIC (NumConstOffsetFolded,
66
- " Number of const offset of index address folded" );
67
65
68
66
DEBUG_COUNTER (RegRenamingCounter, DEBUG_TYPE " -reg-renaming" ,
69
67
" Controls which pairs are considered for renaming" );
@@ -77,11 +75,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
77
75
static cl::opt<unsigned > UpdateLimit (" aarch64-update-scan-limit" , cl::init(100 ),
78
76
cl::Hidden);
79
77
80
- // The LdStConstLimit limits how far we search for const offset instructions
81
- // when we form index address load/store instructions.
82
- static cl::opt<unsigned > LdStConstLimit (" aarch64-load-store-const-scan-limit" ,
83
- cl::init (10 ), cl::Hidden);
84
-
85
78
// Enable register renaming to find additional store pairing opportunities.
86
79
static cl::opt<bool > EnableRenaming (" aarch64-load-store-renaming" ,
87
80
cl::init (true ), cl::Hidden);
@@ -178,13 +171,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
178
171
findMatchingUpdateInsnForward (MachineBasicBlock::iterator I,
179
172
int UnscaledOffset, unsigned Limit);
180
173
181
- // Scan the instruction list to find a register assigned with a const
182
- // value that can be combined with the current instruction (a load or store)
183
- // using base addressing with writeback. Scan backwards.
184
- MachineBasicBlock::iterator
185
- findMatchingConstOffsetBackward (MachineBasicBlock::iterator I, unsigned Limit,
186
- unsigned &Offset);
187
-
188
174
// Scan the instruction list to find a base register update that can
189
175
// be combined with the current instruction (a load or store) using
190
176
// pre or post indexed addressing with writeback. Scan backwards.
@@ -196,19 +182,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
196
182
bool isMatchingUpdateInsn (MachineInstr &MemMI, MachineInstr &MI,
197
183
unsigned BaseReg, int Offset);
198
184
199
- bool isMatchingMovConstInsn (MachineInstr &MemMI, MachineInstr &MI,
200
- unsigned IndexReg, unsigned &Offset);
201
-
202
185
// Merge a pre- or post-index base register update into a ld/st instruction.
203
186
MachineBasicBlock::iterator
204
187
mergeUpdateInsn (MachineBasicBlock::iterator I,
205
188
MachineBasicBlock::iterator Update, bool IsPreIdx);
206
189
207
- MachineBasicBlock::iterator
208
- mergeConstOffsetInsn (MachineBasicBlock::iterator I,
209
- MachineBasicBlock::iterator Update, unsigned Offset,
210
- int Scale);
211
-
212
190
// Find and merge zero store instructions.
213
191
bool tryToMergeZeroStInst (MachineBasicBlock::iterator &MBBI);
214
192
@@ -221,9 +199,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
221
199
// Find and merge a base register updates before or after a ld/st instruction.
222
200
bool tryToMergeLdStUpdate (MachineBasicBlock::iterator &MBBI);
223
201
224
- // Find and merge an index ld/st instruction into a base ld/st instruction.
225
- bool tryToMergeIndexLdSt (MachineBasicBlock::iterator &MBBI, int Scale);
226
-
227
202
bool optimizeBlock (MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
228
203
229
204
bool runOnMachineFunction (MachineFunction &Fn) override ;
@@ -506,16 +481,6 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
506
481
}
507
482
}
508
483
509
- static unsigned getBaseAddressOpcode (unsigned Opc) {
510
- // Map an index address (reg+reg) load/store opcode to its base address equivalent; any opcode not listed hits llvm_unreachable. TODO: Add more index address loads/stores (only LDRBBroX -> LDRBBui is handled).
511
- switch (Opc) {
512
- default :
513
- llvm_unreachable (" Opcode has no base address equivalent!" );
514
- case AArch64::LDRBBroX:
515
- return AArch64::LDRBBui;
516
- }
517
- }
518
-
519
484
static unsigned getPostIndexedOpcode (unsigned Opc) {
520
485
switch (Opc) {
521
486
default :
@@ -757,20 +722,6 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
757
722
}
758
723
}
759
724
760
- // Return true if MI is a reg+reg Ld/St this pass knows how to merge, writing its scale to Scale. Scale is left untouched when false is returned.
761
- static bool isMergeableIndexLdSt (MachineInstr &MI, int &Scale) {
762
- unsigned Opc = MI.getOpcode ();
763
- switch (Opc) {
764
- default :
765
- return false ;
766
- // Scaled instructions.
767
- // TODO: Add more index address loads/stores; LDRBBroX (Scale = 1) is the only one so far.
768
- case AArch64::LDRBBroX:
769
- Scale = 1 ;
770
- return true ;
771
- }
772
- }
773
-
774
725
static bool isRewritableImplicitDef (unsigned Opc) {
775
726
switch (Opc) {
776
727
default :
@@ -2097,63 +2048,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
2097
2048
return NextI;
2098
2049
}
2099
2050
2100
- MachineBasicBlock::iterator
2101
- AArch64LoadStoreOpt::mergeConstOffsetInsn (MachineBasicBlock::iterator I,
2102
- MachineBasicBlock::iterator Update,
2103
- unsigned Offset, int Scale) { // Replaces the const mov sequence ending at Update and the index ld/st at I with an ADD plus a base address ld/st; returns the iterator following I.
2104
- assert ((Update->getOpcode () == AArch64::MOVKWi) &&
2105
- " Unexpected const mov instruction to merge!" );
2106
- MachineBasicBlock::iterator E = I->getParent ()->end ();
2107
- MachineBasicBlock::iterator NextI = next_nodbg (I, E);
2108
- MachineBasicBlock::iterator PrevI = prev_nodbg (Update, E); // NOTE(review): this steps backwards from Update, yet the bound passed is end() rather than begin() -- confirm this is intended.
2109
- MachineInstr &MemMI = *I;
2110
- unsigned Mask = (1 << 12 ) * Scale - 1 ;
2111
- unsigned Low = Offset & Mask;
2112
- unsigned High = Offset - Low;
2113
- Register BaseReg = AArch64InstrInfo::getLdStBaseOp (MemMI).getReg ();
2114
- Register IndexReg = AArch64InstrInfo::getLdStOffsetOp (MemMI).getReg ();
2115
- MachineInstrBuilder AddMIB, MemMIB;
2116
-
2117
- // Add IndexReg, BaseReg, High (the BaseReg may be SP). High is Offset with its low bits (Offset & Mask) cleared; it is passed as High >> 12 together with a shift amount of 12.
2118
- AddMIB =
2119
- BuildMI (*I->getParent (), I, I->getDebugLoc (), TII->get (AArch64::ADDXri))
2120
- .addDef (IndexReg)
2121
- .addUse (BaseReg)
2122
- .addImm (High >> 12 ) // shifted value
2123
- .addImm (12 ); // shift 12
2124
- (void )AddMIB;
2125
- // Ld/St DestReg, IndexReg, Imm12. The immediate is Low divided by Scale, and the old index register now serves as the base.
2126
- unsigned NewOpc = getBaseAddressOpcode (I->getOpcode ());
2127
- MemMIB = BuildMI (*I->getParent (), I, I->getDebugLoc (), TII->get (NewOpc))
2128
- .add (getLdStRegOp (MemMI))
2129
- .add (AArch64InstrInfo::getLdStOffsetOp (MemMI))
2130
- .addImm (Low / Scale)
2131
- .setMemRefs (I->memoperands ())
2132
- .setMIFlags (I->mergeFlagsWith (*Update));
2133
- (void )MemMIB;
2134
-
2135
- ++NumConstOffsetFolded;
2136
- LLVM_DEBUG (dbgs () << " Creating base address load/store.\n " );
2137
- LLVM_DEBUG (dbgs () << " Replacing instructions:\n " );
2138
- LLVM_DEBUG (PrevI->print (dbgs ()));
2139
- LLVM_DEBUG (dbgs () << " " );
2140
- LLVM_DEBUG (Update->print (dbgs ()));
2141
- LLVM_DEBUG (dbgs () << " " );
2142
- LLVM_DEBUG (I->print (dbgs ()));
2143
- LLVM_DEBUG (dbgs () << " with instruction:\n " );
2144
- LLVM_DEBUG (((MachineInstr *)AddMIB)->print (dbgs ()));
2145
- LLVM_DEBUG (dbgs () << " " );
2146
- LLVM_DEBUG (((MachineInstr *)MemMIB)->print (dbgs ()));
2147
- LLVM_DEBUG (dbgs () << " \n " );
2148
-
2149
- // Erase the old instructions for the block: the memory access (I), the const mov (Update) and the instruction before it (PrevI, presumably the MOVZWi matched by isMatchingMovConstInsn -- verify).
2150
- I->eraseFromParent ();
2151
- PrevI->eraseFromParent ();
2152
- Update->eraseFromParent ();
2153
-
2154
- return NextI;
2155
- }
2156
-
2157
2051
bool AArch64LoadStoreOpt::isMatchingUpdateInsn (MachineInstr &MemMI,
2158
2052
MachineInstr &MI,
2159
2053
unsigned BaseReg, int Offset) {
@@ -2201,31 +2095,6 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
2201
2095
return false ;
2202
2096
}
2203
2097
2204
- bool AArch64LoadStoreOpt::isMatchingMovConstInsn (MachineInstr &MemMI,
2205
- MachineInstr &MI,
2206
- unsigned IndexReg,
2207
- unsigned &Offset) {
2208
- // The const mov (MOVKWi) must operate on the same register as the
2209
- // load/store index register; a super- or sub-register of it also matches.
2210
- if (MI.getOpcode () == AArch64::MOVKWi &&
2211
- TRI->isSuperOrSubRegisterEq (IndexReg, MI.getOperand (1 ).getReg ())) {
2212
-
2213
- // movz + movk hold a large offset of a Ld/St instruction. NOTE(review): only the opcode of the instruction preceding MI is checked below, not its destination register or shift -- confirm that is sufficient.
2214
- MachineBasicBlock::iterator B = MI.getParent ()->begin ();
2215
- MachineBasicBlock::iterator MBBI = &MI;
2216
- MBBI = prev_nodbg (MBBI, B);
2217
- MachineInstr &MovzMI = *MBBI;
2218
- if (MovzMI.getOpcode () == AArch64::MOVZWi) {
2219
- unsigned Low = MovzMI.getOperand (1 ).getImm ();
2220
- unsigned High = MI.getOperand (2 ).getImm () << MI.getOperand (3 ).getImm ();
2221
- Offset = High + Low;
2222
- // 12-bit optionally shifted immediates are legal for adds, so the combined offset must fit in 24 bits. Offset has already been written even when this returns false.
2223
- return Offset >> 24 == 0 ;
2224
- }
2225
- }
2226
- return false ;
2227
- }
2228
-
2229
2098
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward (
2230
2099
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
2231
2100
MachineBasicBlock::iterator E = I->getParent ()->end ();
@@ -2381,60 +2250,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
2381
2250
return E;
2382
2251
}
2383
2252
2384
- MachineBasicBlock::iterator
2385
- AArch64LoadStoreOpt::findMatchingConstOffsetBackward (
2386
- MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2387
- MachineBasicBlock::iterator B = I->getParent ()->begin ();
2388
- MachineBasicBlock::iterator E = I->getParent ()->end ();
2389
- MachineInstr &MemMI = *I;
2390
- MachineBasicBlock::iterator MBBI = I;
2391
-
2392
- // If the load/store is the first instruction in the block, there is
2393
- // nothing before it that could be a matching const mov.
2394
- if (MBBI == B)
2395
- return E;
2396
-
2397
- // Make sure the IndexReg is killed and the shift amount is zero.
2398
- // TODO: Relax this restriction (e.g. to handle extends); it keeps the processing simple for now.
2399
- if (!AArch64InstrInfo::getLdStOffsetOp (MemMI).isKill () ||
2400
- !AArch64InstrInfo::getLdStAmountOp (MemMI).isImm () ||
2401
- (AArch64InstrInfo::getLdStAmountOp (MemMI).getImm () != 0 ))
2402
- return E;
2403
-
2404
- Register IndexReg = AArch64InstrInfo::getLdStOffsetOp (MemMI).getReg ();
2405
-
2406
- // Track which register units have been modified and used between the
2407
- // candidate const mov and the load/store while walking backwards.
2408
- ModifiedRegUnits.clear ();
2409
- UsedRegUnits.clear ();
2410
- unsigned Count = 0 ;
2411
- do {
2412
- MBBI = prev_nodbg (MBBI, B);
2413
- MachineInstr &MI = *MBBI;
2414
-
2415
- // Don't count transient instructions towards the search limit since there
2416
- // may be different numbers of them if e.g. debug information is present.
2417
- if (!MI.isTransient ())
2418
- ++Count;
2419
-
2420
- // If we found a match, return it; Offset has been filled in by isMatchingMovConstInsn.
2421
- if (isMatchingMovConstInsn (*I, MI, IndexReg, Offset)) {
2422
- return MBBI;
2423
- }
2424
-
2425
- // Update the status of what the instruction clobbered and used.
2426
- LiveRegUnits::accumulateUsedDefed (MI, ModifiedRegUnits, UsedRegUnits, TRI);
2427
-
2428
- // Otherwise, if the index register is used or modified, we have no match,
2429
- // so return early.
2430
- if (!ModifiedRegUnits.available (IndexReg) ||
2431
- !UsedRegUnits.available (IndexReg))
2432
- return E;
2433
-
2434
- } while (MBBI != B && Count < Limit);
2435
- return E; // No match: reached the block start or the scan limit.
2436
- }
2437
-
2438
2253
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore (
2439
2254
MachineBasicBlock::iterator &MBBI) {
2440
2255
MachineInstr &MI = *MBBI;
@@ -2619,34 +2434,6 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
2619
2434
return false ;
2620
2435
}
2621
2436
2622
- bool AArch64LoadStoreOpt::tryToMergeIndexLdSt (MachineBasicBlock::iterator &MBBI,
2623
- int Scale) {
2624
- MachineInstr &MI = *MBBI;
2625
- MachineBasicBlock::iterator E = MI.getParent ()->end ();
2626
- MachineBasicBlock::iterator Update;
2627
-
2628
- // Don't know how to handle ld/st opcodes with an unscaled offset below, so bail.
2629
- if (TII->hasUnscaledLdStOffset (MI.getOpcode ()))
2630
- return false ;
2631
-
2632
- // Look back to try to find a const offset for the index LdSt instruction. For
2633
- // example,
2634
- // mov x8, #LargeImm ; = a * (1<<12) + imm12
2635
- // ldr x1, [x0, x8]
2636
- // merged into:
2637
- // add x8, x0, a * (1<<12)
2638
- // ldr x1, [x8, imm12]
2639
- unsigned Offset;
2640
- Update = findMatchingConstOffsetBackward (MBBI, LdStConstLimit, Offset);
2641
- if (Update != E && (Offset & (Scale - 1 )) == 0 ) {
2642
- // Merge the imm12 into the ld/st. The check above requires the low bits of Offset selected by Scale - 1 to be zero; on success MBBI is advanced to the iterator returned by the merge.
2643
- MBBI = mergeConstOffsetInsn (MBBI, Update, Offset, Scale);
2644
- return true ;
2645
- }
2646
-
2647
- return false ;
2648
- }
2649
-
2650
2437
bool AArch64LoadStoreOpt::optimizeBlock (MachineBasicBlock &MBB,
2651
2438
bool EnableNarrowZeroStOpt) {
2652
2439
@@ -2725,22 +2512,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
2725
2512
++MBBI;
2726
2513
}
2727
2514
2728
- // 5) Find a register assigned with a const value that can be combined
2729
- // into the load or store. e.g.,
2730
- // mov x8, #LargeImm ; = a * (1<<12) + imm12
2731
- // ldr x1, [x0, x8]
2732
- // ; becomes
2733
- // add x8, x0, a * (1<<12)
2734
- // ldr x1, [x8, imm12]
2735
- for (MachineBasicBlock::iterator MBBI = MBB.begin (), E = MBB.end ();
2736
- MBBI != E;) {
2737
- int Scale;
2738
- if (isMergeableIndexLdSt (*MBBI, Scale) && tryToMergeIndexLdSt (MBBI, Scale))
2739
- Modified = true ;
2740
- else
2741
- ++MBBI;
2742
- }
2743
-
2744
2515
return Modified;
2745
2516
}
2746
2517
0 commit comments