Skip to content

Commit bab01ae

Browse files
davemgreen authored and tstellar committed Jan 29, 2024
Revert "[AArch64] merge index address with large offset into base address"
This reverts commit 32878c2 due to #79756 and #76202. (cherry picked from commit 915c3d9)
1 parent 0680e84 commit bab01ae

File tree

5 files changed

+12
-250
lines changed

5 files changed

+12
-250
lines changed
 

‎llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

-10
Original file line number | Diff line number | Diff line change
@@ -4098,16 +4098,6 @@ AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
40984098
return MI.getOperand(Idx);
40994099
}
41004100

4101-
const MachineOperand &
4102-
AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4103-
switch (MI.getOpcode()) {
4104-
default:
4105-
llvm_unreachable("Unexpected opcode");
4106-
case AArch64::LDRBBroX:
4107-
return MI.getOperand(4);
4108-
}
4109-
}
4110-
41114101
static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
41124102
Register Reg) {
41134103
if (MI.getParent() == nullptr)

‎llvm/lib/Target/AArch64/AArch64InstrInfo.h

-3
Original file line number | Diff line number | Diff line change
@@ -111,9 +111,6 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
111111
/// Returns the immediate offset operator of a load/store.
112112
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
113113

114-
/// Returns the shift amount operator of a load/store.
115-
static const MachineOperand &getLdStAmountOp(const MachineInstr &MI);
116-
117114
/// Returns whether the instruction is FP or NEON.
118115
static bool isFpOrNEON(const MachineInstr &MI);
119116

‎llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

-229
Original file line number | Diff line number | Diff line change
@@ -62,8 +62,6 @@ STATISTIC(NumUnscaledPairCreated,
6262
"Number of load/store from unscaled generated");
6363
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
6464
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
65-
STATISTIC(NumConstOffsetFolded,
66-
"Number of const offset of index address folded");
6765

6866
DEBUG_COUNTER(RegRenamingCounter, DEBUG_TYPE "-reg-renaming",
6967
"Controls which pairs are considered for renaming");
@@ -77,11 +75,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
7775
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
7876
cl::Hidden);
7977

80-
// The LdStConstLimit limits how far we search for const offset instructions
81-
// when we form index address load/store instructions.
82-
static cl::opt<unsigned> LdStConstLimit("aarch64-load-store-const-scan-limit",
83-
cl::init(10), cl::Hidden);
84-
8578
// Enable register renaming to find additional store pairing opportunities.
8679
static cl::opt<bool> EnableRenaming("aarch64-load-store-renaming",
8780
cl::init(true), cl::Hidden);
@@ -178,13 +171,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
178171
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
179172
int UnscaledOffset, unsigned Limit);
180173

181-
// Scan the instruction list to find a register assigned with a const
182-
// value that can be combined with the current instruction (a load or store)
183-
// using base addressing with writeback. Scan forwards.
184-
MachineBasicBlock::iterator
185-
findMatchingConstOffsetBackward(MachineBasicBlock::iterator I, unsigned Limit,
186-
unsigned &Offset);
187-
188174
// Scan the instruction list to find a base register update that can
189175
// be combined with the current instruction (a load or store) using
190176
// pre or post indexed addressing with writeback. Scan backwards.
@@ -196,19 +182,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
196182
bool isMatchingUpdateInsn(MachineInstr &MemMI, MachineInstr &MI,
197183
unsigned BaseReg, int Offset);
198184

199-
bool isMatchingMovConstInsn(MachineInstr &MemMI, MachineInstr &MI,
200-
unsigned IndexReg, unsigned &Offset);
201-
202185
// Merge a pre- or post-index base register update into a ld/st instruction.
203186
MachineBasicBlock::iterator
204187
mergeUpdateInsn(MachineBasicBlock::iterator I,
205188
MachineBasicBlock::iterator Update, bool IsPreIdx);
206189

207-
MachineBasicBlock::iterator
208-
mergeConstOffsetInsn(MachineBasicBlock::iterator I,
209-
MachineBasicBlock::iterator Update, unsigned Offset,
210-
int Scale);
211-
212190
// Find and merge zero store instructions.
213191
bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
214192

@@ -221,9 +199,6 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
221199
// Find and merge a base register updates before or after a ld/st instruction.
222200
bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
223201

224-
// Find and merge a index ldr/st instructions into a base ld/st instruction.
225-
bool tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI, int Scale);
226-
227202
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
228203

229204
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -506,16 +481,6 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
506481
}
507482
}
508483

509-
static unsigned getBaseAddressOpcode(unsigned Opc) {
510-
// TODO: Add more index address loads/stores.
511-
switch (Opc) {
512-
default:
513-
llvm_unreachable("Opcode has no base address equivalent!");
514-
case AArch64::LDRBBroX:
515-
return AArch64::LDRBBui;
516-
}
517-
}
518-
519484
static unsigned getPostIndexedOpcode(unsigned Opc) {
520485
switch (Opc) {
521486
default:
@@ -757,20 +722,6 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
757722
}
758723
}
759724

760-
// Make sure this is a reg+reg Ld/St
761-
static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
762-
unsigned Opc = MI.getOpcode();
763-
switch (Opc) {
764-
default:
765-
return false;
766-
// Scaled instructions.
767-
// TODO: Add more index address loads/stores.
768-
case AArch64::LDRBBroX:
769-
Scale = 1;
770-
return true;
771-
}
772-
}
773-
774725
static bool isRewritableImplicitDef(unsigned Opc) {
775726
switch (Opc) {
776727
default:
@@ -2097,63 +2048,6 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
20972048
return NextI;
20982049
}
20992050

2100-
MachineBasicBlock::iterator
2101-
AArch64LoadStoreOpt::mergeConstOffsetInsn(MachineBasicBlock::iterator I,
2102-
MachineBasicBlock::iterator Update,
2103-
unsigned Offset, int Scale) {
2104-
assert((Update->getOpcode() == AArch64::MOVKWi) &&
2105-
"Unexpected const mov instruction to merge!");
2106-
MachineBasicBlock::iterator E = I->getParent()->end();
2107-
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
2108-
MachineBasicBlock::iterator PrevI = prev_nodbg(Update, E);
2109-
MachineInstr &MemMI = *I;
2110-
unsigned Mask = (1 << 12) * Scale - 1;
2111-
unsigned Low = Offset & Mask;
2112-
unsigned High = Offset - Low;
2113-
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
2114-
Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2115-
MachineInstrBuilder AddMIB, MemMIB;
2116-
2117-
// Add IndexReg, BaseReg, High (the BaseReg may be SP)
2118-
AddMIB =
2119-
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(AArch64::ADDXri))
2120-
.addDef(IndexReg)
2121-
.addUse(BaseReg)
2122-
.addImm(High >> 12) // shifted value
2123-
.addImm(12); // shift 12
2124-
(void)AddMIB;
2125-
// Ld/St DestReg, IndexReg, Imm12
2126-
unsigned NewOpc = getBaseAddressOpcode(I->getOpcode());
2127-
MemMIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
2128-
.add(getLdStRegOp(MemMI))
2129-
.add(AArch64InstrInfo::getLdStOffsetOp(MemMI))
2130-
.addImm(Low / Scale)
2131-
.setMemRefs(I->memoperands())
2132-
.setMIFlags(I->mergeFlagsWith(*Update));
2133-
(void)MemMIB;
2134-
2135-
++NumConstOffsetFolded;
2136-
LLVM_DEBUG(dbgs() << "Creating base address load/store.\n");
2137-
LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
2138-
LLVM_DEBUG(PrevI->print(dbgs()));
2139-
LLVM_DEBUG(dbgs() << " ");
2140-
LLVM_DEBUG(Update->print(dbgs()));
2141-
LLVM_DEBUG(dbgs() << " ");
2142-
LLVM_DEBUG(I->print(dbgs()));
2143-
LLVM_DEBUG(dbgs() << " with instruction:\n ");
2144-
LLVM_DEBUG(((MachineInstr *)AddMIB)->print(dbgs()));
2145-
LLVM_DEBUG(dbgs() << " ");
2146-
LLVM_DEBUG(((MachineInstr *)MemMIB)->print(dbgs()));
2147-
LLVM_DEBUG(dbgs() << "\n");
2148-
2149-
// Erase the old instructions for the block.
2150-
I->eraseFromParent();
2151-
PrevI->eraseFromParent();
2152-
Update->eraseFromParent();
2153-
2154-
return NextI;
2155-
}
2156-
21572051
bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
21582052
MachineInstr &MI,
21592053
unsigned BaseReg, int Offset) {
@@ -2201,31 +2095,6 @@ bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
22012095
return false;
22022096
}
22032097

2204-
bool AArch64LoadStoreOpt::isMatchingMovConstInsn(MachineInstr &MemMI,
2205-
MachineInstr &MI,
2206-
unsigned IndexReg,
2207-
unsigned &Offset) {
2208-
// The update instruction source and destination register must be the
2209-
// same as the load/store index register.
2210-
if (MI.getOpcode() == AArch64::MOVKWi &&
2211-
TRI->isSuperOrSubRegisterEq(IndexReg, MI.getOperand(1).getReg())) {
2212-
2213-
// movz + movk hold a large offset of a Ld/St instruction.
2214-
MachineBasicBlock::iterator B = MI.getParent()->begin();
2215-
MachineBasicBlock::iterator MBBI = &MI;
2216-
MBBI = prev_nodbg(MBBI, B);
2217-
MachineInstr &MovzMI = *MBBI;
2218-
if (MovzMI.getOpcode() == AArch64::MOVZWi) {
2219-
unsigned Low = MovzMI.getOperand(1).getImm();
2220-
unsigned High = MI.getOperand(2).getImm() << MI.getOperand(3).getImm();
2221-
Offset = High + Low;
2222-
// 12-bit optionally shifted immediates are legal for adds.
2223-
return Offset >> 24 == 0;
2224-
}
2225-
}
2226-
return false;
2227-
}
2228-
22292098
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
22302099
MachineBasicBlock::iterator I, int UnscaledOffset, unsigned Limit) {
22312100
MachineBasicBlock::iterator E = I->getParent()->end();
@@ -2381,60 +2250,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
23812250
return E;
23822251
}
23832252

2384-
MachineBasicBlock::iterator
2385-
AArch64LoadStoreOpt::findMatchingConstOffsetBackward(
2386-
MachineBasicBlock::iterator I, unsigned Limit, unsigned &Offset) {
2387-
MachineBasicBlock::iterator B = I->getParent()->begin();
2388-
MachineBasicBlock::iterator E = I->getParent()->end();
2389-
MachineInstr &MemMI = *I;
2390-
MachineBasicBlock::iterator MBBI = I;
2391-
2392-
// If the load is the first instruction in the block, there's obviously
2393-
// not any matching load or store.
2394-
if (MBBI == B)
2395-
return E;
2396-
2397-
// Make sure the IndexReg is killed and the shift amount is zero.
2398-
// TODO: Relex this restriction to extend, simplify processing now.
2399-
if (!AArch64InstrInfo::getLdStOffsetOp(MemMI).isKill() ||
2400-
!AArch64InstrInfo::getLdStAmountOp(MemMI).isImm() ||
2401-
(AArch64InstrInfo::getLdStAmountOp(MemMI).getImm() != 0))
2402-
return E;
2403-
2404-
Register IndexReg = AArch64InstrInfo::getLdStOffsetOp(MemMI).getReg();
2405-
2406-
// Track which register units have been modified and used between the first
2407-
// insn (inclusive) and the second insn.
2408-
ModifiedRegUnits.clear();
2409-
UsedRegUnits.clear();
2410-
unsigned Count = 0;
2411-
do {
2412-
MBBI = prev_nodbg(MBBI, B);
2413-
MachineInstr &MI = *MBBI;
2414-
2415-
// Don't count transient instructions towards the search limit since there
2416-
// may be different numbers of them if e.g. debug information is present.
2417-
if (!MI.isTransient())
2418-
++Count;
2419-
2420-
// If we found a match, return it.
2421-
if (isMatchingMovConstInsn(*I, MI, IndexReg, Offset)) {
2422-
return MBBI;
2423-
}
2424-
2425-
// Update the status of what the instruction clobbered and used.
2426-
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2427-
2428-
// Otherwise, if the index register is used or modified, we have no match,
2429-
// so return early.
2430-
if (!ModifiedRegUnits.available(IndexReg) ||
2431-
!UsedRegUnits.available(IndexReg))
2432-
return E;
2433-
2434-
} while (MBBI != B && Count < Limit);
2435-
return E;
2436-
}
2437-
24382253
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
24392254
MachineBasicBlock::iterator &MBBI) {
24402255
MachineInstr &MI = *MBBI;
@@ -2619,34 +2434,6 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
26192434
return false;
26202435
}
26212436

2622-
bool AArch64LoadStoreOpt::tryToMergeIndexLdSt(MachineBasicBlock::iterator &MBBI,
2623-
int Scale) {
2624-
MachineInstr &MI = *MBBI;
2625-
MachineBasicBlock::iterator E = MI.getParent()->end();
2626-
MachineBasicBlock::iterator Update;
2627-
2628-
// Don't know how to handle unscaled pre/post-index versions below, so bail.
2629-
if (TII->hasUnscaledLdStOffset(MI.getOpcode()))
2630-
return false;
2631-
2632-
// Look back to try to find a const offset for index LdSt instruction. For
2633-
// example,
2634-
// mov x8, #LargeImm ; = a * (1<<12) + imm12
2635-
// ldr x1, [x0, x8]
2636-
// merged into:
2637-
// add x8, x0, a * (1<<12)
2638-
// ldr x1, [x8, imm12]
2639-
unsigned Offset;
2640-
Update = findMatchingConstOffsetBackward(MBBI, LdStConstLimit, Offset);
2641-
if (Update != E && (Offset & (Scale - 1)) == 0) {
2642-
// Merge the imm12 into the ld/st.
2643-
MBBI = mergeConstOffsetInsn(MBBI, Update, Offset, Scale);
2644-
return true;
2645-
}
2646-
2647-
return false;
2648-
}
2649-
26502437
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
26512438
bool EnableNarrowZeroStOpt) {
26522439

@@ -2725,22 +2512,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
27252512
++MBBI;
27262513
}
27272514

2728-
// 5) Find a register assigned with a const value that can be combined with
2729-
// into the load or store. e.g.,
2730-
// mov x8, #LargeImm ; = a * (1<<12) + imm12
2731-
// ldr x1, [x0, x8]
2732-
// ; becomes
2733-
// add x8, x0, a * (1<<12)
2734-
// ldr x1, [x8, imm12]
2735-
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2736-
MBBI != E;) {
2737-
int Scale;
2738-
if (isMergeableIndexLdSt(*MBBI, Scale) && tryToMergeIndexLdSt(MBBI, Scale))
2739-
Modified = true;
2740-
else
2741-
++MBBI;
2742-
}
2743-
27442515
return Modified;
27452516
}
27462517

‎llvm/test/CodeGen/AArch64/arm64-addrmode.ll

+9-6
Original file line number | Diff line number | Diff line change
@@ -214,8 +214,9 @@ define void @t17(i64 %a) {
214214
define i8 @LdOffset_i8(ptr %a) {
215215
; CHECK-LABEL: LdOffset_i8:
216216
; CHECK: // %bb.0:
217-
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
218-
; CHECK-NEXT: ldrb w0, [x8, #3704]
217+
; CHECK-NEXT: mov w8, #56952 // =0xde78
218+
; CHECK-NEXT: movk w8, #15, lsl #16
219+
; CHECK-NEXT: ldrb w0, [x0, x8]
219220
; CHECK-NEXT: ret
220221
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
221222
%val = load i8, ptr %arrayidx, align 1
@@ -226,8 +227,9 @@ define i8 @LdOffset_i8(ptr %a) {
226227
define i32 @LdOffset_i8_zext32(ptr %a) {
227228
; CHECK-LABEL: LdOffset_i8_zext32:
228229
; CHECK: // %bb.0:
229-
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
230-
; CHECK-NEXT: ldrb w0, [x8, #3704]
230+
; CHECK-NEXT: mov w8, #56952 // =0xde78
231+
; CHECK-NEXT: movk w8, #15, lsl #16
232+
; CHECK-NEXT: ldrb w0, [x0, x8]
231233
; CHECK-NEXT: ret
232234
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
233235
%val = load i8, ptr %arrayidx, align 1
@@ -253,8 +255,9 @@ define i32 @LdOffset_i8_sext32(ptr %a) {
253255
define i64 @LdOffset_i8_zext64(ptr %a) {
254256
; CHECK-LABEL: LdOffset_i8_zext64:
255257
; CHECK: // %bb.0:
256-
; CHECK-NEXT: add x8, x0, #253, lsl #12 // =1036288
257-
; CHECK-NEXT: ldrb w0, [x8, #3704]
258+
; CHECK-NEXT: mov w8, #56952 // =0xde78
259+
; CHECK-NEXT: movk w8, #15, lsl #16
260+
; CHECK-NEXT: ldrb w0, [x0, x8]
258261
; CHECK-NEXT: ret
259262
%arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
260263
%val = load i8, ptr %arrayidx, align 1

‎llvm/test/CodeGen/AArch64/large-offset-ldr-merge.mir

+3-2
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,9 @@ body: |
1414
; CHECK-LABEL: name: LdOffset
1515
; CHECK: liveins: $x0
1616
; CHECK-NEXT: {{ $}}
17-
; CHECK-NEXT: $x8 = ADDXri $x0, 253, 12
18-
; CHECK-NEXT: renamable $w0 = LDRBBui killed renamable $x8, 3704
17+
; CHECK-NEXT: renamable $w8 = MOVZWi 56952, 0
18+
; CHECK-NEXT: renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8
19+
; CHECK-NEXT: renamable $w0 = LDRBBroX killed renamable $x0, killed renamable $x8, 0, 0
1920
; CHECK-NEXT: RET undef $lr, implicit $w0
2021
renamable $w8 = MOVZWi 56952, 0
2122
renamable $w8 = MOVKWi $w8, 15, 16, implicit-def $x8

0 commit comments

Comments
 (0)