@@ -62,6 +62,7 @@ class SIPeepholeSDWA {
62
62
std::unique_ptr<SDWAOperand> matchSDWAOperand (MachineInstr &MI);
63
63
void pseudoOpConvertToVOP2 (MachineInstr &MI,
64
64
const GCNSubtarget &ST) const ;
65
+ MachineInstr *createSDWAVersion (MachineInstr &MI);
65
66
bool convertToSDWA (MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
66
67
void legalizeScalarOperands (MachineInstr &MI, const GCNSubtarget &ST) const ;
67
68
@@ -85,11 +86,18 @@ class SIPeepholeSDWALegacy : public MachineFunctionPass {
85
86
}
86
87
};
87
88
89
+ using namespace AMDGPU ::SDWA;
90
+
88
91
class SDWAOperand {
89
92
private:
90
93
MachineOperand *Target; // Operand that would be used in converted instruction
91
94
MachineOperand *Replaced; // Operand that would be replace by Target
92
95
96
+ /// Returns true iff the SDWA selection of this SDWAOperand can be combined
97
+ /// with the SDWA selections of its uses in \p MI.
98
+ virtual bool canCombineSelections (const MachineInstr &MI,
99
+ const SIInstrInfo *TII) = 0;
100
+
93
101
public:
94
102
SDWAOperand (MachineOperand *TargetOp, MachineOperand *ReplacedOp)
95
103
: Target(TargetOp), Replaced(ReplacedOp) {
@@ -118,8 +126,6 @@ class SDWAOperand {
118
126
#endif
119
127
};
120
128
121
- using namespace AMDGPU ::SDWA;
122
-
123
129
class SDWASrcOperand : public SDWAOperand {
124
130
private:
125
131
SdwaSel SrcSel;
@@ -131,13 +137,15 @@ class SDWASrcOperand : public SDWAOperand {
131
137
SDWASrcOperand (MachineOperand *TargetOp, MachineOperand *ReplacedOp,
132
138
SdwaSel SrcSel_ = DWORD, bool Abs_ = false , bool Neg_ = false ,
133
139
bool Sext_ = false )
134
- : SDWAOperand(TargetOp, ReplacedOp),
135
- SrcSel (SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
140
+ : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
141
+ Neg (Neg_), Sext(Sext_) {}
136
142
137
143
MachineInstr *potentialToConvert (const SIInstrInfo *TII,
138
144
const GCNSubtarget &ST,
139
145
SDWAOperandsMap *PotentialMatches = nullptr ) override ;
140
146
bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
147
+ bool canCombineSelections (const MachineInstr &MI,
148
+ const SIInstrInfo *TII) override ;
141
149
142
150
SdwaSel getSrcSel () const { return SrcSel; }
143
151
bool getAbs () const { return Abs; }
@@ -158,15 +166,16 @@ class SDWADstOperand : public SDWAOperand {
158
166
DstUnused DstUn;
159
167
160
168
public:
161
-
162
169
SDWADstOperand (MachineOperand *TargetOp, MachineOperand *ReplacedOp,
163
170
SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
164
- : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
171
+ : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
165
172
166
173
MachineInstr *potentialToConvert (const SIInstrInfo *TII,
167
174
const GCNSubtarget &ST,
168
175
SDWAOperandsMap *PotentialMatches = nullptr ) override ;
169
176
bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
177
+ bool canCombineSelections (const MachineInstr &MI,
178
+ const SIInstrInfo *TII) override ;
170
179
171
180
SdwaSel getDstSel () const { return DstSel; }
172
181
DstUnused getDstUnused () const { return DstUn; }
@@ -187,6 +196,8 @@ class SDWADstPreserveOperand : public SDWADstOperand {
187
196
Preserve (PreserveOp) {}
188
197
189
198
bool convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) override ;
199
+ bool canCombineSelections (const MachineInstr &MI,
200
+ const SIInstrInfo *TII) override ;
190
201
191
202
MachineOperand *getPreservedOperand () const { return Preserve; }
192
203
@@ -314,6 +325,38 @@ static MachineOperand *findSingleRegDef(const MachineOperand *Reg,
314
325
return nullptr ;
315
326
}
316
327
328
+ // / Combine an SDWA instruction's existing SDWA selection \p Sel with
329
+ // / the SDWA selection \p OperandSel of its operand. If the selections
330
+ // / are compatible, return the combined selection, otherwise return a
331
+ // / nullopt.
332
+ // / For example, if we have Sel = BYTE_0 Sel and OperandSel = WORD_1:
333
+ // / BYTE_0 Sel (WORD_1 Sel (%X)) -> BYTE_2 Sel (%X)
334
+ static std::optional<SdwaSel> combineSdwaSel (SdwaSel Sel, SdwaSel OperandSel) {
335
+ if (Sel == SdwaSel::DWORD)
336
+ return OperandSel;
337
+
338
+ if (Sel == OperandSel || OperandSel == SdwaSel::DWORD)
339
+ return Sel;
340
+
341
+ if (Sel == SdwaSel::WORD_1 || Sel == SdwaSel::BYTE_2 ||
342
+ Sel == SdwaSel::BYTE_3)
343
+ return {};
344
+
345
+ if (OperandSel == SdwaSel::WORD_0)
346
+ return Sel;
347
+
348
+ if (OperandSel == SdwaSel::WORD_1) {
349
+ if (Sel == SdwaSel::BYTE_0)
350
+ return SdwaSel::BYTE_2;
351
+ if (Sel == SdwaSel::BYTE_1)
352
+ return SdwaSel::BYTE_3;
353
+ if (Sel == SdwaSel::WORD_0)
354
+ return SdwaSel::WORD_1;
355
+ }
356
+
357
+ return {};
358
+ }
359
+
317
360
uint64_t SDWASrcOperand::getSrcMods (const SIInstrInfo *TII,
318
361
const MachineOperand *SrcOp) const {
319
362
uint64_t Mods = 0 ;
@@ -350,7 +393,8 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
350
393
351
394
for (MachineInstr &UseMI : getMRI ()->use_nodbg_instructions (Reg->getReg ()))
352
395
// Check that all instructions that use Reg can be converted
353
- if (!isConvertibleToSDWA (UseMI, ST, TII))
396
+ if (!isConvertibleToSDWA (UseMI, ST, TII) ||
397
+ !canCombineSelections (UseMI, TII))
354
398
return nullptr ;
355
399
356
400
// Now that it's guaranteed all uses are legal, iterate over the uses again
@@ -372,7 +416,9 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
372
416
if (!PotentialMO)
373
417
return nullptr ;
374
418
375
- return PotentialMO->getParent ();
419
+ MachineInstr *Parent = PotentialMO->getParent ();
420
+
421
+ return canCombineSelections (*Parent, TII) ? Parent : nullptr ;
376
422
}
377
423
378
424
bool SDWASrcOperand::convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) {
@@ -451,13 +497,55 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
451
497
}
452
498
copyRegOperand (*Src, *getTargetOperand ());
453
499
if (!IsPreserveSrc) {
454
- SrcSel->setImm (getSrcSel ());
500
+ SdwaSel ExistingSel = static_cast <SdwaSel>(SrcSel->getImm ());
501
+ SrcSel->setImm (*combineSdwaSel (ExistingSel, getSrcSel ()));
455
502
SrcMods->setImm (getSrcMods (TII, Src));
456
503
}
457
504
getTargetOperand ()->setIsKill (false );
458
505
return true ;
459
506
}
460
507
508
+ // / Verify that the SDWA selection operand \p SrcSelOpName of the SDWA
509
+ // / instruction \p MI can be combined with the selection \p OpSel.
510
+ static bool canCombineOpSel (const MachineInstr &MI, const SIInstrInfo *TII,
511
+ AMDGPU::OpName SrcSelOpName, SdwaSel OpSel) {
512
+ assert (TII->isSDWA (MI.getOpcode ()));
513
+
514
+ const MachineOperand *SrcSelOp = TII->getNamedOperand (MI, SrcSelOpName);
515
+ SdwaSel SrcSel = static_cast <SdwaSel>(SrcSelOp->getImm ());
516
+
517
+ return combineSdwaSel (SrcSel, OpSel).has_value ();
518
+ }
519
+
520
+ // / Verify that \p Op is the same register as the operand of the SDWA
521
+ // / instruction \p MI named by \p SrcOpName and that the SDWA
522
+ // / selection \p SrcSelOpName can be combined with the \p OpSel.
523
+ static bool canCombineOpSel (const MachineInstr &MI, const SIInstrInfo *TII,
524
+ AMDGPU::OpName SrcOpName,
525
+ AMDGPU::OpName SrcSelOpName, MachineOperand *Op,
526
+ SdwaSel OpSel) {
527
+ assert (TII->isSDWA (MI.getOpcode ()));
528
+
529
+ const MachineOperand *Src = TII->getNamedOperand (MI, SrcOpName);
530
+ if (!Src || !isSameReg (*Src, *Op))
531
+ return true ;
532
+
533
+ return canCombineOpSel (MI, TII, SrcSelOpName, OpSel);
534
+ }
535
+
536
+ bool SDWASrcOperand::canCombineSelections (const MachineInstr &MI,
537
+ const SIInstrInfo *TII) {
538
+ if (!TII->isSDWA (MI.getOpcode ()))
539
+ return true ;
540
+
541
+ using namespace AMDGPU ;
542
+
543
+ return canCombineOpSel (MI, TII, OpName::src0, OpName::src0_sel,
544
+ getReplacedOperand (), getSrcSel ()) &&
545
+ canCombineOpSel (MI, TII, OpName::src1, OpName::src1_sel,
546
+ getReplacedOperand (), getSrcSel ());
547
+ }
548
+
461
549
MachineInstr *SDWADstOperand::potentialToConvert (const SIInstrInfo *TII,
462
550
const GCNSubtarget &ST,
463
551
SDWAOperandsMap *PotentialMatches) {
@@ -476,7 +564,8 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
476
564
return nullptr ;
477
565
}
478
566
479
- return PotentialMO->getParent ();
567
+ MachineInstr *Parent = PotentialMO->getParent ();
568
+ return canCombineSelections (*Parent, TII) ? Parent : nullptr ;
480
569
}
481
570
482
571
bool SDWADstOperand::convertToSDWA (MachineInstr &MI, const SIInstrInfo *TII) {
@@ -498,7 +587,10 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
498
587
copyRegOperand (*Operand, *getTargetOperand ());
499
588
MachineOperand *DstSel= TII->getNamedOperand (MI, AMDGPU::OpName::dst_sel);
500
589
assert (DstSel);
501
- DstSel->setImm (getDstSel ());
590
+
591
+ SdwaSel ExistingSel = static_cast <SdwaSel>(DstSel->getImm ());
592
+ DstSel->setImm (combineSdwaSel (ExistingSel, getDstSel ()).value ());
593
+
502
594
MachineOperand *DstUnused= TII->getNamedOperand (MI, AMDGPU::OpName::dst_unused);
503
595
assert (DstUnused);
504
596
DstUnused->setImm (getDstUnused ());
@@ -509,6 +601,14 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
509
601
return true ;
510
602
}
511
603
604
+ bool SDWADstOperand::canCombineSelections (const MachineInstr &MI,
605
+ const SIInstrInfo *TII) {
606
+ if (!TII->isSDWA (MI.getOpcode ()))
607
+ return true ;
608
+
609
+ return canCombineOpSel (MI, TII, AMDGPU::OpName::dst_sel, getDstSel ());
610
+ }
611
+
512
612
bool SDWADstPreserveOperand::convertToSDWA (MachineInstr &MI,
513
613
const SIInstrInfo *TII) {
514
614
// MI should be moved right before v_or_b32.
@@ -538,6 +638,11 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
538
638
return SDWADstOperand::convertToSDWA (MI, TII);
539
639
}
540
640
641
+ bool SDWADstPreserveOperand::canCombineSelections (const MachineInstr &MI,
642
+ const SIInstrInfo *TII) {
643
+ return SDWADstOperand::canCombineSelections (MI, TII);
644
+ }
645
+
541
646
std::optional<int64_t >
542
647
SIPeepholeSDWA::foldToImm (const MachineOperand &Op) const {
543
648
if (Op.isImm ()) {
@@ -962,11 +1067,8 @@ bool isConvertibleToSDWA(MachineInstr &MI,
962
1067
const SIInstrInfo* TII) {
963
1068
// Check if this is already an SDWA instruction
964
1069
unsigned Opc = MI.getOpcode ();
965
- if (TII->isSDWA (Opc)) {
966
- // FIXME: Reenable after fixing selection handling.
967
- // Cf. llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll
968
- return false ;
969
- }
1070
+ if (TII->isSDWA (Opc))
1071
+ return true ;
970
1072
971
1073
// Check if this instruction has opcode that supports SDWA
972
1074
if (AMDGPU::getSDWAOp (Opc) == -1 )
@@ -1024,21 +1126,13 @@ bool isConvertibleToSDWA(MachineInstr &MI,
1024
1126
}
1025
1127
} // namespace
1026
1128
1027
- bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
1028
- const SDWAOperandsVector &SDWAOperands) {
1029
-
1030
- LLVM_DEBUG (dbgs () << " Convert instruction:" << MI);
1031
-
1032
- // Convert to sdwa
1033
- int SDWAOpcode;
1129
+ MachineInstr *SIPeepholeSDWA::createSDWAVersion (MachineInstr &MI) {
1034
1130
unsigned Opcode = MI.getOpcode ();
1035
- if (TII->isSDWA (Opcode)) {
1036
- SDWAOpcode = Opcode;
1037
- } else {
1038
- SDWAOpcode = AMDGPU::getSDWAOp (Opcode);
1039
- if (SDWAOpcode == -1 )
1040
- SDWAOpcode = AMDGPU::getSDWAOp (AMDGPU::getVOPe32 (Opcode));
1041
- }
1131
+ assert (!TII->isSDWA (Opcode));
1132
+
1133
+ int SDWAOpcode = AMDGPU::getSDWAOp (Opcode);
1134
+ if (SDWAOpcode == -1 )
1135
+ SDWAOpcode = AMDGPU::getSDWAOp (AMDGPU::getVOPe32 (Opcode));
1042
1136
assert (SDWAOpcode != -1 );
1043
1137
1044
1138
const MCInstrDesc &SDWADesc = TII->get (SDWAOpcode);
@@ -1172,6 +1266,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
1172
1266
SDWAInst->tieOperands (PreserveDstIdx, SDWAInst->getNumOperands () - 1 );
1173
1267
}
1174
1268
1269
+ return SDWAInst.getInstr ();
1270
+ }
1271
+
1272
+ bool SIPeepholeSDWA::convertToSDWA (MachineInstr &MI,
1273
+ const SDWAOperandsVector &SDWAOperands) {
1274
+ LLVM_DEBUG (dbgs () << " Convert instruction:" << MI);
1275
+
1276
+ MachineInstr *SDWAInst;
1277
+ if (TII->isSDWA (MI.getOpcode ())) {
1278
+ // Clone the instruction to allow revoking changes
1279
+ // made to MI during the processing of the operands
1280
+ // if the conversion fails.
1281
+ SDWAInst = MI.getParent ()->getParent ()->CloneMachineInstr (&MI);
1282
+ MI.getParent ()->insert (MI.getIterator (), SDWAInst);
1283
+ } else {
1284
+ SDWAInst = createSDWAVersion (MI);
1285
+ }
1286
+
1175
1287
// Apply all sdwa operand patterns.
1176
1288
bool Converted = false ;
1177
1289
for (auto &Operand : SDWAOperands) {
@@ -1190,19 +1302,18 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
1190
1302
Converted |= Operand->convertToSDWA (*SDWAInst, TII);
1191
1303
}
1192
1304
1193
- if (Converted) {
1194
- ConvertedInstructions.push_back (SDWAInst);
1195
- for (MachineOperand &MO : SDWAInst->uses ()) {
1196
- if (!MO.isReg ())
1197
- continue ;
1198
-
1199
- MRI->clearKillFlags (MO.getReg ());
1200
- }
1201
- } else {
1305
+ if (!Converted) {
1202
1306
SDWAInst->eraseFromParent ();
1203
1307
return false ;
1204
1308
}
1205
1309
1310
+ ConvertedInstructions.push_back (SDWAInst);
1311
+ for (MachineOperand &MO : SDWAInst->uses ()) {
1312
+ if (!MO.isReg ())
1313
+ continue ;
1314
+
1315
+ MRI->clearKillFlags (MO.getReg ());
1316
+ }
1206
1317
LLVM_DEBUG (dbgs () << " \n Into:" << *SDWAInst << ' \n ' );
1207
1318
++NumSDWAInstructionsPeepholed;
1208
1319
@@ -1285,10 +1396,11 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) {
1285
1396
1286
1397
for (const auto &OperandPair : SDWAOperands) {
1287
1398
const auto &Operand = OperandPair.second ;
1288
- MachineInstr *PotentialMI = Operand->potentialToConvert (TII, ST, &PotentialMatches);
1289
- if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST, TII)) {
1399
+ MachineInstr *PotentialMI =
1400
+ Operand->potentialToConvert (TII, ST, &PotentialMatches);
1401
+
1402
+ if (PotentialMI && isConvertibleToSDWA (*PotentialMI, ST, TII))
1290
1403
PotentialMatches[PotentialMI].push_back (Operand.get ());
1291
- }
1292
1404
}
1293
1405
1294
1406
for (auto &PotentialPair : PotentialMatches) {
0 commit comments