Skip to content

[AMDGPU] Account for existing SDWA selections #123221

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 48 commits into from
Mar 3, 2025
Merged
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
b29c0f2
[AMDGPU] Account for existing SDWA selections
jrbyrnes Jan 10, 2025
8d16c1c
[AMDGPU] Correct transformation and simplify combineSdwaSel
frederik-h Jan 14, 2025
20e23b6
[AMDGPU] Change formatting of combineSdwaSel
frederik-h Jan 16, 2025
663b94c
[AMDGPU] Remove dead branch from SIPeepholeSDWA::convertToSDWA
frederik-h Jan 16, 2025
c2dfca0
[AMDGPU] Extract SDWA instruction creation from convertToSDWA
frederik-h Jan 16, 2025
38bd038
[AMDGPU] Unify loops in SIPeepholeSDWA::convertToSDWA
frederik-h Jan 16, 2025
e5923ac
[AMDGPU] Invert if statement in SIPeepholeSDWA::convertToSDWA
frederik-h Jan 16, 2025
7034d2d
[AMDGPU] Rename "Combine" to "CombineSelections" in SIPeepholeSDWA
frederik-h Jan 16, 2025
bbe9ab8
[AMDGPU] Change combineSdwaSel to use optional return type
frederik-h Jan 16, 2025
245c93b
[AMDGPU] Add regression test for invalid SDWA selection handling
frederik-h Jan 16, 2025
5b51aeb
[AMDGPU] clang-format changes to SIPeepholeSDWA
frederik-h Jan 16, 2025
b1ead11
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Jan 23, 2025
aa1d42e
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Jan 29, 2025
b05facb
[AMDGPU] SIPeepholeSDWA: Reenable existing SDWA instruction handling
frederik-h Jan 23, 2025
c3868a5
[AMDGPU] SIPeepholeSDWA: Stop using CombineSelections in convertToSDWA
frederik-h Jan 29, 2025
c58493c
[AMDGPU] SIPeepholeSDWA.cpp: Simplify combineSdwaSel uses
frederik-h Jan 29, 2025
3242677
[AMDGPU] SIPeepholeSDWA: Change arg names and comments
frederik-h Jan 29, 2025
b5aa73d
[AMDGPU] Use default check prefix in sdwa-peephole-instr-combine-sel.mir
frederik-h Jan 29, 2025
ed16fbd
Revert unintended reformatting
frederik-h Jan 30, 2025
258fb14
[AMDGPU] SIPeepholeSDWA: Verify compatibility of selections earlier
frederik-h Feb 11, 2025
ac0a133
[AMDGPU] SIPeepholeSDWA: Adjust comments and variable names
frederik-h Feb 11, 2025
a9e38fa
[AMDGPU] SIPeepholeSDWA: Add comment answering a review question
frederik-h Feb 11, 2025
db7f674
clang-format changes
frederik-h Feb 11, 2025
ac80b86
Use consistent/more specific return type for SDWA{Src,Dst}Operand fac…
frederik-h Feb 12, 2025
bbe87ff
fixup! Use consistent/more specific return type for SDWA{Src,Dst}Oper…
frederik-h Feb 12, 2025
179007c
Merge "compatibleSelections" function back into "combineSdwaSel"
frederik-h Feb 12, 2025
a5a45aa
Add comprehensive test for source selection combinations
frederik-h Feb 13, 2025
1290369
Revert introduction of SDWA{Dst,Src}Operand::create
frederik-h Feb 13, 2025
84889b5
Fix combineSdwaSel handling of Sel == OperandSel case
frederik-h Feb 14, 2025
96e055b
Add new early check for combinable selections
frederik-h Feb 14, 2025
0724d76
clang-format changes
frederik-h Feb 14, 2025
d2943ab
Move combineSdwaSel from anon namespace and make 'static'
frederik-h Feb 18, 2025
c6d9f87
Move all uses of "canCombineSelections" into "potentialToConvert"
frederik-h Feb 24, 2025
9d41c6c
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
7bc33f4
Update llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel2.mir
frederik-h Feb 26, 2025
0c4e99f
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
25bc9d0
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
e564af8
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
4d2e8e2
fixup! Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
2fd69e3
Extract canCombineSel helper function
frederik-h Feb 27, 2025
9190655
Split canCombineSel and reuse for SDWADstOperand::canCombineSelections
frederik-h Feb 27, 2025
ce79a90
Fix test: Don't use physical registers, compact registers
frederik-h Feb 27, 2025
1ba89df
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Feb 27, 2025
6311358
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Feb 27, 2025
1e77766
Add test for destination selection
frederik-h Feb 28, 2025
80f5210
Use right type for OpNames
frederik-h Feb 28, 2025
5a19bc0
clang-format changes
frederik-h Feb 28, 2025
58e278f
Add dst_sel test
frederik-h Mar 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 154 additions & 42 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
Original file line number Diff line number Diff line change
@@ -62,6 +62,7 @@ class SIPeepholeSDWA {
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
void pseudoOpConvertToVOP2(MachineInstr &MI,
const GCNSubtarget &ST) const;
MachineInstr *createSDWAVersion(MachineInstr &MI);
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;

@@ -85,11 +86,18 @@ class SIPeepholeSDWALegacy : public MachineFunctionPass {
}
};

using namespace AMDGPU::SDWA;

class SDWAOperand {
private:
MachineOperand *Target; // Operand that would be used in converted instruction
MachineOperand *Replaced; // Operand that would be replace by Target

/// Returns true iff the SDWA selection of this SDWAOperand can be combined
/// with the SDWA selections of its uses in \p MI.
virtual bool canCombineSelections(const MachineInstr &MI,
const SIInstrInfo *TII) = 0;

public:
SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
: Target(TargetOp), Replaced(ReplacedOp) {
@@ -118,8 +126,6 @@ class SDWAOperand {
#endif
};

using namespace AMDGPU::SDWA;

class SDWASrcOperand : public SDWAOperand {
private:
SdwaSel SrcSel;
@@ -131,13 +137,15 @@ class SDWASrcOperand : public SDWAOperand {
SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
bool Sext_ = false)
: SDWAOperand(TargetOp, ReplacedOp),
SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}
: SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
Neg(Neg_), Sext(Sext_) {}

MachineInstr *potentialToConvert(const SIInstrInfo *TII,
const GCNSubtarget &ST,
SDWAOperandsMap *PotentialMatches = nullptr) override;
bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
bool canCombineSelections(const MachineInstr &MI,
const SIInstrInfo *TII) override;

SdwaSel getSrcSel() const { return SrcSel; }
bool getAbs() const { return Abs; }
@@ -158,15 +166,16 @@ class SDWADstOperand : public SDWAOperand {
DstUnused DstUn;

public:

SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
: SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
: SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

MachineInstr *potentialToConvert(const SIInstrInfo *TII,
const GCNSubtarget &ST,
SDWAOperandsMap *PotentialMatches = nullptr) override;
bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
bool canCombineSelections(const MachineInstr &MI,
const SIInstrInfo *TII) override;

SdwaSel getDstSel() const { return DstSel; }
DstUnused getDstUnused() const { return DstUn; }
@@ -187,6 +196,8 @@ class SDWADstPreserveOperand : public SDWADstOperand {
Preserve(PreserveOp) {}

bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
bool canCombineSelections(const MachineInstr &MI,
const SIInstrInfo *TII) override;

MachineOperand *getPreservedOperand() const { return Preserve; }

@@ -314,6 +325,38 @@ static MachineOperand *findSingleRegDef(const MachineOperand *Reg,
return nullptr;
}

/// Combine an SDWA instruction's existing SDWA selection \p Sel with
/// the SDWA selection \p OperandSel of its operand. If the selections
/// are compatible, return the combined selection, otherwise return a
/// nullopt.
/// For example, if we have Sel = BYTE_0 Sel and OperandSel = WORD_1:
/// BYTE_0 Sel (WORD_1 Sel (%X)) -> BYTE_2 Sel (%X)
static std::optional<SdwaSel> combineSdwaSel(SdwaSel Sel, SdwaSel OperandSel) {
if (Sel == SdwaSel::DWORD)
return OperandSel;

if (Sel == OperandSel || OperandSel == SdwaSel::DWORD)
return Sel;

if (Sel == SdwaSel::WORD_1 || Sel == SdwaSel::BYTE_2 ||
Sel == SdwaSel::BYTE_3)
return {};

if (OperandSel == SdwaSel::WORD_0)
return Sel;

if (OperandSel == SdwaSel::WORD_1) {
if (Sel == SdwaSel::BYTE_0)
return SdwaSel::BYTE_2;
if (Sel == SdwaSel::BYTE_1)
return SdwaSel::BYTE_3;
if (Sel == SdwaSel::WORD_0)
return SdwaSel::WORD_1;
}

return {};
}

uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
const MachineOperand *SrcOp) const {
uint64_t Mods = 0;
@@ -350,7 +393,8 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,

for (MachineInstr &UseMI : getMRI()->use_nodbg_instructions(Reg->getReg()))
// Check that all instructions that use Reg can be converted
if (!isConvertibleToSDWA(UseMI, ST, TII))
if (!isConvertibleToSDWA(UseMI, ST, TII) ||
!canCombineSelections(UseMI, TII))
return nullptr;

// Now that it's guaranteed all uses are legal, iterate over the uses again
@@ -372,7 +416,9 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
if (!PotentialMO)
return nullptr;

return PotentialMO->getParent();
MachineInstr *Parent = PotentialMO->getParent();

return canCombineSelections(*Parent, TII) ? Parent : nullptr;
}

bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
@@ -451,13 +497,55 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
}
copyRegOperand(*Src, *getTargetOperand());
if (!IsPreserveSrc) {
SrcSel->setImm(getSrcSel());
SdwaSel ExistingSel = static_cast<SdwaSel>(SrcSel->getImm());
SrcSel->setImm(*combineSdwaSel(ExistingSel, getSrcSel()));
SrcMods->setImm(getSrcMods(TII, Src));
}
getTargetOperand()->setIsKill(false);
return true;
}

/// Verify that the SDWA selection operand \p SrcSelOpName of the SDWA
/// instruction \p MI can be combined with the selection \p OpSel.
static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII,
AMDGPU::OpName SrcSelOpName, SdwaSel OpSel) {
assert(TII->isSDWA(MI.getOpcode()));

const MachineOperand *SrcSelOp = TII->getNamedOperand(MI, SrcSelOpName);
SdwaSel SrcSel = static_cast<SdwaSel>(SrcSelOp->getImm());

return combineSdwaSel(SrcSel, OpSel).has_value();
}

/// Verify that \p Op is the same register as the operand of the SDWA
/// instruction \p MI named by \p SrcOpName and that the SDWA
/// selection \p SrcSelOpName can be combined with the \p OpSel.
static bool canCombineOpSel(const MachineInstr &MI, const SIInstrInfo *TII,
AMDGPU::OpName SrcOpName,
AMDGPU::OpName SrcSelOpName, MachineOperand *Op,
SdwaSel OpSel) {
assert(TII->isSDWA(MI.getOpcode()));

const MachineOperand *Src = TII->getNamedOperand(MI, SrcOpName);
if (!Src || !isSameReg(*Src, *Op))
return true;

return canCombineOpSel(MI, TII, SrcSelOpName, OpSel);
}

bool SDWASrcOperand::canCombineSelections(const MachineInstr &MI,
const SIInstrInfo *TII) {
if (!TII->isSDWA(MI.getOpcode()))
return true;

using namespace AMDGPU;

return canCombineOpSel(MI, TII, OpName::src0, OpName::src0_sel,
getReplacedOperand(), getSrcSel()) &&
canCombineOpSel(MI, TII, OpName::src1, OpName::src1_sel,
getReplacedOperand(), getSrcSel());
}

MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
const GCNSubtarget &ST,
SDWAOperandsMap *PotentialMatches) {
@@ -476,7 +564,8 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
return nullptr;
}

return PotentialMO->getParent();
MachineInstr *Parent = PotentialMO->getParent();
return canCombineSelections(*Parent, TII) ? Parent : nullptr;
}

bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
@@ -498,7 +587,10 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
copyRegOperand(*Operand, *getTargetOperand());
MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
assert(DstSel);
DstSel->setImm(getDstSel());

SdwaSel ExistingSel = static_cast<SdwaSel>(DstSel->getImm());
DstSel->setImm(combineSdwaSel(ExistingSel, getDstSel()).value());

MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
assert(DstUnused);
DstUnused->setImm(getDstUnused());
@@ -509,6 +601,14 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
return true;
}

bool SDWADstOperand::canCombineSelections(const MachineInstr &MI,
const SIInstrInfo *TII) {
if (!TII->isSDWA(MI.getOpcode()))
return true;

return canCombineOpSel(MI, TII, AMDGPU::OpName::dst_sel, getDstSel());
}

bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
const SIInstrInfo *TII) {
// MI should be moved right before v_or_b32.
@@ -538,6 +638,11 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
return SDWADstOperand::convertToSDWA(MI, TII);
}

bool SDWADstPreserveOperand::canCombineSelections(const MachineInstr &MI,
const SIInstrInfo *TII) {
return SDWADstOperand::canCombineSelections(MI, TII);
}

std::optional<int64_t>
SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
if (Op.isImm()) {
@@ -962,11 +1067,8 @@ bool isConvertibleToSDWA(MachineInstr &MI,
const SIInstrInfo* TII) {
// Check if this is already an SDWA instruction
unsigned Opc = MI.getOpcode();
if (TII->isSDWA(Opc)) {
// FIXME: Reenable after fixing selection handling.
// Cf. llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.ll
return false;
}
if (TII->isSDWA(Opc))
return true;

// Check if this instruction has opcode that supports SDWA
if (AMDGPU::getSDWAOp(Opc) == -1)
@@ -1024,21 +1126,13 @@ bool isConvertibleToSDWA(MachineInstr &MI,
}
} // namespace

bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
const SDWAOperandsVector &SDWAOperands) {

LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);

// Convert to sdwa
int SDWAOpcode;
MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
if (TII->isSDWA(Opcode)) {
SDWAOpcode = Opcode;
} else {
SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
if (SDWAOpcode == -1)
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
}
assert(!TII->isSDWA(Opcode));

int SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
if (SDWAOpcode == -1)
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
assert(SDWAOpcode != -1);

const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
@@ -1172,6 +1266,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
}

return SDWAInst.getInstr();
}

bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
const SDWAOperandsVector &SDWAOperands) {
LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);

MachineInstr *SDWAInst;
if (TII->isSDWA(MI.getOpcode())) {
// Clone the instruction to allow revoking changes
// made to MI during the processing of the operands
// if the conversion fails.
SDWAInst = MI.getParent()->getParent()->CloneMachineInstr(&MI);
MI.getParent()->insert(MI.getIterator(), SDWAInst);
} else {
SDWAInst = createSDWAVersion(MI);
}

// Apply all sdwa operand patterns.
bool Converted = false;
for (auto &Operand : SDWAOperands) {
@@ -1190,19 +1302,18 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
Converted |= Operand->convertToSDWA(*SDWAInst, TII);
}

if (Converted) {
ConvertedInstructions.push_back(SDWAInst);
for (MachineOperand &MO : SDWAInst->uses()) {
if (!MO.isReg())
continue;

MRI->clearKillFlags(MO.getReg());
}
} else {
if (!Converted) {
SDWAInst->eraseFromParent();
return false;
}

ConvertedInstructions.push_back(SDWAInst);
for (MachineOperand &MO : SDWAInst->uses()) {
if (!MO.isReg())
continue;

MRI->clearKillFlags(MO.getReg());
}
LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
++NumSDWAInstructionsPeepholed;

@@ -1285,10 +1396,11 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) {

for (const auto &OperandPair : SDWAOperands) {
const auto &Operand = OperandPair.second;
MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST, &PotentialMatches);
if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST, TII)) {
MachineInstr *PotentialMI =
Operand->potentialToConvert(TII, ST, &PotentialMatches);

if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST, TII))
PotentialMatches[PotentialMI].push_back(Operand.get());
}
}

for (auto &PotentialPair : PotentialMatches) {
Loading