Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Account for existing SDWA selections #123221

Merged
merged 48 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
b29c0f2
[AMDGPU] Account for existing SDWA selections
jrbyrnes Jan 10, 2025
8d16c1c
[AMDGPU] Correct transformation and simplify combineSdwaSel
frederik-h Jan 14, 2025
20e23b6
[AMDGPU] Change formatting of combineSdwaSel
frederik-h Jan 16, 2025
663b94c
[AMDGPU] Remove dead branch from SIPeepholeSDWA::convertToSDWA
frederik-h Jan 16, 2025
c2dfca0
[AMDGPU] Extract SDWA instruction creation from convertToSDWA
frederik-h Jan 16, 2025
38bd038
[AMDGPU] Unify loops in SIPeepholeSDWA::convertToSDWA
frederik-h Jan 16, 2025
e5923ac
[AMDGPU] Invert if statement in SIPeepholeSDWA::convertToSDWA
frederik-h Jan 16, 2025
7034d2d
[AMDGPU] Rename "Combine" to "CombineSelections" in SIPeepholeSDWA
frederik-h Jan 16, 2025
bbe9ab8
[AMDGPU] Change combineSdwaSel to use optional return type
frederik-h Jan 16, 2025
245c93b
[AMDGPU] Add regression test for invalid SDWA selection handling
frederik-h Jan 16, 2025
5b51aeb
[AMDGPU] clang-format changes to SIPeepholeSDWA
frederik-h Jan 16, 2025
b1ead11
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Jan 23, 2025
aa1d42e
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Jan 29, 2025
b05facb
[AMDGPU] SIPeepholeSDWA: Reenable existing SDWA instruction handling
frederik-h Jan 23, 2025
c3868a5
[AMDGPU] SIPeepholeSDWA: Stop using CombineSelections in convertToSDWA
frederik-h Jan 29, 2025
c58493c
[AMDGPU] SIPeepholeSDWA.cpp: Simplify combineSdwaSel uses
frederik-h Jan 29, 2025
3242677
[AMDGPU] SIPeepholeSDWA: Change arg names and comments
frederik-h Jan 29, 2025
b5aa73d
[AMDGPU] Use default check prefix in sdwa-peephole-instr-combine-sel.mir
frederik-h Jan 29, 2025
ed16fbd
Revert unintended reformatting
frederik-h Jan 30, 2025
258fb14
[AMDGPU] SIPeepholeSDWA: Verify compatibility of selections earlier
frederik-h Feb 11, 2025
ac0a133
[AMDGPU] SIPeepholeSDWA: Adjust comments and variable names
frederik-h Feb 11, 2025
a9e38fa
[AMDGPU] SIPeepholeSDWA: Add comment answering a review question
frederik-h Feb 11, 2025
db7f674
clang-format changes
frederik-h Feb 11, 2025
ac80b86
Use consistent/more specific return type for SDWA{Src,Dst}Operand fac…
frederik-h Feb 12, 2025
bbe87ff
fixup! Use consistent/more specific return type for SDWA{Src,Dst}Oper…
frederik-h Feb 12, 2025
179007c
Merge "compatibleSelections" function back into "combineSdwaSel"
frederik-h Feb 12, 2025
a5a45aa
Add comprehensive test for source selection combinations
frederik-h Feb 13, 2025
1290369
Revert introduction of SDWA{Dst,Src}Operand::create
frederik-h Feb 13, 2025
84889b5
Fix combineSdwaSel handling of Sel == OperandSel case
frederik-h Feb 14, 2025
96e055b
Add new early check for combinable selections
frederik-h Feb 14, 2025
0724d76
clang-format changes
frederik-h Feb 14, 2025
d2943ab
Move combineSdwaSel from anon namespace and make 'static'
frederik-h Feb 18, 2025
c6d9f87
Move all uses of "canCombineSelections" into "potentialToConvert"
frederik-h Feb 24, 2025
9d41c6c
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
7bc33f4
Update llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel2.mir
frederik-h Feb 26, 2025
0c4e99f
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
25bc9d0
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
e564af8
Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
4d2e8e2
fixup! Update llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
frederik-h Feb 26, 2025
2fd69e3
Extract canCombineSel helper function
frederik-h Feb 27, 2025
9190655
Split canCombineSel and reuse for SDWADstOperand::canCombineSelections
frederik-h Feb 27, 2025
ce79a90
Fix test: Don't use physical registers, compact registers
frederik-h Feb 27, 2025
1ba89df
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Feb 27, 2025
6311358
Merge remote-tracking branch 'upstream/main' into SIPeepholeSDWA-Comb…
frederik-h Feb 27, 2025
1e77766
Add test for destination selection
frederik-h Feb 28, 2025
80f5210
Use right type for OpNames
frederik-h Feb 28, 2025
5a19bc0
clang-format changes
frederik-h Feb 28, 2025
58e278f
Add dst_sel test
frederik-h Mar 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 107 additions & 36 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class SIPeepholeSDWA {
std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
void pseudoOpConvertToVOP2(MachineInstr &MI,
const GCNSubtarget &ST) const;
MachineInstr *createSDWAVersion(MachineInstr &MI);
bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const;

Expand All @@ -85,6 +86,8 @@ class SIPeepholeSDWALegacy : public MachineFunctionPass {
}
};

using namespace AMDGPU::SDWA;

class SDWAOperand {
private:
MachineOperand *Target; // Operand that would be used in converted instruction
Expand All @@ -102,12 +105,47 @@ class SDWAOperand {
virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII,
const GCNSubtarget &ST,
SDWAOperandsMap *PotentialMatches = nullptr) = 0;
virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII,
bool CombineSelections = false) = 0;

MachineOperand *getTargetOperand() const { return Target; }
MachineOperand *getReplacedOperand() const { return Replaced; }
MachineInstr *getParentInst() const { return Target->getParent(); }

/// Fold a \p FoldedOp SDWA selection into an \p ExistingOp existing SDWA
/// selection, i.e. compute the single selection that is equivalent to
/// applying \p ExistingOp to the value selected by \p FoldedOp:
///   ExistingOp Sel (FoldedOp Sel (%X)) -> Combined Sel (%X)
/// For example, if we have existing BYTE_0 Sel and are attempting to fold
/// WORD_1 Sel:
///   BYTE_0 Sel (WORD_1 Sel (%X)) -> BYTE_2 Sel (%X)
///
/// \param ExistingOp selection already present on the SDWA instruction.
/// \param FoldedOp   selection represented by the operand being folded in.
/// \returns the combined selection if the selections are compatible,
///          otherwise std::nullopt.
std::optional<SdwaSel> combineSdwaSel(SdwaSel ExistingOp, SdwaSel FoldedOp) {
  // DWORD selects the whole register, so it acts as the identity on either
  // side of the composition.
  if (ExistingOp == SdwaSel::DWORD)
    return FoldedOp;

  if (FoldedOp == SdwaSel::DWORD)
    return ExistingOp;

  // FoldedOp is now a byte or word selection. An existing selection that
  // addresses the upper half of the intermediate value cannot be expressed
  // as a selection on %X.
  if (ExistingOp == SdwaSel::WORD_1 || ExistingOp == SdwaSel::BYTE_2 ||
      ExistingOp == SdwaSel::BYTE_3)
    return std::nullopt;

  // ExistingOp is BYTE_0, BYTE_1 or WORD_0 from here on. All of them lie
  // inside the low word, so selecting them from WORD_0 of %X is the same as
  // selecting them from %X directly. This also covers WORD_0 of WORD_0.
  if (FoldedOp == SdwaSel::WORD_0)
    return ExistingOp;

  // Selecting from the high word shifts the existing selection up by 16 bits.
  if (FoldedOp == SdwaSel::WORD_1) {
    if (ExistingOp == SdwaSel::BYTE_0)
      return SdwaSel::BYTE_2;
    if (ExistingOp == SdwaSel::BYTE_1)
      return SdwaSel::BYTE_3;
    if (ExistingOp == SdwaSel::WORD_0)
      return SdwaSel::WORD_1;
  }

  // BYTE_0 of BYTE_0 is idempotent. Equal selections are NOT idempotent in
  // general: BYTE_1 Sel (BYTE_1 Sel (%X)) reads byte 1 of an intermediate
  // value whose only data from %X sits in byte 0, so it must be rejected
  // rather than folded to BYTE_1.
  if (ExistingOp == SdwaSel::BYTE_0 && FoldedOp == SdwaSel::BYTE_0)
    return SdwaSel::BYTE_0;

  return std::nullopt;
}

MachineRegisterInfo *getMRI() const {
return &getParentInst()->getParent()->getParent()->getRegInfo();
}
Expand All @@ -118,8 +156,6 @@ class SDWAOperand {
#endif
};

using namespace AMDGPU::SDWA;

class SDWASrcOperand : public SDWAOperand {
private:
SdwaSel SrcSel;
Expand All @@ -137,7 +173,8 @@ class SDWASrcOperand : public SDWAOperand {
MachineInstr *potentialToConvert(const SIInstrInfo *TII,
const GCNSubtarget &ST,
SDWAOperandsMap *PotentialMatches = nullptr) override;
bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII,
bool CombineSelections = false) override;

SdwaSel getSrcSel() const { return SrcSel; }
bool getAbs() const { return Abs; }
Expand Down Expand Up @@ -166,7 +203,8 @@ class SDWADstOperand : public SDWAOperand {
MachineInstr *potentialToConvert(const SIInstrInfo *TII,
const GCNSubtarget &ST,
SDWAOperandsMap *PotentialMatches = nullptr) override;
bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII,
bool CombineSelections = false) override;

SdwaSel getDstSel() const { return DstSel; }
DstUnused getDstUnused() const { return DstUn; }
Expand All @@ -186,7 +224,8 @@ class SDWADstPreserveOperand : public SDWADstOperand {
: SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
Preserve(PreserveOp) {}

bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII,
bool CombineSelections = false) override;

MachineOperand *getPreservedOperand() const { return Preserve; }

Expand Down Expand Up @@ -375,7 +414,8 @@ MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII,
return PotentialMO->getParent();
}

bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII,
bool CombineSelections) {
switch (MI.getOpcode()) {
case AMDGPU::V_CVT_F32_FP8_sdwa:
case AMDGPU::V_CVT_F32_BF8_sdwa:
Expand Down Expand Up @@ -451,7 +491,15 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
}
copyRegOperand(*Src, *getTargetOperand());
if (!IsPreserveSrc) {
SrcSel->setImm(getSrcSel());
if (CombineSelections) {
std::optional<SdwaSel> NewOp =
combineSdwaSel((SdwaSel)SrcSel->getImm(), getSrcSel());
if (!NewOp.has_value())
return false;
SrcSel->setImm(NewOp.value());
} else {
SrcSel->setImm(getSrcSel());
}
SrcMods->setImm(getSrcMods(TII, Src));
}
getTargetOperand()->setIsKill(false);
Expand Down Expand Up @@ -479,7 +527,8 @@ MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII,
return PotentialMO->getParent();
}

bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII,
bool CombineSelections) {
// Replace vdst operand in MI with target operand. Set dst_sel and dst_unused

if ((MI.getOpcode() == AMDGPU::V_FMAC_F16_sdwa ||
Expand All @@ -498,7 +547,15 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
copyRegOperand(*Operand, *getTargetOperand());
MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
assert(DstSel);
DstSel->setImm(getDstSel());
if (CombineSelections) {
std::optional<SdwaSel> NewOp =
combineSdwaSel((SdwaSel)DstSel->getImm(), getDstSel());
if (!NewOp.has_value())
return false;
DstSel->setImm(NewOp.value());
} else {
DstSel->setImm(getDstSel());
}
MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
assert(DstUnused);
DstUnused->setImm(getDstUnused());
Expand All @@ -510,7 +567,8 @@ bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
}

bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
const SIInstrInfo *TII) {
const SIInstrInfo *TII,
bool CombineSelections) {
// MI should be moved right before v_or_b32.
// For this we should clear all kill flags on uses of MI src-operands or else
// we can encounter problem with use of killed operand.
Expand All @@ -535,7 +593,7 @@ bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
MI.getNumOperands() - 1);

// Convert MI as any other SDWADstOperand and remove v_or_b32
return SDWADstOperand::convertToSDWA(MI, TII);
return SDWADstOperand::convertToSDWA(MI, TII, CombineSelections);
}

std::optional<int64_t>
Expand Down Expand Up @@ -1021,21 +1079,13 @@ bool isConvertibleToSDWA(MachineInstr &MI,
}
} // namespace

bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
const SDWAOperandsVector &SDWAOperands) {

LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);

// Convert to sdwa
int SDWAOpcode;
MachineInstr *SIPeepholeSDWA::createSDWAVersion(MachineInstr &MI) {
unsigned Opcode = MI.getOpcode();
if (TII->isSDWA(Opcode)) {
SDWAOpcode = Opcode;
} else {
SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
if (SDWAOpcode == -1)
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
}
assert(!TII->isSDWA(Opcode));

int SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
if (SDWAOpcode == -1)
SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
assert(SDWAOpcode != -1);

const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
Expand Down Expand Up @@ -1169,6 +1219,28 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1);
}

return SDWAInst.getInstr();
}

bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
const SDWAOperandsVector &SDWAOperands) {
LLVM_DEBUG(dbgs() << "Convert instruction:" << MI);

MachineInstr *SDWAInst;
bool CombineSelections;
if (TII->isSDWA(MI.getOpcode())) {
// No conversion necessary, since MI is an SDWA instruction. But
// tell convertToSDWA below to combine selections of this instruction
// and its SDWA operands.
SDWAInst = MI.getParent()->getParent()->CloneMachineInstr(&MI);
MI.getParent()->insert(MI.getIterator(), SDWAInst);
CombineSelections = true;
} else {
// Convert to sdwa
SDWAInst = createSDWAVersion(MI);
CombineSelections = false;
}

// Apply all sdwa operand patterns.
bool Converted = false;
for (auto &Operand : SDWAOperands) {
Expand All @@ -1184,22 +1256,21 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
// was already destroyed). So if SDWAOperand is also a potential MI then do
// not apply it.
if (PotentialMatches.count(Operand->getParentInst()) == 0)
Converted |= Operand->convertToSDWA(*SDWAInst, TII);
Converted |= Operand->convertToSDWA(*SDWAInst, TII, CombineSelections);
}

if (Converted) {
ConvertedInstructions.push_back(SDWAInst);
for (MachineOperand &MO : SDWAInst->uses()) {
if (!MO.isReg())
continue;

MRI->clearKillFlags(MO.getReg());
}
} else {
if (!Converted) {
SDWAInst->eraseFromParent();
return false;
}

ConvertedInstructions.push_back(SDWAInst);
for (MachineOperand &MO : SDWAInst->uses()) {
if (!MO.isReg())
continue;

MRI->clearKillFlags(MO.getReg());
}
LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n');
++NumSDWAInstructionsPeepholed;

Expand Down
124 changes: 124 additions & 0 deletions llvm/test/CodeGen/AMDGPU/sdwa-peephole-instr-combine-sel.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=NOHAZARD %s
#
# Regression test for combining SDWA selections in the si-peephole-sdwa pass.
#
# Input shape:
#   * bb.8 defines %6 as V_LSHRREV_B32_e64 16 of the loaded value %3.
#   * bb.1 computes %5 = V_AND_B32_e64 %6, 255 and then
#     %7 = V_LSHLREV_B32_e64 2, %5.
#
# Expected result (see the NOHAZARD lines for bb.1 and bb.8):
#   * Both instructions in bb.1 are rewritten to their _sdwa forms and read
#     the loaded value directly instead of %6 / %5.
#   * The V_LSHRREV_B32_e64 in bb.8 is left in place.
#
# NOTE(review): the trailing immediates on the _sdwa instructions are
# presumably the dst_sel / dst_unused / src0_sel / src1_sel encodings; confirm
# the numeric values against the SdwaSel enum before editing them by hand.

---
name: sdwa_opsel_hazard
body: |
; NOHAZARD-LABEL: name: sdwa_opsel_hazard
; NOHAZARD: bb.0:
; NOHAZARD-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
; NOHAZARD-NEXT: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; NOHAZARD-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF
; NOHAZARD-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; NOHAZARD-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed [[DEF1]], [[DEF2]], 0, 0, implicit $exec
; NOHAZARD-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF undef [[DEF]], %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; NOHAZARD-NEXT: S_BRANCH %bb.7
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.1:
; NOHAZARD-NEXT: successors: %bb.2(0x80000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 255, implicit $exec
; NOHAZARD-NEXT: [[V_AND_B32_sdwa:%[0-9]+]]:vgpr_32 = V_AND_B32_sdwa 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, [[V_MOV_B32_e32_]], 0, 6, 0, 5, 6, implicit $exec
; NOHAZARD-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
; NOHAZARD-NEXT: [[V_LSHLREV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_sdwa 0, [[V_MOV_B32_e32_1]], 0, undef [[GLOBAL_LOAD_DWORD_SADDR]], 0, 6, 0, 6, 2, implicit $exec
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.2:
; NOHAZARD-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF killed undef %9, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; NOHAZARD-NEXT: S_BRANCH %bb.3
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.3:
; NOHAZARD-NEXT: successors: %bb.4(0x80000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.4:
; NOHAZARD-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32 = SI_IF killed undef [[SI_IF1]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; NOHAZARD-NEXT: S_BRANCH %bb.5
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.5:
; NOHAZARD-NEXT: successors: %bb.6(0x80000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.6:
; NOHAZARD-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: [[SI_IF3:%[0-9]+]]:sreg_32 = SI_IF undef [[DEF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; NOHAZARD-NEXT: S_BRANCH %bb.9
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.7:
; NOHAZARD-NEXT: successors: %bb.8(0x80000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.8:
; NOHAZARD-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, undef [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec
; NOHAZARD-NEXT: [[SI_IF4:%[0-9]+]]:sreg_32 = SI_IF killed undef [[SI_IF]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; NOHAZARD-NEXT: S_BRANCH %bb.1
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.9:
; NOHAZARD-NEXT: successors: %bb.10(0x80000000)
; NOHAZARD-NEXT: {{ $}}
; NOHAZARD-NEXT: bb.10:
; NOHAZARD-NEXT: S_ENDPGM 0
bb.0:
successors: %bb.7(0x40000000), %bb.8(0x40000000)
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6

%0:sreg_32 = IMPLICIT_DEF
%1:sreg_64_xexec_xnull = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed %1, %2, 0, 0, implicit $exec
%4:sreg_32 = SI_IF undef %0, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.7

bb.1:
successors: %bb.2(0x80000000)

%5:vgpr_32 = V_AND_B32_e64 undef %6, 255, implicit $exec
%7:vgpr_32 = V_LSHLREV_B32_e64 2, killed undef %5, implicit $exec

bb.2:
successors: %bb.3(0x40000000), %bb.4(0x40000000)

%8:sreg_32 = SI_IF killed undef %9, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.3

bb.3:
successors: %bb.4(0x80000000)

bb.4:
successors: %bb.5(0x40000000), %bb.6(0x40000000)

%10:sreg_32 = SI_IF killed undef %8, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.5

bb.5:
successors: %bb.6(0x80000000)

bb.6:
successors: %bb.9(0x40000000), %bb.10(0x40000000)

%11:sreg_32 = SI_IF undef %0, %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.9

bb.7:
successors: %bb.8(0x80000000)

bb.8:
successors: %bb.1(0x40000000), %bb.2(0x40000000)

%6:vgpr_32 = V_LSHRREV_B32_e64 16, undef %3, implicit $exec
%9:sreg_32 = SI_IF killed undef %4, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
S_BRANCH %bb.1

bb.9:
successors: %bb.10(0x80000000)

bb.10:
S_ENDPGM 0

...

Loading