Skip to content

Commit 5ba9e1f

Browse files
committed
WIP VK MT DEBUGGER RUN MULTIPLE SIMULATION STEPS IN A JOB
1 parent 895a26c commit 5ba9e1f

File tree

3 files changed

+207
-44
lines changed

3 files changed

+207
-44
lines changed

renderdoc/driver/shaders/spirv/spirv_debug.cpp

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,6 @@ void ThreadState::JumpToLabel(Id target)
713713
{
714714
OpLoopMerge merge(it);
715715

716-
mergeBlock = merge.mergeBlock;
717716
SetConvergencePoint(merge.mergeBlock);
718717

719718
it++;
@@ -786,7 +785,6 @@ void ThreadState::SkipIgnoredInstructions()
786785
{
787786
OpSelectionMerge merge(it);
788787

789-
mergeBlock = merge.mergeBlock;
790788
SetConvergencePoint(merge.mergeBlock);
791789

792790
nextInstruction++;
@@ -797,7 +795,6 @@ void ThreadState::SkipIgnoredInstructions()
797795
{
798796
OpLoopMerge merge(it);
799797

800-
mergeBlock = merge.mergeBlock;
801798
SetConvergencePoint(merge.mergeBlock);
802799

803800
nextInstruction++;
@@ -3359,6 +3356,7 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
33593356

33603357
RDCASSERT(lane < workgroup.size(), lane, workgroup.size());
33613358
ShaderVariable x = workgroup[lane].GetSrc(valueId);
3359+
RDCASSERTEQUAL(workgroup[lane].currentInstruction, currentInstruction);
33623360

33633361
switch(opdata.op)
33643362
{
@@ -4114,6 +4112,7 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
41144112
}
41154113
else
41164114
{
4115+
// JAKE TODO: THIS MIGHT NOT BE CORRECT IF THE SetDst GETS DEFERRED
41174116
if(hasReturnValueData)
41184117
SetDst(call.result, returnValue);
41194118
returnValue = ShaderVariable();
@@ -5309,6 +5308,99 @@ void ThreadState::QueueSampleGather(Op opcode, DebugAPIWrapper::TextureType texT
53095308
SetStepNeedsGpuSampleGatherOp();
53105309
}
53115310

5311+
// The conditions where it is not safe to run another step are based on:
5312+
// the current simulation state and the next instruction to simulate
5313+
bool ThreadState::CanRunAnotherStep() const
5314+
{
5315+
// Thread has finished
5316+
if(Finished())
5317+
return false;
5318+
5319+
// Current Simulated State that prevents running another step:
5320+
// Any control flow state changes i.e. branch, convergence point, function return
5321+
if(diverged)
5322+
return false;
5323+
if(!enteredPoints.empty())
5324+
return false;
5325+
if(convergenceInstruction != INVALID_EXECUTION_POINT)
5326+
return false;
5327+
if(functionReturnPoint != INVALID_EXECUTION_POINT)
5328+
return false;
5329+
5330+
// Any pending result i.e. pending GPU math operation, need to run on the device thread
5331+
if(IsPendingResultQueued())
5332+
return false;
5333+
5334+
// Next instructions that prevent running another step:
5335+
// any instruction that requires threads in the tangle to be in lockstep
5336+
ConstIter it = debugger.GetIterForInstruction(nextInstruction);
5337+
OpDecoder opdata(it);
5338+
switch(opdata.op)
5339+
{
5340+
// derivatives require threads in the tangle to be in lockstep
5341+
case Op::DPdx:
5342+
case Op::DPdy:
5343+
case Op::DPdxCoarse:
5344+
case Op::DPdyCoarse:
5345+
case Op::DPdxFine:
5346+
case Op::DPdyFine:
5347+
case Op::Fwidth:
5348+
case Op::FwidthCoarse:
5349+
case Op::FwidthFine: return false;
5350+
// subgroup ops require threads in the tangle to be in lockstep
5351+
case Op::GroupNonUniformBallotFindLSB:
5352+
case Op::GroupNonUniformBallotFindMSB:
5353+
case Op::GroupNonUniformInverseBallot:
5354+
case Op::GroupNonUniformBallotBitExtract:
5355+
case Op::GroupNonUniformBroadcastFirst:
5356+
case Op::SubgroupFirstInvocationKHR:
5357+
case Op::GroupNonUniformBroadcast:
5358+
case Op::GroupNonUniformShuffle:
5359+
case Op::GroupNonUniformShuffleXor:
5360+
case Op::GroupNonUniformShuffleUp:
5361+
case Op::GroupNonUniformShuffleDown:
5362+
case Op::SubgroupReadInvocationKHR:
5363+
case Op::GroupNonUniformRotateKHR:
5364+
case Op::GroupNonUniformQuadBroadcast:
5365+
case Op::GroupNonUniformQuadSwap:
5366+
case Op::GroupNonUniformQuadAllKHR:
5367+
case Op::GroupNonUniformQuadAnyKHR:
5368+
case Op::SubgroupAllKHR:
5369+
case Op::SubgroupAnyKHR:
5370+
case Op::SubgroupAllEqualKHR:
5371+
case Op::GroupNonUniformAll:
5372+
case Op::GroupNonUniformAny:
5373+
case Op::GroupNonUniformAllEqual:
5374+
case Op::GroupNonUniformIAdd:
5375+
case Op::GroupNonUniformFAdd:
5376+
case Op::GroupNonUniformIMul:
5377+
case Op::GroupNonUniformFMul:
5378+
case Op::GroupNonUniformSMin:
5379+
case Op::GroupNonUniformUMin:
5380+
case Op::GroupNonUniformFMin:
5381+
case Op::GroupNonUniformSMax:
5382+
case Op::GroupNonUniformUMax:
5383+
case Op::GroupNonUniformFMax:
5384+
case Op::GroupNonUniformBitwiseAnd:
5385+
case Op::GroupNonUniformBitwiseOr:
5386+
case Op::GroupNonUniformBitwiseXor:
5387+
case Op::GroupNonUniformLogicalAnd:
5388+
case Op::GroupNonUniformLogicalOr:
5389+
case Op::GroupNonUniformLogicalXor:
5390+
case Op::GroupNonUniformElect:
5391+
case Op::GroupNonUniformBallot:
5392+
case Op::SubgroupBallotKHR:
5393+
case Op::GroupNonUniformBallotBitCount: return false;
5394+
// Enter/Exit Functions in lockstep
5395+
case Op::FunctionCall:
5396+
case Op::Return:
5397+
case Op::ReturnValue: return false;
5398+
default: break;
5399+
}
5400+
5401+
return true;
5402+
}
5403+
53125404
}; // namespace rdcspv
53135405

53145406
template <>

renderdoc/driver/shaders/spirv/spirv_debug.h

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -291,22 +291,18 @@ struct ThreadState
291291
};
292292
rdcarray<GSMIndex> gsmIndexes;
293293

294-
// the id of the merge block that the last branch targetted
295-
Id mergeBlock;
296-
uint32_t convergenceInstruction;
297-
uint32_t functionReturnPoint;
294+
// Returns the thread's `diverged` flag.
bool IsDiverged() const { return diverged; }
295+
// Read-only access to `enteredPoints`; returned by reference, no copy is made.
const rdcarray<uint32_t> &GetEnteredPoints() const { return enteredPoints; }
296+
// Returns the current value of `convergenceInstruction`.
uint32_t GetConvergenceInstruction() const { return convergenceInstruction; }
297+
// Returns the current value of `functionReturnPoint`.
uint32_t GetFunctionReturnPoint() const { return functionReturnPoint; }
298+
298299
ShaderVariable returnValue;
299300
bool hasReturnValueData;
300301
rdcarray<StackFrame *> callstack;
301302

302303
// the list of IDs that are currently valid and live
303304
rdcarray<Id> live;
304305

305-
// true if executed an operation which could trigger divergence
306-
bool diverged;
307-
// list of potential convergence points that were entered in a single step (used for tracking thread convergence)
308-
rdcarray<uint32_t> enteredPoints;
309-
310306
std::map<Id, uint32_t> lastWrite;
311307

312308
// quad ID (arbitrary, just used to find neighbours for derivatives)
@@ -399,6 +395,9 @@ struct ThreadState
399395
return queuedGpuSampleGatherOp;
400396
}
401397

398+
bool CanRunAnotherStep() const;
399+
400+
// True when either `beingSimulated` or `inQueue` is set.
// NOTE(review): "Queuded" looks like a typo of "Queued"; renaming would require updating all callers.
bool IsQueudedOrBeingSimulated() const { return beingSimulated || inQueue; }
402401
// Returns the `beingSimulated` flag.
bool IsBeingSimulated() const { return beingSimulated; }
403402
// Sets the `beingSimulated` flag to `active`.
void SetBeingSimulated(bool active) { beingSimulated = active; }
404403

@@ -425,6 +424,16 @@ struct ThreadState
425424
void ExecuteMemoryBarrier(Id semanticsId);
426425
static bool WorkgroupIsDiverged(const rdcarray<ThreadState> &workgroup);
427426

427+
// Control Flow state variables
428+
// true if executed an operation which could trigger divergence
429+
bool diverged;
430+
// list of potential convergence points that were entered in a single step (used for tracking thread convergence)
431+
rdcarray<uint32_t> enteredPoints;
432+
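// execution point of the pending convergence point, or INVALID_EXECUTION_POINT when none is
// pending (presumably recorded by SetConvergencePoint for the targeted merge block - TODO confirm)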
433+
uint32_t convergenceInstruction;
434+
// the instruction after a function call is defined to be a convergence point
435+
uint32_t functionReturnPoint;
436+
428437
bool stepComplete = false;
429438
ShaderDebugState pendingDebugState;
430439
ShaderDebugState *m_State = NULL;
@@ -655,6 +664,7 @@ class Debugger : public Processor, public ShaderDebugger
655664
ShaderStage stage;
656665

657666
int steps = 0;
667+
int curActiveSteps = 0;
658668

659669
/////////////////////////////////////////////////////////
660670
// parsed data
@@ -752,7 +762,8 @@ class Debugger : public Processor, public ShaderDebugger
752762
void ProcessQueuedDeviceThreadSteps();
753763

754764
void QueueJob(uint32_t lane, rdcarray<ShaderDebugState> *ret, const rdcarray<bool> &activeMask);
755-
void StepThread(uint32_t lane, rdcarray<ShaderDebugState> *ret, const rdcarray<bool> &activeMask);
765+
void StepThread(uint32_t lane, bool runMultiple, rdcarray<ShaderDebugState> *ret,
766+
const rdcarray<bool> &activeMask);
756767
void InternalStepThread(uint32_t lane, rdcarray<ShaderDebugState> *ret,
757768
const rdcarray<bool> &activeMask);
758769

0 commit comments

Comments
 (0)