@@ -713,7 +713,6 @@ void ThreadState::JumpToLabel(Id target)
713
713
{
714
714
OpLoopMerge merge (it);
715
715
716
- mergeBlock = merge.mergeBlock ;
717
716
SetConvergencePoint (merge.mergeBlock );
718
717
719
718
it++;
@@ -786,7 +785,6 @@ void ThreadState::SkipIgnoredInstructions()
786
785
{
787
786
OpSelectionMerge merge (it);
788
787
789
- mergeBlock = merge.mergeBlock ;
790
788
SetConvergencePoint (merge.mergeBlock );
791
789
792
790
nextInstruction++;
@@ -797,7 +795,6 @@ void ThreadState::SkipIgnoredInstructions()
797
795
{
798
796
OpLoopMerge merge (it);
799
797
800
- mergeBlock = merge.mergeBlock ;
801
798
SetConvergencePoint (merge.mergeBlock );
802
799
803
800
nextInstruction++;
@@ -3359,6 +3356,7 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
3359
3356
3360
3357
RDCASSERT (lane < workgroup.size (), lane, workgroup.size ());
3361
3358
ShaderVariable x = workgroup[lane].GetSrc (valueId);
3359
+ RDCASSERTEQUAL (workgroup[lane].currentInstruction , currentInstruction);
3362
3360
3363
3361
switch (opdata.op )
3364
3362
{
@@ -4114,6 +4112,7 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
4114
4112
}
4115
4113
else
4116
4114
{
4115
+ // JAKE TODO: THIS MIGHT NOT BE CORRECT IF THE SetDst GETS DEFERRED
4117
4116
if (hasReturnValueData)
4118
4117
SetDst (call.result , returnValue);
4119
4118
returnValue = ShaderVariable ();
@@ -5309,6 +5308,99 @@ void ThreadState::QueueSampleGather(Op opcode, DebugAPIWrapper::TextureType texT
5309
5308
SetStepNeedsGpuSampleGatherOp ();
5310
5309
}
5311
5310
5311
+ // The conditions where it is not safe to run another step are based on:
5312
+ // the current simulation state and the next instruction to simulate
5313
+ bool ThreadState::CanRunAnotherStep () const
5314
+ {
5315
+ // Thread has finished
5316
+ if (Finished ())
5317
+ return false ;
5318
+
5319
+ // Current Simulated State that prevents running another step:
5320
+ // Any control flow state changes i.e. branch, convergence point, function return
5321
+ if (diverged)
5322
+ return false ;
5323
+ if (!enteredPoints.empty ())
5324
+ return false ;
5325
+ if (convergenceInstruction != INVALID_EXECUTION_POINT)
5326
+ return false ;
5327
+ if (functionReturnPoint != INVALID_EXECUTION_POINT)
5328
+ return false ;
5329
+
5330
+ // Any pending result i.e. pending GPU math operation, need to run on the device thread
5331
+ if (IsPendingResultQueued ())
5332
+ return false ;
5333
+
5334
+ // Next instructions that prevent running another step:
5335
+ // any instruction that requires threads in the tangle to be in lockstep
5336
+ ConstIter it = debugger.GetIterForInstruction (nextInstruction);
5337
+ OpDecoder opdata (it);
5338
+ switch (opdata.op )
5339
+ {
5340
+ // derivatives require threads in the tangle to be in lockstep
5341
+ case Op::DPdx:
5342
+ case Op::DPdy:
5343
+ case Op::DPdxCoarse:
5344
+ case Op::DPdyCoarse:
5345
+ case Op::DPdxFine:
5346
+ case Op::DPdyFine:
5347
+ case Op::Fwidth:
5348
+ case Op::FwidthCoarse:
5349
+ case Op::FwidthFine: return false ;
5350
+ // subgroup ops require threads in the tangle to be in lockstep
5351
+ case Op::GroupNonUniformBallotFindLSB:
5352
+ case Op::GroupNonUniformBallotFindMSB:
5353
+ case Op::GroupNonUniformInverseBallot:
5354
+ case Op::GroupNonUniformBallotBitExtract:
5355
+ case Op::GroupNonUniformBroadcastFirst:
5356
+ case Op::SubgroupFirstInvocationKHR:
5357
+ case Op::GroupNonUniformBroadcast:
5358
+ case Op::GroupNonUniformShuffle:
5359
+ case Op::GroupNonUniformShuffleXor:
5360
+ case Op::GroupNonUniformShuffleUp:
5361
+ case Op::GroupNonUniformShuffleDown:
5362
+ case Op::SubgroupReadInvocationKHR:
5363
+ case Op::GroupNonUniformRotateKHR:
5364
+ case Op::GroupNonUniformQuadBroadcast:
5365
+ case Op::GroupNonUniformQuadSwap:
5366
+ case Op::GroupNonUniformQuadAllKHR:
5367
+ case Op::GroupNonUniformQuadAnyKHR:
5368
+ case Op::SubgroupAllKHR:
5369
+ case Op::SubgroupAnyKHR:
5370
+ case Op::SubgroupAllEqualKHR:
5371
+ case Op::GroupNonUniformAll:
5372
+ case Op::GroupNonUniformAny:
5373
+ case Op::GroupNonUniformAllEqual:
5374
+ case Op::GroupNonUniformIAdd:
5375
+ case Op::GroupNonUniformFAdd:
5376
+ case Op::GroupNonUniformIMul:
5377
+ case Op::GroupNonUniformFMul:
5378
+ case Op::GroupNonUniformSMin:
5379
+ case Op::GroupNonUniformUMin:
5380
+ case Op::GroupNonUniformFMin:
5381
+ case Op::GroupNonUniformSMax:
5382
+ case Op::GroupNonUniformUMax:
5383
+ case Op::GroupNonUniformFMax:
5384
+ case Op::GroupNonUniformBitwiseAnd:
5385
+ case Op::GroupNonUniformBitwiseOr:
5386
+ case Op::GroupNonUniformBitwiseXor:
5387
+ case Op::GroupNonUniformLogicalAnd:
5388
+ case Op::GroupNonUniformLogicalOr:
5389
+ case Op::GroupNonUniformLogicalXor:
5390
+ case Op::GroupNonUniformElect:
5391
+ case Op::GroupNonUniformBallot:
5392
+ case Op::SubgroupBallotKHR:
5393
+ case Op::GroupNonUniformBallotBitCount: return false ;
5394
+ // Enter/Exit Functions in lockstep
5395
+ case Op::FunctionCall:
5396
+ case Op::Return:
5397
+ case Op::ReturnValue: return false ;
5398
+ default : break ;
5399
+ }
5400
+
5401
+ return true ;
5402
+ }
5403
+
5312
5404
}; // namespace rdcspv
5313
5405
5314
5406
template <>
0 commit comments