@@ -1561,6 +1561,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15611561
15621562 std::sort (liveGlobals.begin (), liveGlobals.end ());
15631563
1564+ rdcarray<rdcspv::ThreadIndex> threadIds;
15641565 for (uint32_t i = 0 ; i < threadsInWorkgroup; i++)
15651566 {
15661567 ThreadState &lane = workgroup[i];
@@ -1589,8 +1590,14 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15891590 // now that the globals are allocated and their storage won't move, we can take pointers to them
15901591 for (const PointerId &p : pointerIDs)
15911592 p.Set (*this , global, lane);
1593+
1594+ // Only add active lanes to control flow
1595+ if (!lane.dead )
1596+ threadIds.push_back (i);
15921597 }
15931598
1599+ controlFlow.Construct (threadIds);
1600+
15941601 // find quad neighbours
15951602 {
15961603 rdcarray<uint32_t > processedQuads;
@@ -2454,6 +2461,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24542461 if (steps == 0 )
24552462 {
24562463 ShaderDebugState initial;
2464+ uint32_t startBlock = INVALID_BLOCK_INDEX;
24572465
24582466 // we should be sitting at the entry point function prologue, step forward into the first block
24592467 // and past any function-local variable declarations
@@ -2466,6 +2474,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24662474 thread.EnterEntryPoint (&initial);
24672475 FillCallstack (thread, initial);
24682476 initial.nextInstruction = thread.nextInstruction ;
2477+ startBlock = thread.callstack .back ()->curBlock .value ();
24692478 }
24702479 else
24712480 {
@@ -2487,6 +2496,18 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24872496
24882497 ret.push_back (std::move (initial));
24892498
2499+ // Set the initial block for the threads in the root tangle
2500+ ThreadBlockStates threadBlockStates;
2501+ TangleGroup &tangles = controlFlow.GetTangles ();
2502+ RDCASSERTEQUAL (tangles.size (), 1 );
2503+ RDCASSERTNOTEQUAL (startBlock, INVALID_BLOCK_INDEX);
2504+ for (Tangle &tangle : tangles)
2505+ {
2506+ RDCASSERT (tangle.IsAliveActive ());
2507+ for (uint32_t threadIdx = 0 ; threadIdx < workgroup.size (); ++threadIdx)
2508+ threadBlockStates[threadIdx].push_back (startBlock);
2509+ }
2510+ controlFlow.UpdateState (threadBlockStates);
24902511 steps++;
24912512 }
24922513
@@ -2505,29 +2526,66 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25052526 if (active.Finished ())
25062527 break ;
25072528
2508- // calculate the current mask of which threads are active
2509- CalcActiveMask (activeMask);
2529+ // Execute the threads in each active tangle
2530+ ThreadBlockStates threadBlockStates;
2531+ TangleGroup &tangles = controlFlow.GetTangles ();
25102532
2511- // step all active members of the workgroup
2512- for (size_t lane = 0 ; lane < workgroup.size (); lane++)
2533+ for (Tangle &tangle : tangles)
25132534 {
2514- ThreadState &thread = workgroup[lane];
2535+ if (!tangle.IsAliveActive ())
2536+ continue ;
2537+
2538+ rdcarray<rdcspv::ThreadReference> threadRefs = tangle.GetThreadRefs ();
2539+ // calculate the current active thread mask from the threads in the tangle
2540+ {
2541+ // one bool per workgroup thread
2542+ activeMask.resize (workgroup.size ());
25152543
2516- if (activeMask[lane])
2544+ // start with all threads as inactive
2545+ for (size_t i = 0 ; i < workgroup.size (); i++)
2546+ activeMask[i] = false ;
2547+
2548+ // activate the threads in the tangle
2549+ for (const rdcspv::ThreadReference &ref : threadRefs)
2550+ {
2551+ uint32_t idx = ref.id ;
2552+ RDCASSERT (idx < workgroup.size (), idx, workgroup.size ());
2553+ RDCASSERT (!workgroup[idx].Finished ());
2554+ activeMask[idx] = true ;
2555+ }
2556+ }
2557+
2558+ BlockIndex newMergeBlock = INVALID_BLOCK_INDEX;
2559+ uint32_t countActiveThreads = 0 ;
2560+ uint32_t countDivergedThreads = 0 ;
2561+ uint32_t countMergeBlockThreads = 0 ;
2562+
2563+ // step all active members of the workgroup
2564+ for (size_t lane = 0 ; lane < workgroup.size (); lane++)
25172565 {
2518- if (thread.nextInstruction >= instructionOffsets.size ())
2566+ if (!activeMask[lane])
2567+ continue ;
2568+ ++countActiveThreads;
2569+
2570+ ThreadState &thread = workgroup[lane];
2571+ const uint32_t currentPC = thread.nextInstruction ;
2572+ const uint32_t threadId = lane;
2573+ RDCASSERTEQUAL (thread.callstack .back ()->curBlock .value (), tangle.GetBlockIndex ());
2574+ if (currentPC >= instructionOffsets.size ())
25192575 {
25202576 if (lane == activeLaneIndex)
25212577 ret.emplace_back ();
25222578
2579+ tangle.SetThreadDead (threadId);
25232580 continue ;
25242581 }
2582+ Id prevConvergeBlock = thread.mergeBlock ;
25252583
25262584 if (lane == activeLaneIndex)
25272585 {
25282586 ShaderDebugState state;
25292587
2530- size_t instOffs = instructionOffsets[thread. nextInstruction ];
2588+ size_t instOffs = instructionOffsets[currentPC ];
25312589
25322590 // see if we're retiring any IDs at this state
25332591 for (size_t l = 0 ; l < thread.live .size ();)
@@ -2566,7 +2624,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25662624
25672625 if (m_DebugInfo.valid )
25682626 {
2569- size_t endOffs = instructionOffsets[thread. nextInstruction - 1 ];
2627+ size_t endOffs = instructionOffsets[currentPC - 1 ];
25702628
25712629 // append any inlined functions to the top of the stack
25722630 InlineData *inlined = m_DebugInfo.lineInline [endOffs];
@@ -2614,8 +2672,53 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
26142672 {
26152673 thread.StepNext (NULL , workgroup, activeMask);
26162674 }
2675+ threadBlockStates[threadId] = thread.enteredBlocks ;
2676+ if (!thread.enteredBlocks .empty ())
2677+ {
2678+ const uint32_t nextBlock = thread.callstack .back ()->curBlock .value ();
2679+ // The last block entered should be the current block
2680+ RDCASSERTEQUAL (nextBlock, thread.enteredBlocks .back ());
2681+ }
2682+
2683+ Id newConvergeBlock = thread.mergeBlock ;
2684+ // the thread executed a merge block
2685+ if (newConvergeBlock != prevConvergeBlock)
2686+ {
2687+ if (newMergeBlock == INVALID_BLOCK_INDEX)
2688+ {
2689+ newMergeBlock = newConvergeBlock.value ();
2690+ RDCASSERTNOTEQUAL (newMergeBlock, INVALID_BLOCK_INDEX);
2691+ }
2692+ else
2693+ {
2694+ // All the threads in the tangle should converge to the same block
2695+ RDCASSERTEQUAL (newConvergeBlock.value (), newMergeBlock);
2696+ }
2697+ ++countMergeBlockThreads;
2698+ }
2699+ // JAKE TODO: function exit is a convergence point
2700+
2701+ if (thread.Finished ())
2702+ tangle.SetThreadDead (threadId);
2703+
2704+ if (thread.diverged )
2705+ ++countDivergedThreads;
2706+ }
2707+ if (countMergeBlockThreads)
2708+ {
2709+ // all the active threads should have a merge block if any have a merge block
2710+ RDCASSERTEQUAL (countMergeBlockThreads, countActiveThreads);
2711+ tangle.AddMergePoint (newMergeBlock);
2712+ }
2713+
2714+ if (countDivergedThreads)
2715+ {
2716+ // all the active threads should have diverged if any diverges
2717+ RDCASSERTEQUAL (countDivergedThreads, countActiveThreads);
2718+ tangle.SetDiverged (true );
26172719 }
26182720 }
2721+ controlFlow.UpdateState (threadBlockStates);
26192722 }
26202723
26212724 return ret;
@@ -4324,7 +4427,6 @@ void Debugger::RegisterOp(Iter it)
43244427 curFunction = NULL ;
43254428 }
43264429}
4327-
43284430}; // namespace rdcspv
43294431
43304432#if ENABLED(ENABLE_UNIT_TESTS)
0 commit comments