@@ -1561,6 +1561,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15611561
15621562 std::sort (liveGlobals.begin (), liveGlobals.end ());
15631563
1564+ rdcarray<rdcspv::ThreadIndex> threadIds;
15641565 for (uint32_t i = 0 ; i < threadsInWorkgroup; i++)
15651566 {
15661567 ThreadState &lane = workgroup[i];
@@ -1589,8 +1590,14 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
15891590 // now that the globals are allocated and their storage won't move, we can take pointers to them
15901591 for (const PointerId &p : pointerIDs)
15911592 p.Set (*this , global, lane);
1593+
1594+ // Only add active lanes to control flow
1595+ if (!lane.dead )
1596+ threadIds.push_back (i);
15921597 }
15931598
1599+ controlFlow.Construct (threadIds);
1600+
15941601 // find quad neighbours
15951602 {
15961603 rdcarray<uint32_t > processedQuads;
@@ -2462,6 +2469,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24622469 if (steps == 0 )
24632470 {
24642471 ShaderDebugState initial;
2472+ uint32_t startBlock = INVALID_EXECUTION_POINT;
24652473
24662474 // we should be sitting at the entry point function prologue, step forward into the first block
24672475 // and past any function-local variable declarations
@@ -2474,6 +2482,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24742482 thread.EnterEntryPoint (&initial);
24752483 FillCallstack (thread, initial);
24762484 initial.nextInstruction = thread.nextInstruction ;
2485+ startBlock = thread.callstack .back ()->curBlock .value ();
24772486 }
24782487 else
24792488 {
@@ -2495,6 +2504,18 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
24952504
24962505 ret.push_back (std::move (initial));
24972506
2507+ // Set the initial block for the threads in the root tangle
2508+ ThreadExecutionStates threadExecutionStates;
2509+ TangleGroup &tangles = controlFlow.GetTangles ();
2510+ RDCASSERTEQUAL (tangles.size (), 1 );
2511+ RDCASSERTNOTEQUAL (startBlock, INVALID_EXECUTION_POINT);
2512+ for (Tangle &tangle : tangles)
2513+ {
2514+ RDCASSERT (tangle.IsAliveActive ());
2515+ for (uint32_t threadIdx = 0 ; threadIdx < workgroup.size (); ++threadIdx)
2516+ threadExecutionStates[threadIdx].push_back (startBlock);
2517+ }
2518+ controlFlow.UpdateState (threadExecutionStates);
24982519 steps++;
24992520 }
25002521
@@ -2513,29 +2534,67 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25132534 if (active.Finished ())
25142535 break ;
25152536
2516- // calculate the current mask of which threads are active
2517- CalcActiveMask (activeMask);
2537+ // Execute the threads in each active tangle
2538+ ThreadExecutionStates threadExecutionStates;
2539+ TangleGroup &tangles = controlFlow.GetTangles ();
25182540
2519- // step all active members of the workgroup
2520- for (size_t lane = 0 ; lane < workgroup. size (); lane++ )
2541+ bool anyActiveThreads = false ;
2542+ for (Tangle &tangle : tangles )
25212543 {
2522- ThreadState &thread = workgroup[lane];
2544+ if (!tangle.IsAliveActive ())
2545+ continue ;
2546+
2547+ rdcarray<rdcspv::ThreadReference> threadRefs = tangle.GetThreadRefs ();
2548+ // calculate the current active thread mask from the threads in the tangle
2549+ {
2550+ // one bool per workgroup thread
2551+ activeMask.resize (workgroup.size ());
2552+
2553+ // start with all threads as inactive
2554+ for (size_t i = 0 ; i < workgroup.size (); i++)
2555+ activeMask[i] = false ;
2556+
2557+ // activate the threads in the tangle
2558+ for (const rdcspv::ThreadReference &ref : threadRefs)
2559+ {
2560+ uint32_t idx = ref.id ;
2561+ RDCASSERT (idx < workgroup.size (), idx, workgroup.size ());
2562+ RDCASSERT (!workgroup[idx].Finished ());
2563+ activeMask[idx] = true ;
2564+ anyActiveThreads = true ;
2565+ }
2566+ }
25232567
2524- if (activeMask[lane])
2568+ ExecutionPoint newConvergeInstruction = INVALID_EXECUTION_POINT;
2569+ ExecutionPoint newFunctionReturnPoint = INVALID_EXECUTION_POINT;
2570+ uint32_t countActiveThreads = 0 ;
2571+ uint32_t countDivergedThreads = 0 ;
2572+ uint32_t countConvergePointThreads = 0 ;
2573+ uint32_t countFunctionReturnThreads = 0 ;
2574+
2575+ // step every workgroup lane that is active in the current tangle
2576+ for (size_t lane = 0 ; lane < workgroup.size (); lane++)
25252577 {
2526- if (thread.nextInstruction >= instructionOffsets.size ())
2578+ if (!activeMask[lane])
2579+ continue ;
2580+ ++countActiveThreads;
2581+
2582+ ThreadState &thread = workgroup[lane];
2583+ const uint32_t currentPC = thread.nextInstruction ;
2584+ const uint32_t threadId = lane;
2585+ if (currentPC >= instructionOffsets.size ())
25272586 {
25282587 if (lane == activeLaneIndex)
25292588 ret.emplace_back ();
25302589
2590+ tangle.SetThreadDead (threadId);
25312591 continue ;
25322592 }
2533-
25342593 if (lane == activeLaneIndex)
25352594 {
25362595 ShaderDebugState state;
25372596
2538- size_t instOffs = instructionOffsets[thread. nextInstruction ];
2597+ size_t instOffs = instructionOffsets[currentPC ];
25392598
25402599 // see if we're retiring any IDs at this state
25412600 for (size_t l = 0 ; l < thread.live .size ();)
@@ -2574,7 +2633,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
25742633
25752634 if (m_DebugInfo.valid )
25762635 {
2577- size_t endOffs = instructionOffsets[thread. nextInstruction - 1 ];
2636+ size_t endOffs = instructionOffsets[currentPC - 1 ];
25782637
25792638 // append any inlined functions to the top of the stack
25802639 InlineData *inlined = m_DebugInfo.lineInline [endOffs];
@@ -2622,8 +2681,73 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
26222681 {
26232682 thread.StepNext (NULL , workgroup, activeMask);
26242683 }
2684+ threadExecutionStates[threadId] = thread.enteredPoints ;
2685+
2686+ uint32_t threadConvergeInstruction = thread.convergenceInstruction ;
2687+ // the thread activated a new convergence point
2688+ if (threadConvergeInstruction != INVALID_EXECUTION_POINT)
2689+ {
2690+ if (newConvergeInstruction == INVALID_EXECUTION_POINT)
2691+ {
2692+ newConvergeInstruction = threadConvergeInstruction;
2693+ RDCASSERTNOTEQUAL (newConvergeInstruction, INVALID_EXECUTION_POINT);
2694+ }
2695+ else
2696+ {
2697+ // All the threads in the tangle should set the same convergence point
2698+ RDCASSERTEQUAL (threadConvergeInstruction, newConvergeInstruction);
2699+ }
2700+ ++countConvergePointThreads;
2701+ }
2702+ uint32_t threadFunctionReturnPoint = thread.functionReturnPoint ;
2703+ // the thread activated a new function return point
2704+ if (threadFunctionReturnPoint != INVALID_EXECUTION_POINT)
2705+ {
2706+ if (newFunctionReturnPoint == INVALID_EXECUTION_POINT)
2707+ {
2708+ newFunctionReturnPoint = threadFunctionReturnPoint;
2709+ RDCASSERTNOTEQUAL (newFunctionReturnPoint, INVALID_EXECUTION_POINT);
2710+ }
2711+ else
2712+ {
2713+ // All the threads in the tangle should set the same function return point
2714+ RDCASSERTEQUAL (threadFunctionReturnPoint, newFunctionReturnPoint);
2715+ }
2716+ ++countFunctionReturnThreads;
2717+ }
2718+
2719+ if (thread.Finished ())
2720+ tangle.SetThreadDead (threadId);
2721+
2722+ if (thread.diverged )
2723+ ++countDivergedThreads;
2724+ }
2725+ if (countConvergePointThreads)
2726+ {
2727+ // all the active threads should have a convergence point if any have one
2728+ RDCASSERTEQUAL (countConvergePointThreads, countActiveThreads);
2729+ tangle.AddMergePoint (newConvergeInstruction);
2730+ }
2731+ if (countFunctionReturnThreads)
2732+ {
2733+ // all the active threads should have a function return point if any have one
2734+ RDCASSERTEQUAL (countFunctionReturnThreads, countActiveThreads);
2735+ tangle.AddFunctionReturnPoint (newFunctionReturnPoint);
2736+ }
2737+ if (countDivergedThreads)
2738+ {
2739+ // all the active threads should have diverged if any diverges
2740+ RDCASSERTEQUAL (countDivergedThreads, countActiveThreads);
2741+ tangle.SetDiverged (true );
26252742 }
26262743 }
2744+ if (!anyActiveThreads)
2745+ {
2746+ active.dead = true ;
2747+ controlFlow.UpdateState (threadExecutionStates);
2748+ RDCERR (" No active threads in any tangle, killing active thread to terminate the debugger" );
2749+ }
2750+ controlFlow.UpdateState (threadExecutionStates);
26272751 }
26282752
26292753 return ret;
0 commit comments