Skip to content

Commit b400d4b

Browse files
committed
Refactor D3D ps input fetch to use quad swizzle helper
* Also centralises and shares more code between D3D11 and D3D12 to use the same shader
1 parent 58cace2 commit b400d4b

12 files changed

+845
-1365
lines changed

renderdoc/driver/d3d11/d3d11_shaderdebug.cpp

+114-418
Large diffs are not rendered by default.

renderdoc/driver/d3d12/d3d12_shaderdebug.cpp

+159-488
Large diffs are not rendered by default.

renderdoc/driver/shaders/dxbc/dx_debug.cpp

+340-40
Large diffs are not rendered by default.

renderdoc/driver/shaders/dxbc/dx_debug.h

+95-7
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@
2525
#pragma once
2626

2727
#include <map>
28+
#include "maths/vec.h"
2829

2930
namespace DXBC
3031
{
3132
enum ResourceRetType;
3233
enum class InterpolationMode : uint8_t;
34+
class DXBCContainer;
3335
};
3436

3537
namespace DXBCBytecode
@@ -44,6 +46,41 @@ typedef DXBC::ResourceRetType ResourceRetType;
4446
typedef DXBCBytecode::ResourceDimension ResourceDimension;
4547
typedef DXBCBytecode::SamplerMode SamplerMode;
4648

49+
struct LaneData
50+
{
51+
Vec4f pixelPos;
52+
53+
uint32_t isHelper;
54+
uint32_t quadId;
55+
uint32_t quadLane;
56+
uint32_t coverage;
57+
58+
// user data PSInput below here
59+
};
60+
61+
// Fixed-size header of one recorded pixel hit. Every member is four bytes wide, so the header is
// 48 bytes with no implicit padding; the order and sizes must not be changed. The per-lane data
// (LaneData quad[4]) is not a member: it follows directly after this header.
struct PixelDebugHit
{
  // total number of hits - only used in the first instance
  uint32_t numHits;

  // everything from here on is a per-hit property
  float posx;
  float posy;
  float depth;

  float derivValid;
  uint32_t primitive;
  uint32_t isFrontFace;
  uint32_t sample;

  uint32_t quadLaneIndex;
  // explicit padding that rounds the header up to 48 bytes
  uint32_t pad[3];

  // LaneData quad[4] below here
};
80+
81+
// maximum number of overdraw levels before we start losing potential pixel hits
82+
static const uint32_t maxPixelHits = 100;
83+
4784
struct PSInputElement
4885
{
4986
PSInputElement(int regster, int element, int numWords, ShaderBuiltin attr, bool inc)
@@ -64,13 +101,64 @@ struct PSInputElement
64101
bool included;
65102
};
66103

67-
void GatherPSInputDataForInitialValues(const rdcarray<SigParameter> &stageInputSig,
68-
const rdcarray<SigParameter> &prevStageOutputSig,
69-
const rdcarray<DXBC::InterpolationMode> &interpModes,
70-
rdcarray<PSInputElement> &initialValues,
71-
rdcarray<rdcstr> &floatInputs, rdcarray<rdcstr> &inputVarNames,
72-
rdcstr &psInputDefinition, int &structureStride,
73-
std::map<ShaderBuiltin, rdcstr> &usedInputs);
104+
// Key identifying one cached evaluation of a pixel shader input. A default-constructed key has
// sample -1 and a zero offset. Keys are ordered lexicographically over the members in declaration
// order, and two keys are equal exactly when every member matches.
struct SampleEvalCacheKey
{
  // index of this thread in the quad
  int32_t quadIndex = -1;
  // index of the input register
  int32_t inputRegisterIndex = -1;
  // the first component in the register
  int32_t firstComponent = 0;
  // how many components in the register
  int32_t numComponents = 0;
  // -1 for offset-from-centroid lookups
  int32_t sample = -1;
  // integer offset from centroid
  int32_t offsetx = 0;
  int32_t offsety = 0;

  bool operator<(const SampleEvalCacheKey &o) const
  {
    // both tables list the members in declaration order, which is the comparison priority
    const int32_t mine[] = {quadIndex, inputRegisterIndex, firstComponent, numComponents,
                            sample,    offsetx,            offsety};
    const int32_t theirs[] = {o.quadIndex, o.inputRegisterIndex, o.firstComponent, o.numComponents,
                              o.sample,    o.offsetx,            o.offsety};
    const int fieldCount = (int)(sizeof(mine) / sizeof(mine[0]));

    // the first member that differs decides the ordering
    for(int f = 0; f < fieldCount; f++)
    {
      if(mine[f] != theirs[f])
        return mine[f] < theirs[f];
    }

    // identical keys are not less than each other
    return false;
  }

  bool operator==(const SampleEvalCacheKey &o) const
  {
    return quadIndex == o.quadIndex && inputRegisterIndex == o.inputRegisterIndex &&
           firstComponent == o.firstComponent && numComponents == o.numComponents &&
           sample == o.sample && offsetx == o.offsetx && offsety == o.offsety;
  }
};
137+
138+
// Settings handed to CreatePSInputFetcher() and Program::CalculateEvalSampleCache().
struct PSInputFetcherConfig
{
  // NOTE(review): presumably the coordinates of the pixel being debugged - confirm with callers
  uint32_t x = 0;
  uint32_t y = 0;
  // NOTE(review): presumably the slot and register space of the UAV that receives the fetched
  // data - confirm with callers
  uint32_t uavslot = 0;
  uint32_t uavspace = 0;
  // number of samples to cache when eval_sample_index is used with a non-immediate sample index
  uint32_t outputSampleCount = 1;
};
145+
146+
struct PSInputFetcher
147+
{
148+
// stride of the generated PSInput struct
149+
uint32_t stride = 0;
150+
// members of the PSInput struct
151+
rdcarray<PSInputElement> inputs;
152+
153+
// per-sample evaluation cache
154+
rdcarray<SampleEvalCacheKey> evalSampleCacheData;
155+
uint64_t sampleEvalRegisterMask = 0;
156+
157+
rdcstr hlsl;
158+
};
159+
160+
void CreatePSInputFetcher(const DXBC::DXBCContainer *dxbc, const DXBC::DXBCContainer *prevdxbc,
161+
const PSInputFetcherConfig &cfg, PSInputFetcher &fetcher);
74162

75163
enum class GatherChannel : uint8_t
76164
{

renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp

+84
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,90 @@ Program::Program(const rdcarray<uint32_t> &words)
6868
m_Minor = VersionToken::MinorVersion.Get(cur[0]);
6969
}
7070

71+
// Scan the program for attribute evaluation instructions (eval_centroid, eval_sample_index and
// eval_snapped) and record in the fetcher every evaluation that may be needed.
//
// cfg     - only outputSampleCount is read here: it bounds the sample indices that are cached
//           when eval_sample_index is given a non-immediate sample index.
// fetcher - evalSampleCacheData gains one key per distinct evaluation (entries that are already
//           present are kept and never duplicated), and sampleEvalRegisterMask gains one bit for
//           each input register referenced by an eval instruction.
void Program::CalculateEvalSampleCache(const DXDebug::PSInputFetcherConfig &cfg,
                                       DXDebug::PSInputFetcher &fetcher) const
{
  // every key goes through here so that the cache never holds two identical entries, no matter
  // which kind of eval instruction produced them
  auto addUniqueKey = [&fetcher](const DXDebug::SampleEvalCacheKey &candidate) {
    if(!fetcher.evalSampleCacheData.contains(candidate))
      fetcher.evalSampleCacheData.push_back(candidate);
  };

  // scan the instructions to see if it contains any evaluates.
  for(size_t i = 0; i < GetNumInstructions(); i++)
  {
    const Operation &op = GetInstruction(i);

    // skip any non-eval opcodes
    if(op.operation != OPCODE_EVAL_CENTROID && op.operation != OPCODE_EVAL_SAMPLE_INDEX &&
       op.operation != OPCODE_EVAL_SNAPPED)
      continue;

    // the generation of this key must match what we'll generate in the corresponding lookup
    DXDebug::SampleEvalCacheKey key;

    // all the eval opcodes have rDst, vIn as the first two operands
    key.inputRegisterIndex = (int32_t)op.operands[1].indices[0].index;

    // count the destination components, stopping at the first unused (0xff) one
    for(int c = 0; c < 4; c++)
    {
      if(op.operands[0].comps[c] == 0xff)
        break;

      key.numComponents = c + 1;
    }

    // the first destination component selects which source component is read first.
    // NOTE(review): this assumes the destination has at least one component set - confirm
    key.firstComponent = op.operands[1].comps[op.operands[0].comps[0]];

    // NOTE(review): this assumes input register indices are always below 64 - confirm
    fetcher.sampleEvalRegisterMask |= 1ULL << key.inputRegisterIndex;

    if(op.operation == OPCODE_EVAL_CENTROID)
    {
      // nothing to fill in - default key is centroid, sample is -1 and offset x/y is 0
      addUniqueKey(key);
    }
    else if(op.operation == OPCODE_EVAL_SAMPLE_INDEX)
    {
      if(op.operands[2].type == TYPE_IMMEDIATE32 || op.operands[2].type == TYPE_IMMEDIATE64)
      {
        // hooray, only sampling a single index, just add this key
        key.sample = (int32_t)op.operands[2].values[0];

        addUniqueKey(key);
      }
      else
      {
        // parameter is a register and we don't know which sample will be needed, fetch them
        // all. In most cases this will be a loop over them all, so they'll all be needed anyway.
        // Several instructions may evaluate the same input, so these are de-duplicated as well.
        for(uint32_t c = 0; c < cfg.outputSampleCount; c++)
        {
          key.sample = (int32_t)c;
          addUniqueKey(key);
        }
      }
    }
    else if(op.operation == OPCODE_EVAL_SNAPPED)
    {
      if(op.operands[2].type == TYPE_IMMEDIATE32 || op.operands[2].type == TYPE_IMMEDIATE64)
      {
        // hooray, only sampling a single offset, just add this key
        key.offsetx = (int32_t)op.operands[2].values[0];
        key.offsety = (int32_t)op.operands[2].values[1];

        addUniqueKey(key);
      }
      else
      {
        RDCWARN(
            "EvaluateAttributeSnapped called with dynamic parameter, caching all possible "
            "evaluations which could have performance impact.");

        // cache every offset in [-8, 7] on both axes, x-major so the entry order is stable
        for(key.offsetx = -8; key.offsetx <= 7; key.offsetx++)
          for(key.offsety = -8; key.offsety <= 7; key.offsety++)
            addUniqueKey(key);
      }
    }
  }
}
154+
71155
void HandleResourceArrayIndices(const rdcarray<DXBCBytecode::RegIndex> &indices,
72156
DXBC::ShaderInputBind &desc)
73157
{

renderdoc/driver/shaders/dxbc/dxbc_bytecode.h

+3
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,9 @@ class Program
11171117
void FetchComputeProperties(DXBC::Reflection *reflection);
11181118
DXBC::Reflection *GuessReflection();
11191119

1120+
void CalculateEvalSampleCache(const DXDebug::PSInputFetcherConfig &cfg,
1121+
DXDebug::PSInputFetcher &fetcher) const;
1122+
11201123
const rdcarray<uint32_t> &GetTokens() const { return m_ProgramWords; }
11211124
rdcstr GetDebugStatus();
11221125

0 commit comments

Comments
 (0)