From 060a052fac37cd8d87d649664565a44e2a7ae3dc Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 11 Apr 2022 15:48:06 -0500 Subject: [PATCH 1/4] GS: Remove special handling of WMS == WMT in shaders Let the shader compiler figure that out if it needs to (it probably doesn't unless you're on TeraScale) --- bin/resources/shaders/dx11/tfx.fx | 100 ++++++++------------- bin/resources/shaders/opengl/tfx_fs.glsl | 34 +------- bin/resources/shaders/vulkan/tfx.glsl | 105 ++++++++--------------- pcsx2/GS/Renderers/Metal/tfx.metal | 81 ++++++----------- 4 files changed, 106 insertions(+), 214 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 1fb0f43b99963..ad3c89269bc2b 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -201,46 +201,31 @@ float4 clamp_wrap_uv(float4 uv) else tex_size = WH.xyxy; - if(PS_WMS == PS_WMT) + if(PS_WMS == 2) { - if(PS_WMS == 2) - { - uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); - } - else if(PS_WMS == 3) - { - #if PS_FST == 0 - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - uv = frac(uv); - #endif - uv = (float4)(((uint4)(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; - } + uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); } - else + else if(PS_WMS == 3) { - if(PS_WMS == 2) - { - uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - } - else if(PS_WMS == 3) - { - #if PS_FST == 0 - uv.xz = frac(uv.xz); - #endif - uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; - } - if(PS_WMT == 2) - { - uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - } - else if(PS_WMT == 3) - { - #if PS_FST == 0 - uv.yw = frac(uv.yw); - #endif - uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; - } + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + #if PS_FST == 0 + uv.xz = frac(uv.xz); + #endif + uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + } + if(PS_WMT == 2) + { + uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + } + else if(PS_WMT == 3) + { + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + #if PS_FST == 0 + uv.yw = frac(uv.yw); + #endif + uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; } return uv; @@ -329,36 +314,25 @@ float4 fetch_c(int2 uv) int2 clamp_wrap_uv_depth(int2 uv) { int4 mask = (int4)MskFix << 4; - if (PS_WMS == PS_WMT) + + if (PS_WMS == 2) { - if (PS_WMS == 2) - { - uv = clamp(uv, mask.xy, mask.zw); - } - else if (PS_WMS == 3) - { - uv = (uv & mask.xy) | mask.zw; - } + uv.x = clamp(uv.x, mask.x, mask.z); } - else + else if (PS_WMS == 3) { - if (PS_WMS == 2) - { - uv.x = clamp(uv.x, mask.x, mask.z); - } - else if (PS_WMS == 3) - { - uv.x = (uv.x & mask.x) | mask.z; - } - if (PS_WMT == 2) - { - uv.y = clamp(uv.y, mask.y, mask.w); - } - else if (PS_WMT == 3) - { - uv.y = (uv.y & mask.y) | mask.w; - } + uv.x = (uv.x & mask.x) | mask.z; } + + if (PS_WMT == 2) + { + uv.y = clamp(uv.y, mask.y, mask.w); + } + else if (PS_WMT == 3) + { + uv.y = (uv.y & mask.y) | mask.w; + } + return uv; } diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 7d88226c9154b..fdd4e5a70a2ca 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -158,42 +158,26 @@ vec4 clamp_wrap_uv(vec4 uv) vec4 tex_size = WH.xyxy; #endif -#if PS_WMS == PS_WMT - #if PS_WMS == 2 - uv_out = clamp(uv, MinMax.xyxy, MinMax.zwzw); + uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); #elif PS_WMS == 3 #if PS_FST == 0 // wrap negative uv coords to avoid an off by one error that shifted // textures. Fixes Xenosaga's hair issue. - uv = fract(uv); - #endif - uv_out = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; -#endif - -#else // PS_WMS != PS_WMT - -#if PS_WMS == 2 - uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - -#elif PS_WMS == 3 - #if PS_FST == 0 uv.xz = fract(uv.xz); #endif uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; - #endif #if PS_WMT == 2 uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - #elif PS_WMT == 3 #if PS_FST == 0 + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. uv.yw = fract(uv.yw); #endif uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; -#endif - #endif return uv_out; @@ -302,16 +286,6 @@ ivec2 clamp_wrap_uv_depth(ivec2 uv) // It allow to multiply the ScalingFactor before the 1/16 coeff ivec4 mask = ivec4(MskFix) << 4; -#if PS_WMS == PS_WMT - -#if PS_WMS == 2 - uv_out = clamp(uv, mask.xy, mask.zw); -#elif PS_WMS == 3 - uv_out = (uv & mask.xy) | mask.zw; -#endif - -#else // PS_WMS != PS_WMT - #if PS_WMS == 2 uv_out.x = clamp(uv.x, mask.x, mask.z); #elif PS_WMS == 3 @@ -322,8 +296,6 @@ ivec2 clamp_wrap_uv_depth(ivec2 uv) uv_out.y = clamp(uv.y, mask.y, mask.w); #elif PS_WMT == 3 uv_out.y = (uv.y & mask.y) | mask.w; -#endif - #endif return uv_out; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index b15b1761908fd..46e0e87f7dba3 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -454,49 +454,33 @@ vec4 clamp_wrap_uv(vec4 uv) tex_size = WH.xyxy; #endif - #if PS_WMS == PS_WMT + + #if PS_WMS == 2 { - #if PS_WMS == 2 - { - uv = clamp(uv, MinMax.xyxy, MinMax.zwzw); - } - #elif PS_WMS == 3 - { - #if PS_FST == 0 - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - uv = fract(uv); - #endif - uv = vec4((uvec4(uv * tex_size) & MskFix.xyxy) | MskFix.zwzw) / tex_size; - } - #endif + uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); } - #else + #elif PS_WMS == 3 { - #if PS_WMS == 2 - { - uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - } - #elif PS_WMS == 3 - { - #if PS_FST == 0 - uv.xz = fract(uv.xz); - #endif - uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; - } + #if PS_FST == 0 + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + uv.xz = fract(uv.xz); #endif - #if PS_WMT == 2 - { - uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); - } - #elif PS_WMT == 3 - { - #if PS_FST == 0 - uv.yw = fract(uv.yw); - #endif - uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; - } + uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + } + #endif + #if PS_WMT == 2 + { + uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + } + #elif PS_WMT == 3 + { + #if PS_FST == 0 + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + uv.yw = fract(uv.yw); #endif + uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; } #endif @@ -583,40 +567,27 @@ vec4 fetch_c(ivec2 uv) ivec2 clamp_wrap_uv_depth(ivec2 uv) { ivec4 mask = ivec4(MskFix << 4); - #if (PS_WMS == PS_WMT) + + #if (PS_WMS == 2) { - #if (PS_WMS == 2) - { - uv = clamp(uv, mask.xy, mask.zw); - } - #elif (PS_WMS == 3) - { - uv = (uv & mask.xy) | mask.zw; - } - #endif + uv.x = clamp(uv.x, mask.x, mask.z); } - #else + #elif (PS_WMS == 3) { - #if (PS_WMS == 2) - { - uv.x = clamp(uv.x, mask.x, mask.z); - } - #elif (PS_WMS == 3) - { - uv.x = (uv.x & mask.x) | mask.z; - } - #endif - #if (PS_WMT == 2) - { - uv.y = clamp(uv.y, mask.y, mask.w); - } - #elif (PS_WMT == 3) - { - uv.y = (uv.y & mask.y) | mask.w; - } - #endif + uv.x = (uv.x & mask.x) | mask.z; + } + #endif + + #if (PS_WMT == 2) + { + uv.y = clamp(uv.y, mask.y, mask.w); + } + #elif (PS_WMT == 3) + { + uv.y = (uv.y & mask.y) | mask.w; } #endif + return uv; } diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 58cc187bfa258..b4d6dcdf6a156 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -268,47 +268,32 @@ struct PSMain float4 uv_out = uv; float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy; - if (PS_WMS == PS_WMT) + if (PS_WMS == 2) { - if (PS_WMS == 2) - { - uv_out = clamp(uv, cb.uv_min_max.xyxy, cb.uv_min_max.zwzw); - } - else if (PS_WMS == 3) - { - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - if (!FST) - uv = fract(uv); - - uv_out = float4((ushort4(uv * tex_size) & ushort4(cb.uv_msk_fix.xyxy)) | ushort4(cb.uv_msk_fix.zwzw)) / tex_size; - } + uv_out.xz = clamp(uv.xz, cb.uv_min_max.xx, cb.uv_min_max.zz); } - else + else if (PS_WMS == 3) { - if (PS_WMS == 2) - { - uv_out.xz = clamp(uv.xz, cb.uv_min_max.xx, cb.uv_min_max.zz); - } - else if (PS_WMS == 3) - { - if (!FST) - uv.xz = fract(uv.xz); + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + if (!FST) + uv.xz = fract(uv.xz); - uv_out.xz = float2((ushort2(uv.xz * tex_size.xx) & ushort2(cb.uv_msk_fix.xx)) | ushort2(cb.uv_msk_fix.zz)) / tex_size.xx; - } + uv_out.xz = float2((ushort2(uv.xz * tex_size.xx) & ushort2(cb.uv_msk_fix.xx)) | ushort2(cb.uv_msk_fix.zz)) / tex_size.xx; + } - if (PS_WMT == 2) - { - uv_out.yw = clamp(uv.yw, cb.uv_min_max.yy, cb.uv_min_max.ww); - } - else if (PS_WMT == 3) - { - if (!FST) - uv.yw = fract(uv.yw); + if (PS_WMT == 2) + { + uv_out.yw = clamp(uv.yw, cb.uv_min_max.yy, cb.uv_min_max.ww); + } + else if (PS_WMT == 3) + { + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + if (!FST) + uv.yw = fract(uv.yw); - uv_out.yw = float2((ushort2(uv.yw * tex_size.yy) & ushort2(cb.uv_msk_fix.yy)) | ushort2(cb.uv_msk_fix.ww)) / tex_size.yy; - } + uv_out.yw = float2((ushort2(uv.yw * tex_size.yy) & ushort2(cb.uv_msk_fix.yy)) | ushort2(cb.uv_msk_fix.ww)) / tex_size.yy; } return uv_out; @@ -386,25 +371,15 @@ struct PSMain // It allow to multiply the ScalingFactor before the 1/16 coeff ushort4 mask = ushort4(cb.uv_msk_fix) << 4; - if (PS_WMS == PS_WMT) - { - if (PS_WMS == 2) - uv_out = clamp(uv, mask.xy, mask.zw); - else if (PS_WMS == 3) - uv_out = (uv & mask.xy) | mask.zw; - } - else - { - if (PS_WMS == 2) - uv_out.x = clamp(uv.x, mask.x, mask.z); - else if (PS_WMS == 3) - uv_out.x = (uv.x & mask.x) | mask.z; + if (PS_WMS == 2) + uv_out.x = clamp(uv.x, mask.x, mask.z); + else if (PS_WMS == 3) + uv_out.x = (uv.x & mask.x) | mask.z; - if (PS_WMT == 2) - uv_out.y = clamp(uv.y, mask.y, mask.w); - else if (PS_WMT == 3) - uv_out.y = (uv.y & mask.y) | mask.w; - } + if (PS_WMT == 2) + uv_out.y = clamp(uv.y, mask.y, mask.w); + else if (PS_WMT == 3) + uv_out.y = (uv.y & mask.y) | mask.w; return uv_out; } From c4416d0e4ead69e1c18fd1fce4b9868e8a931673 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 11 Apr 2022 16:27:18 -0500 Subject: [PATCH 2/4] GS: Move clamp_wrap handling into per-axis method --- bin/resources/shaders/dx11/tfx.fx | 82 ++++++++++------------ bin/resources/shaders/opengl/tfx_fs.glsl | 77 ++++++++++----------- bin/resources/shaders/vulkan/tfx.glsl | 86 ++++++++++-------------- pcsx2/GS/Renderers/Metal/tfx.metal | 64 ++++++++---------- 4 files changed, 133 insertions(+), 176 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index ad3c89269bc2b..ac41f40e8718f 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -192,41 +192,36 @@ float4 sample_p(float u) return Palette.Sample(PaletteSampler, u); } -float4 clamp_wrap_uv(float4 uv) +float2 clamp_wrap_uv_2(uint mode, float2 uv, float tex_size, float2 min_max, uint2 msk_fix) { - float4 tex_size; - - if (PS_INVALID_TEX0 == 1) - tex_size = WH.zwzw; - else - tex_size = WH.xyxy; - - if(PS_WMS == 2) - { - uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - } - else if(PS_WMS == 3) - { - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - #if PS_FST == 0 - uv.xz = frac(uv.xz); - #endif - uv.xz = (float2)(((uint2)(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; - } - if(PS_WMT == 2) + if (mode == 2) { - uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + return clamp(uv, min_max.xx, min_max.yy); } - else if(PS_WMT == 3) + if (mode == 3) { - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. #if PS_FST == 0 - uv.yw = frac(uv.yw); + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + uv = frac(uv); #endif - uv.yw = (float2)(((uint2)(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; + + return float2((uint2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; } + return uv; +} + +float4 clamp_wrap_uv(float4 uv) +{ + float2 tex_size; + + if (PS_INVALID_TEX0 == 1) + tex_size = WH.zw; + else + tex_size = WH.xy; + + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, MinMax.xz, MskFix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, MinMax.yw, MskFix.yw); return uv; } @@ -311,28 +306,21 @@ float4 fetch_c(int2 uv) // Depth sampling ////////////////////////////////////////////////////////////////////// -int2 clamp_wrap_uv_depth(int2 uv) +int clamp_wrap_uv_depth_1(uint mode, int uv, int2 msk_fix) { - int4 mask = (int4)MskFix << 4; - - if (PS_WMS == 2) - { - uv.x = clamp(uv.x, mask.x, mask.z); - } - else if (PS_WMS == 3) - { - uv.x = (uv.x & mask.x) | mask.z; - } + int2 mask = msk_fix << 4; - if (PS_WMT == 2) - { - uv.y = clamp(uv.y, mask.y, mask.w); - } - else if (PS_WMT == 3) - { - uv.y = (uv.y & mask.y) | mask.w; - } + if (mode == 2) + return clamp(uv, mask.x, mask.y); + if (mode == 3) + return (uv & mask.x) | mask.y; + return uv; +} +int2 clamp_wrap_uv_depth(int2 uv) +{ + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, (int2)MskFix.xz); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, (int2)MskFix.yw); return uv; } diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index fdd4e5a70a2ca..16f431a52f6d8 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -149,38 +149,36 @@ vec4 sample_p(float idx) return texture(PaletteSampler, vec2(idx, 0.0f)); } +vec2 clamp_wrap_uv_2(uint mode, vec2 uv, float tex_size, vec2 min_max, uvec2 msk_fix) +{ + if (mode == 2) + { + return clamp(uv, min_max.xx, min_max.yy); + } + if (mode == 3) + { + #if PS_FST == 0 + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + uv = fract(uv); + #endif + return vec2((uvec2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; + } + return uv; +} + vec4 clamp_wrap_uv(vec4 uv) { - vec4 uv_out = uv; #if PS_INVALID_TEX0 == 1 - vec4 tex_size = WH.zwzw; + vec2 tex_size = WH.zw; #else - vec4 tex_size = WH.xyxy; -#endif - -#if PS_WMS == 2 - uv_out.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); -#elif PS_WMS == 3 - #if PS_FST == 0 - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - uv.xz = fract(uv.xz); - #endif - uv_out.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; + vec2 tex_size = WH.xy; #endif -#if PS_WMT == 2 - uv_out.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); -#elif PS_WMT == 3 - #if PS_FST == 0 - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - uv.yw = fract(uv.yw); - #endif - uv_out.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; -#endif + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, MinMax.xz, MskFix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, MinMax.yw, MskFix.yw); - return uv_out; + return uv; } mat4 sample_4c(vec4 uv) @@ -278,27 +276,24 @@ vec4 fetch_c(ivec2 uv) ////////////////////////////////////////////////////////////////////// // Depth sampling ////////////////////////////////////////////////////////////////////// -ivec2 clamp_wrap_uv_depth(ivec2 uv) +int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) { - ivec2 uv_out = uv; - // Keep the full precision // It allow to multiply the ScalingFactor before the 1/16 coeff - ivec4 mask = ivec4(MskFix) << 4; - -#if PS_WMS == 2 - uv_out.x = clamp(uv.x, mask.x, mask.z); -#elif PS_WMS == 3 - uv_out.x = (uv.x & mask.x) | mask.z; -#endif + ivec2 mask = msk_fix << 4; -#if PS_WMT == 2 - uv_out.y = clamp(uv.y, mask.y, mask.w); -#elif PS_WMT == 3 - uv_out.y = (uv.y & mask.y) | mask.w; -#endif + if (mode == 2) + return clamp(uv, mask.x, mask.y); + if (mode == 3) + return (uv & mask.x) | mask.y; + return uv; +} - return uv_out; +ivec2 clamp_wrap_uv_depth(ivec2 uv) +{ + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, ivec2(MskFix.xz)); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, ivec2(MskFix.yw)); + return uv; } vec4 sample_depth(vec2 st) diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 46e0e87f7dba3..172f547d824fc 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -444,46 +444,37 @@ vec4 sample_p(float u) return texture(Palette, vec2(u, 0.0f)); } -vec4 clamp_wrap_uv(vec4 uv) +vec2 clamp_wrap_uv_2(uint mode, vec2 uv, float tex_size, vec2 min_max, uvec2 msk_fix) { - vec4 tex_size; - - #if PS_INVALID_TEX0 - tex_size = WH.zwzw; - #else - tex_size = WH.xyxy; - #endif - - - #if PS_WMS == 2 - { - uv.xz = clamp(uv.xz, MinMax.xx, MinMax.zz); - } - #elif PS_WMS == 3 - { - #if PS_FST == 0 - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - uv.xz = fract(uv.xz); - #endif - uv.xz = vec2((uvec2(uv.xz * tex_size.xx) & MskFix.xx) | MskFix.zz) / tex_size.xx; - } - #endif - #if PS_WMT == 2 + if (mode == 2) { - uv.yw = clamp(uv.yw, MinMax.yy, MinMax.ww); + return clamp(uv, min_max.xx, min_max.yy); } - #elif PS_WMT == 3 + if (mode == 3) { #if PS_FST == 0 - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - uv.yw = fract(uv.yw); + // wrap negative uv coords to avoid an off by one error that shifted + // textures. Fixes Xenosaga's hair issue. + uv = fract(uv); #endif - uv.yw = vec2((uvec2(uv.yw * tex_size.yy) & MskFix.yy) | MskFix.ww) / tex_size.yy; + return vec2((uvec2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; } + return uv; +} + +vec4 clamp_wrap_uv(vec4 uv) +{ + vec2 tex_size; + + #if PS_INVALID_TEX0 + tex_size = WH.zw; + #else + tex_size = WH.xy; #endif + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, MinMax.xz, MskFix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, MinMax.yw, MskFix.yw); + return uv; } @@ -564,30 +555,21 @@ vec4 fetch_c(ivec2 uv) // Depth sampling ////////////////////////////////////////////////////////////////////// -ivec2 clamp_wrap_uv_depth(ivec2 uv) +int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) { - ivec4 mask = ivec4(MskFix << 4); - - #if (PS_WMS == 2) - { - uv.x = clamp(uv.x, mask.x, mask.z); - } - #elif (PS_WMS == 3) - { - uv.x = (uv.x & mask.x) | mask.z; - } - #endif + ivec2 mask = msk_fix << 4; - #if (PS_WMT == 2) - { - uv.y = clamp(uv.y, mask.y, mask.w); - } - #elif (PS_WMT == 3) - { - uv.y = (uv.y & mask.y) | mask.w; - } - #endif + if (mode == 2) + return clamp(uv, mask.x, mask.y); + if (mode == 3) + return (uv & mask.x) | mask.y; + return uv; +} +ivec2 clamp_wrap_uv_depth(ivec2 uv) +{ + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, ivec2(MskFix.xz)); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, ivec2(MskFix.yw)); return uv; } diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index b4d6dcdf6a156..4762a44f6f0a8 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -263,40 +263,32 @@ struct PSMain return palette.sample(palette_sampler, float2(idx, 0)); } - float4 clamp_wrap_uv(float4 uv) + float2 clamp_wrap_uv_2(uint mode, float2 uv, float tex_size, float2 min_max, uint2 msk_fix) { - float4 uv_out = uv; - float4 tex_size = PS_INVALID_TEX0 ? cb.wh.zwzw : cb.wh.xyxy; - - if (PS_WMS == 2) + if (mode == 2) { - uv_out.xz = clamp(uv.xz, cb.uv_min_max.xx, cb.uv_min_max.zz); + return clamp(uv, min_max.xx, min_max.yy); } - else if (PS_WMS == 3) + if (mode == 3) { // wrap negative uv coords to avoid an off by one error that shifted // textures. Fixes Xenosaga's hair issue. if (!FST) - uv.xz = fract(uv.xz); + uv = fract(uv); - uv_out.xz = float2((ushort2(uv.xz * tex_size.xx) & ushort2(cb.uv_msk_fix.xx)) | ushort2(cb.uv_msk_fix.zz)) / tex_size.xx; + return float2((uint2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; } + return uv; + } - if (PS_WMT == 2) - { - uv_out.yw = clamp(uv.yw, cb.uv_min_max.yy, cb.uv_min_max.ww); - } - else if (PS_WMT == 3) - { - // wrap negative uv coords to avoid an off by one error that shifted - // textures. Fixes Xenosaga's hair issue. - if (!FST) - uv.yw = fract(uv.yw); + float4 clamp_wrap_uv(float4 uv) + { + float2 tex_size = PS_INVALID_TEX0 ? cb.wh.zw : cb.wh.xy; - uv_out.yw = float2((ushort2(uv.yw * tex_size.yy) & ushort2(cb.uv_msk_fix.yy)) | ushort2(cb.uv_msk_fix.ww)) / tex_size.yy; - } + uv.xz = clamp_wrap_uv_2(PS_WMS, uv.xz, tex_size.x, cb.uv_min_max.xz, cb.uv_msk_fix.xz); + uv.yw = clamp_wrap_uv_2(PS_WMT, uv.yw, tex_size.y, cb.uv_min_max.yw, cb.uv_msk_fix.yw); - return uv_out; + return uv; } float4x4 sample_4c(float4 uv) @@ -364,29 +356,29 @@ struct PSMain // MARK: Depth sampling - ushort2 clamp_wrap_uv_depth(ushort2 uv) + uint clamp_wrap_uv_depth_1(uint mode, uint uv, uint2 msk_fix) { - ushort2 uv_out = uv; // Keep the full precision // It allow to multiply the ScalingFactor before the 1/16 coeff - ushort4 mask = ushort4(cb.uv_msk_fix) << 4; + uint2 mask = msk_fix << 4; - if (PS_WMS == 2) - uv_out.x = clamp(uv.x, mask.x, mask.z); - else if (PS_WMS == 3) - uv_out.x = (uv.x & mask.x) | mask.z; - - if (PS_WMT == 2) - uv_out.y = clamp(uv.y, mask.y, mask.w); - else if (PS_WMT == 3) - uv_out.y = (uv.y & mask.y) | mask.w; + if (mode == 2) + return clamp(uv, mask.x, mask.y); + if (mode == 3) + return (uv & mask.x) | mask.y; + return uv; + } - return uv_out; + uint2 clamp_wrap_uv_depth(uint2 uv) + { + uv.x = clamp_wrap_uv_depth_1(PS_WMS, uv.x, cb.uv_msk_fix.xz); + uv.y = clamp_wrap_uv_depth_1(PS_WMT, uv.y, cb.uv_msk_fix.yw); + return uv; } float4 sample_depth(float2 st) { - float2 uv_f = float2(clamp_wrap_uv_depth(ushort2(st))) * (float2(SCALING_FACTOR) * float2(1.f / 16.f)); + float2 uv_f = float2(clamp_wrap_uv_depth(uint2(st))) * (float2(SCALING_FACTOR) * float2(1.f / 16.f)); ushort2 uv = ushort2(uv_f); float4 t = float4(0); From 8196c98a55deb7c21ddcceb97743c46f17e51e23 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 11 Apr 2022 17:20:26 -0500 Subject: [PATCH 3/4] GS: Allow fractional bits in upscaled region repeat --- bin/resources/shaders/dx11/tfx.fx | 12 +++++++++++- bin/resources/shaders/opengl/tfx_fs.glsl | 13 ++++++++++++- bin/resources/shaders/vulkan/tfx.glsl | 13 ++++++++++++- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 3 --- pcsx2/GS/Renderers/Metal/tfx.metal | 12 +++++++++++- 5 files changed, 46 insertions(+), 7 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index ac41f40e8718f..b70331518f949 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -206,7 +206,13 @@ float2 clamp_wrap_uv_2(uint mode, float2 uv, float tex_size, float2 min_max, uin uv = frac(uv); #endif - return float2((uint2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; + uv *= tex_size; + float2 masked = float2((uint2(uv) & msk_fix.xx) | msk_fix.yy); + + if (msk_fix.x & 1) // For upscaling, let the bottom bit mask everything below + masked += frac(uv); + + return masked / tex_size; } return uv; } @@ -313,7 +319,11 @@ int clamp_wrap_uv_depth_1(uint mode, int uv, int2 msk_fix) if (mode == 2) return clamp(uv, mask.x, mask.y); if (mode == 3) + { + if (msk_fix.x & 1) + mask.x |= 0xF; return (uv & mask.x) | mask.y; + } return uv; } diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 16f431a52f6d8..77fb12fb472c8 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -162,7 +162,14 @@ vec2 clamp_wrap_uv_2(uint mode, vec2 uv, float tex_size, vec2 min_max, uvec2 msk // textures. Fixes Xenosaga's hair issue. uv = fract(uv); #endif - return vec2((uvec2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; + + uv *= tex_size; + vec2 masked = vec2((uvec2(uv) & msk_fix.xx) | msk_fix.yy); + + if ((msk_fix.x & 1) != 0) // For upscaling, let the bottom bit mask everything below + masked += fract(uv); + + return masked / tex_size; } return uv; } @@ -285,7 +292,11 @@ int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) if (mode == 2) return clamp(uv, mask.x, mask.y); if (mode == 3) + { + if ((msk_fix.x & 1) != 0) + mask.x |= 0xF; return (uv & mask.x) | mask.y; + } return uv; } diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 172f547d824fc..9f8730f3a47d6 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -457,7 +457,14 @@ vec2 clamp_wrap_uv_2(uint mode, vec2 uv, float tex_size, vec2 min_max, uvec2 msk // textures. Fixes Xenosaga's hair issue. uv = fract(uv); #endif - return vec2((uvec2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; + + uv *= tex_size; + vec2 masked = vec2((uvec2(uv) & msk_fix.xx) | msk_fix.yy); + + if ((msk_fix.x & 1) != 0) // For upscaling, let the bottom bit mask everything below + masked += fract(uv); + + return masked / tex_size; } return uv; } @@ -562,7 +569,11 @@ int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) if (mode == 2) return clamp(uv, mask.x, mask.y); if (mode == 3) + { + if ((msk_fix.x & 1) != 0) + mask.x |= 0xF; return (uv & mask.x) | mask.y; + } return uv; } diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index b67bcc2620bdd..162d77030e4db 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1370,9 +1370,6 @@ void GSRendererHW::Draw() GSVector4i unscaled_size = GSVector4i(GSVector4(m_src->m_texture->GetSize()) / GSVector4(m_src->m_texture->GetScale())); if (m_context->CLAMP.WMS == CLAMP_REPEAT && (tmm.uses_boundary & TextureMinMaxResult::USES_BOUNDARY_U) && unscaled_size.x != tw) { - // Our shader-emulated region repeat doesn't upscale :( - // Try to avoid it if possible - // TODO: Upscale-supporting shader-emulated region repeat if (unscaled_size.x < tw && m_vt.m_min.t.x > -(tw - unscaled_size.x) && m_vt.m_max.t.x < tw) { // Game only extends into data we don't have (but doesn't wrap around back onto good data), clamp seems like the most reasonable solution diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index 4762a44f6f0a8..fa72c603f86bc 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -276,7 +276,13 @@ struct PSMain if (!FST) uv = fract(uv); - return float2((uint2(uv * tex_size) & msk_fix.xx) | msk_fix.yy) / tex_size; + uv *= tex_size; + float2 masked = float2((uint2(uv) & msk_fix.xx) | msk_fix.yy); + + if (msk_fix.x & 1) // For upscaling, let the bottom bit mask everything below + masked += fract(uv); + + return masked / tex_size; } return uv; } @@ -365,7 +371,11 @@ struct PSMain if (mode == 2) return clamp(uv, mask.x, mask.y); if (mode == 3) + { + if (msk_fix.x & 1) + mask.x |= 0xF; return (uv & mask.x) | mask.y; + } return uv; } From f197d4261e4e8b4be66de62010824a829f1ac423 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Mon, 11 Apr 2022 17:39:31 -0500 Subject: [PATCH 4/4] GS: adjust end coordinate of region clamp for upscaling --- bin/resources/shaders/dx11/tfx.fx | 2 +- bin/resources/shaders/opengl/tfx_fs.glsl | 2 +- bin/resources/shaders/vulkan/tfx.glsl | 2 +- pcsx2/GS/Renderers/HW/GSRendererNew.cpp | 3 ++- pcsx2/GS/Renderers/Metal/tfx.metal | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index b70331518f949..5600479fa781e 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -317,7 +317,7 @@ int clamp_wrap_uv_depth_1(uint mode, int uv, int2 msk_fix) int2 mask = msk_fix << 4; if (mode == 2) - return clamp(uv, mask.x, mask.y); + return clamp(uv, mask.x, mask.y | 0xF); if (mode == 3) { if (msk_fix.x & 1) diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 77fb12fb472c8..c90432003656b 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -290,7 +290,7 @@ int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) ivec2 mask = msk_fix << 4; if (mode == 2) - return clamp(uv, mask.x, mask.y); + return clamp(uv, mask.x, mask.y | 0xF); if (mode == 3) { if ((msk_fix.x & 1) != 0) diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 9f8730f3a47d6..654330b03e800 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -567,7 +567,7 @@ int clamp_wrap_uv_depth_1(uint mode, int uv, ivec2 msk_fix) ivec2 mask = msk_fix << 4; if (mode == 2) - return clamp(uv, mask.x, mask.y); + return clamp(uv, mask.x, mask.y | 0xF); if (mode == 3) { if ((msk_fix.x & 1) != 0) diff --git a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp index 5edad8bd7aab4..9f48bc555c6d8 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererNew.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererNew.cpp @@ -1222,7 +1222,8 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex) if (complex_wms_wmt) { m_conf.cb_ps.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);; - m_conf.cb_ps.MinMax = GSVector4(m_conf.cb_ps.MskFix) / WH.xyxy(); + GSVector4 upscale_offset(0.f, 0.f, (15.f / 16.f), (15.f / 16.f)); // Adjust end position to the end of the upscaled pixel + m_conf.cb_ps.MinMax = (GSVector4(m_conf.cb_ps.MskFix) + upscale_offset) / WH.xyxy(); } else if (trilinear_manual) { diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index fa72c603f86bc..09ca81370017b 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -369,7 +369,7 @@ struct PSMain uint2 mask = msk_fix << 4; if (mode == 2) - return clamp(uv, mask.x, mask.y); + return clamp(uv, mask.x, mask.y | 0xF); if (mode == 3) { if (msk_fix.x & 1)