From e108828bcc332476eafc2d8de6da3c3593f00783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Fri, 15 May 2026 00:15:32 +0200 Subject: [PATCH 1/4] Unify Z handling between the backends --- GPU/Common/VertexShaderGenerator.cpp | 6 ++++++ GPU/GLES/ShaderManagerGLES.cpp | 18 ++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 52295fb9c2fd..1272b8911025 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1294,6 +1294,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " %sgl_Position.y *= u_scaleY;\n", compat.vsOutPrefix); } + if (compat.depthMinusOneToOne) { + // Convert from 0->1 to -1->1 depth range. + WRITE(p, " %sgl_Position.z = %sgl_Position.z * 2.0 - %sgl_Position.w;\n", compat.vsOutPrefix, compat.vsOutPrefix, compat.vsOutPrefix); + // The formula takes the z component of gl_Position, which is currently in the range [0, w] (where w is the homogeneous coordinate), and transforms it to the range [-w, w]. This is done by first multiplying by 2 to scale the range from [0, w] to [0, 2w], and then subtracting w to shift the range to [-w, w]. This effectively converts the depth range from 0->1 to -1->1 after perspective division (when gl_Position is divided by w). + } + if (needsZWHack) { // See comment in thin3d_vulkan.cpp. 
WRITE(p, " if (%sgl_Position.z == %sgl_Position.w) %sgl_Position.z *= 0.999999;\n", diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 6f8678675cc0..1d1dcfd35ecd 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -308,9 +308,9 @@ static void SetMatrix4x3(GLRenderManager *render, GLint *uniform, const float *m render->SetUniformM4x4(uniform, m4x4); } -static inline void ConvertProjMatrixToGL(Matrix4x4 &in) { - const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset); - const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale); +static void ConvertProjMatrixToZeroToOneDepth(Matrix4x4 &in) { + const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f); + const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f); in.translateAndScale(trans, scale); } @@ -429,7 +429,7 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin UpdateVRParams(gstate.projMatrix); FlipProjMatrix(vrProjection); - ConvertProjMatrixToGL(vrProjection); + ConvertProjMatrixToZeroToOneDepth(vrProjection); render_->SetUniformM4x4(&u_proj_lens, vrProjection.m); } @@ -438,13 +438,19 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float)); FlipProjMatrix(flippedMatrix); - ConvertProjMatrixToGL(flippedMatrix); + ConvertProjMatrixToZeroToOneDepth(flippedMatrix); render_->SetUniformM4x4(&u_proj, flippedMatrix.m); } if (dirty & DIRTY_PROJTHROUGHMATRIX) { Matrix4x4 proj_through; - proj_through.setOrthoGL(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f); + proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f); + + // Negative RT offsets come from split framebuffers (Killzone) + if (gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0) { + proj_through.wx 
+= 2.0f * (float)gstate_c.curRTOffsetX / (float)gstate_c.curRTWidth; + proj_through.wy += 2.0f * (float)gstate_c.curRTOffsetY / (float)gstate_c.curRTHeight; + } render_->SetUniformM4x4(&u_proj_through, proj_through.getReadPtr()); } if (dirty & DIRTY_TEXENV) { From 65c99beea78f823bd11ca9f968c268e475bbfada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 19 May 2026 15:43:25 +0200 Subject: [PATCH 2/4] Remove some old debris --- GPU/Common/SoftwareTransformCommon.cpp | 2 +- GPU/Common/SoftwareTransformCommon.h | 2 -- GPU/D3D11/DrawEngineD3D11.cpp | 2 -- GPU/GLES/DrawEngineGLES.cpp | 2 -- GPU/Vulkan/DrawEngineVulkan.cpp | 2 -- 5 files changed, 1 insertion(+), 9 deletions(-) diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 5c29975a3df5..1d824905b778 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -558,7 +558,7 @@ void SoftwareTransform::CalcCullParams(float &minZValue, float &maxZValue) const maxZValue = 1.000030517578125f * gstate_c.vpDepthScale; minZValue = -maxZValue; // Scale and offset the Z appropriately, since we baked that into a projection transform. - if (params_.usesHalfZ) { + if (true) { // all backends are "use half z" now maxZValue = maxZValue * 0.5f + 0.5f + gstate_c.vpZOffset * 0.5f; minZValue = minZValue * 0.5f + 0.5f + gstate_c.vpZOffset * 0.5f; } else { diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index e90a74f2b623..c3e003c5516b 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -58,8 +58,6 @@ struct SoftwareTransformParams { TextureCacheCommon *texCache; bool allowClear; bool allowSeparateAlphaClear; - bool flippedY; - bool usesHalfZ; }; // Converts an index buffer to make the provoking vertex the last. 
diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index cc14e712f3a3..3e68a83d327f 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -406,8 +406,6 @@ void DrawEngineD3D11::Flush() { params.texCache = textureCache_; params.allowClear = true; params.allowSeparateAlphaClear = false; // D3D11 doesn't support separate alpha clears - params.flippedY = false; - params.usesHalfZ = true; if (gstate.getShadeMode() == GE_SHADE_FLAT) { // We need to rotate the index buffer to simulate a different provoking vertex. diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 4728c57083dd..e2d715e3d831 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -351,8 +351,6 @@ void DrawEngineGLES::Flush() { params.texCache = textureCache_; params.allowClear = true; // Clear in OpenGL respects scissor rects, so we'll use it. params.allowSeparateAlphaClear = true; - params.flippedY = framebufferManager_->UseBufferedRendering(); - params.usesHalfZ = false; // We need correct viewport values in gstate_c already. if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) { diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 7ba0c615a5d4..dbaaac7111fa 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -420,8 +420,6 @@ void DrawEngineVulkan::Flush() { IndexBufferProvokingLastToFirst(prim, inds, vertexCount); } } - params.flippedY = true; - params.usesHalfZ = true; // We need to update the viewport early because it's checked for flipping in SoftwareTransform. // We don't have a "DrawStateEarly" in vulkan, so... 
From 0387cc44971b066ee4a75ac88f889c02b5f73a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 11 Jan 2026 15:02:38 +0100 Subject: [PATCH 3/4] Remove a confusing operator overload from Lin::Vec3 --- Common/Math/lin/matrix4x4.cpp | 6 +++--- Common/Math/lin/vec3.cpp | 1 - Common/Math/lin/vec3.h | 21 ++++++++------------- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Common/Math/lin/matrix4x4.cpp b/Common/Math/lin/matrix4x4.cpp index 44a689ff7c3e..701de04aaa03 100644 --- a/Common/Math/lin/matrix4x4.cpp +++ b/Common/Math/lin/matrix4x4.cpp @@ -37,9 +37,9 @@ void Matrix4x4::setViewFrame(const Vec3 &pos, const Vec3 &vRight, const Vec3 &vV yx = vRight.y; yy = vUp.y; yz=vView.y; yw = 0.0f; zx = vRight.z; zy = vUp.z; zz=vView.z; zw = 0.0f; - wx = -pos * vRight; - wy = -pos * vUp; - wz = -pos * vView; + wx = dot(-pos, vRight); + wy = dot(-pos, vUp); + wz = dot(-pos, vView); ww = 1.0f; } diff --git a/Common/Math/lin/vec3.cpp b/Common/Math/lin/vec3.cpp index 8329e2191e7c..6692a6164f20 100644 --- a/Common/Math/lin/vec3.cpp +++ b/Common/Math/lin/vec3.cpp @@ -10,7 +10,6 @@ Vec3 Vec3::operator *(const Matrix4x4 &m) const { x*m.xy + y*m.yy + z*m.zy + m.wy, x*m.xz + y*m.yz + z*m.zz + m.wz); } - Vec3 Vec3::rotatedBy(const Matrix4x4 &m) const { return Vec3(x*m.xx + y*m.yx + z*m.zx, x*m.xy + y*m.yy + z*m.zy, diff --git a/Common/Math/lin/vec3.h b/Common/Math/lin/vec3.h index 988a2d08b5a6..e338457ca5d4 100644 --- a/Common/Math/lin/vec3.h +++ b/Common/Math/lin/vec3.h @@ -39,7 +39,7 @@ class Vec3 { x+=other.x; y+=other.y; z+=other.z; } Vec3 operator -(const Vec3 &v) const { - return Vec3(x-v.x,y-v.y,z-v.z); + return Vec3(x-v.x, y-v.y, z-v.z); } void operator -= (const Vec3 &other) { @@ -48,9 +48,8 @@ class Vec3 { Vec3 operator -() const { return Vec3(-x,-y,-z); } - - Vec3 operator * (const float f) const { - return Vec3(x*f,y*f,z*f); + Vec3 operator *(const float f) const { + return Vec3(x * f, y * f, z * f); } Vec3 operator / (const float f) 
const { float invf = (1.0f/f); @@ -60,9 +59,6 @@ class Vec3 { { *this = *this / f; } - float operator * (const Vec3 &other) const { - return x*other.x + y*other.y + z*other.z; - } void operator *= (const float f) { *this = *this * f; } @@ -72,9 +68,6 @@ class Vec3 { Vec3 scaledBy(const Vec3 &other) const { return Vec3(x*other.x, y*other.y, z*other.z); } - Vec3 scaledByInv(const Vec3 &other) const { - return Vec3(x/other.x, y/other.y, z/other.z); - } Vec3 operator *(const Matrix4x4 &m) const; void operator *=(const Matrix4x4 &m) { *this = *this * m; @@ -90,7 +83,7 @@ class Vec3 { return sqrtf(length2()); } void setLength(const float l) { - (*this) *= l/length(); + (*this) *= l / length(); } Vec3 withLength(const float l) const { return (*this) * l / length(); @@ -116,11 +109,13 @@ class Vec3 { return (*this)*(1-t) + other*t; } void setZero() { - memset((void *)this,0,sizeof(float)*3); + x = 0.0f; + y = 0.0f; + z = 0.0f; } }; -inline Vec3 operator * (const float f, const Vec3 &v) {return v * f;} +inline Vec3 operator * (const float f, const Vec3 &v) { return v * f; } // In new code, prefer these to the operators. 
From 736d054b8f1282dc0d9bcd1cc1daf51ffa485bbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Tue, 19 May 2026 15:41:35 +0200 Subject: [PATCH 4/4] Remove the u_proj_through matrix from the uniform buffer --- GPU/Common/ShaderUniforms.cpp | 14 ++++---------- GPU/Common/ShaderUniforms.h | 6 +++--- GPU/Common/VertexShaderGenerator.cpp | 9 +++++++-- GPU/GLES/ShaderManagerGLES.cpp | 17 +++++++---------- GPU/GLES/ShaderManagerGLES.h | 2 +- 5 files changed, 22 insertions(+), 26 deletions(-) diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 75e9fd134445..e8b290d2acb1 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -151,16 +151,10 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView } if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) { - Matrix4x4 proj_through; - proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1); - - // Negative RT offsets come from split framebuffers (Killzone) - if (gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0) { - proj_through.wx += 2.0f * (float)gstate_c.curRTOffsetX / (float)gstate_c.curRTWidth; - proj_through.wy += 2.0f * (float)gstate_c.curRTOffsetY / (float)gstate_c.curRTHeight; - } - - CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr()); + ub->xywh[0] = (float)gstate_c.curRTOffsetX; + ub->xywh[1] = (float)gstate_c.curRTOffsetY; + ub->xywh[2] = (float)gstate_c.curRTWidth; + ub->xywh[3] = (float)gstate_c.curRTHeight; ub->rotation = useBufferedRendering ? 0 : (float)g_display.rotation; } diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h index a63b12e2e09d..a9f37551e322 100644 --- a/GPU/Common/ShaderUniforms.h +++ b/GPU/Common/ShaderUniforms.h @@ -21,7 +21,7 @@ enum : uint64_t { // Every line here is a 4-float. struct alignas(16) UB_VS_FS_Base { float proj[16]; - float proj_through[16]; + float xywh[4]; // later, we could invert w and h here to avoid division. 
float view[12]; float world[12]; float tex[12]; @@ -43,11 +43,11 @@ struct alignas(16) UB_VS_FS_Base { // VR stuff is to go here, later. For normal drawing, we can then get away // with just uploading the first 448 bytes of the struct (up to and including fogCoef). }; -static_assert(sizeof(UB_VS_FS_Base) == 480, "UB_VS_FS_Base should be 480 bytes"); +static_assert(sizeof(UB_VS_FS_Base) == 432, "UB_VS_FS_Base should be 432 bytes"); static const char * const ub_baseStr = R"( mat4 u_proj; - mat4 u_proj_through; + vec4 u_xywh; mat3x4 u_view; mat3x4 u_world; mat3x4 u_texmtx; diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 1272b8911025..646173972128 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -410,7 +410,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag } if (isModeThrough) { - WRITE(p, "uniform mat4 u_proj_through;\n"); + WRITE(p, "uniform vec4 u_xywh;\n"); *uniformMask |= DIRTY_PROJTHROUGHMATRIX; } else if (useHWTransform) { if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY)) { @@ -746,7 +746,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag WRITE(p, " %sv_fogdepth = fog;\n", compat.vsOutPrefix); if (isModeThrough) { // The proj_through matrix already has the rotation, if needed. - WRITE(p, " vec4 outPos = mul(u_proj_through, vec4(position.xyz, 1.0));\n"); + // NOTE: In through mode, we can ignore W, it's always 1.0. However, + // this transform will later be applied in all modes. + WRITE(p, " vec4 outPos;\n"); + WRITE(p, " outPos.xy = ((position.xy - u_xywh.xy * position.w) / u_xywh.zw) * 2.0 - 1.0;\n"); + WRITE(p, " outPos.zw = position.zw;\n"); + // WRITE(p, " vec4 outPos = mul(u_proj_through, vec4(position.xyz, 1.0));\n"); } else { // The viewport is used in this case, so need to compensate for that. 
if (gstate_c.Use(GPU_ROUND_DEPTH_TO_16BIT)) { diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 1d1dcfd35ecd..87d744aaa1ba 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -105,7 +105,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, queries.push_back({ &u_proj, "u_proj" }); queries.push_back({ &u_proj_lens, "u_proj_lens" }); - queries.push_back({ &u_proj_through, "u_proj_through" }); + queries.push_back({ &u_xywh, "u_xywh" }); queries.push_back({ &u_texenv, "u_texenv" }); queries.push_back({ &u_fogcolor, "u_fogcolor" }); queries.push_back({ &u_fogcoef, "u_fogcoef" }); @@ -443,15 +443,12 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin render_->SetUniformM4x4(&u_proj, flippedMatrix.m); } if (dirty & DIRTY_PROJTHROUGHMATRIX) { - Matrix4x4 proj_through; - proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f); - - // Negative RT offsets come from split framebuffers (Killzone) - if (gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0) { - proj_through.wx += 2.0f * (float)gstate_c.curRTOffsetX / (float)gstate_c.curRTWidth; - proj_through.wy += 2.0f * (float)gstate_c.curRTOffsetY / (float)gstate_c.curRTHeight; - } - render_->SetUniformM4x4(&u_proj_through, proj_through.getReadPtr()); + float xywh[4]; + xywh[0] = (float)gstate_c.curRTOffsetX; + xywh[1] = (float)gstate_c.curRTOffsetY; + xywh[2] = (float)gstate_c.curRTWidth; + xywh[3] = (float)gstate_c.curRTHeight; + SetFloatUniform4(render_, &u_xywh, xywh); } if (dirty & DIRTY_TEXENV) { SetColorUniform3(render_, &u_texenv, gstate.texenvcolor); diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index 35fc2054e883..5082042ad243 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -60,7 +60,7 @@ class LinkedShader { int u_tex; int u_proj; int u_proj_lens; - int u_proj_through; + int u_xywh; int 
u_texenv; int u_view; int u_texmtx;