From e108828bcc332476eafc2d8de6da3c3593f00783 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Fri, 15 May 2026 00:15:32 +0200
Subject: [PATCH 1/4] Unify Z handling between the backends

---
 GPU/Common/VertexShaderGenerator.cpp |  6 ++++++
 GPU/GLES/ShaderManagerGLES.cpp       | 18 ++++++++++++------
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp
index 52295fb9c2fd..1272b8911025 100644
--- a/GPU/Common/VertexShaderGenerator.cpp
+++ b/GPU/Common/VertexShaderGenerator.cpp
@@ -1294,6 +1294,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
 		WRITE(p, "  %sgl_Position.y *= u_scaleY;\n", compat.vsOutPrefix);
 	}
 
+	if (compat.depthMinusOneToOne) {
+		// Convert from 0->1 to -1->1 depth range.
+		WRITE(p, "  %sgl_Position.z = %sgl_Position.z * 2.0 - %sgl_Position.w;\n", compat.vsOutPrefix, compat.vsOutPrefix, compat.vsOutPrefix);
+		// z is in [0, w] at this point; z * 2 - w remaps it to [-w, w], which becomes [-1, 1] after the perspective divide by w.
+	}
+
 	if (needsZWHack) {
 		// See comment in thin3d_vulkan.cpp.
 		WRITE(p, "  if (%sgl_Position.z == %sgl_Position.w) %sgl_Position.z *= 0.999999;\n",
diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp
index 6f8678675cc0..1d1dcfd35ecd 100644
--- a/GPU/GLES/ShaderManagerGLES.cpp
+++ b/GPU/GLES/ShaderManagerGLES.cpp
@@ -308,9 +308,9 @@ static void SetMatrix4x3(GLRenderManager *render, GLint *uniform, const float *m
 	render->SetUniformM4x4(uniform, m4x4);
 }
 
-static inline void ConvertProjMatrixToGL(Matrix4x4 &in) {
-	const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset);
-	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale);
+static void ConvertProjMatrixToZeroToOneDepth(Matrix4x4 &in) {
+	const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
+	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
 	in.translateAndScale(trans, scale);
 }
 
@@ -429,7 +429,7 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin
 			UpdateVRParams(gstate.projMatrix);
 
 			FlipProjMatrix(vrProjection);
-			ConvertProjMatrixToGL(vrProjection);
+			ConvertProjMatrixToZeroToOneDepth(vrProjection);
 
 			render_->SetUniformM4x4(&u_proj_lens, vrProjection.m);
 		}
@@ -438,13 +438,19 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin
 		memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
 
 		FlipProjMatrix(flippedMatrix);
-		ConvertProjMatrixToGL(flippedMatrix);
+		ConvertProjMatrixToZeroToOneDepth(flippedMatrix);
 
 		render_->SetUniformM4x4(&u_proj, flippedMatrix.m);
 	}
 	if (dirty & DIRTY_PROJTHROUGHMATRIX) {
 		Matrix4x4 proj_through;
-		proj_through.setOrthoGL(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
+		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
+
+		// Negative RT offsets come from split framebuffers (Killzone)
+		if (gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0) {
+			proj_through.wx += 2.0f * (float)gstate_c.curRTOffsetX / (float)gstate_c.curRTWidth;
+			proj_through.wy += 2.0f * (float)gstate_c.curRTOffsetY / (float)gstate_c.curRTHeight;
+		}
 		render_->SetUniformM4x4(&u_proj_through, proj_through.getReadPtr());
 	}
 	if (dirty & DIRTY_TEXENV) {

From 65c99beea78f823bd11ca9f968c268e475bbfada Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Tue, 19 May 2026 15:43:25 +0200
Subject: [PATCH 2/4] Remove some old debris

---
 GPU/Common/SoftwareTransformCommon.cpp | 2 +-
 GPU/Common/SoftwareTransformCommon.h   | 2 --
 GPU/D3D11/DrawEngineD3D11.cpp          | 2 --
 GPU/GLES/DrawEngineGLES.cpp            | 2 --
 GPU/Vulkan/DrawEngineVulkan.cpp        | 2 --
 5 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp
index 5c29975a3df5..1d824905b778 100644
--- a/GPU/Common/SoftwareTransformCommon.cpp
+++ b/GPU/Common/SoftwareTransformCommon.cpp
@@ -558,7 +558,7 @@ void SoftwareTransform::CalcCullParams(float &minZValue, float &maxZValue) const
 	maxZValue = 1.000030517578125f * gstate_c.vpDepthScale;
 	minZValue = -maxZValue;
 	// Scale and offset the Z appropriately, since we baked that into a projection transform.
-	if (params_.usesHalfZ) {
+	if (true) {  // all backends are "use half z" now
 		maxZValue = maxZValue * 0.5f + 0.5f + gstate_c.vpZOffset * 0.5f;
 		minZValue = minZValue * 0.5f + 0.5f + gstate_c.vpZOffset * 0.5f;
 	} else {
diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h
index e90a74f2b623..c3e003c5516b 100644
--- a/GPU/Common/SoftwareTransformCommon.h
+++ b/GPU/Common/SoftwareTransformCommon.h
@@ -58,8 +58,6 @@ struct SoftwareTransformParams {
 	TextureCacheCommon *texCache;
 	bool allowClear;
 	bool allowSeparateAlphaClear;
-	bool flippedY;
-	bool usesHalfZ;
 };
 
 // Converts an index buffer to make the provoking vertex the last.
diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp
index cc14e712f3a3..3e68a83d327f 100644
--- a/GPU/D3D11/DrawEngineD3D11.cpp
+++ b/GPU/D3D11/DrawEngineD3D11.cpp
@@ -406,8 +406,6 @@ void DrawEngineD3D11::Flush() {
 		params.texCache = textureCache_;
 		params.allowClear = true;
 		params.allowSeparateAlphaClear = false;  // D3D11 doesn't support separate alpha clears
-		params.flippedY = false;
-		params.usesHalfZ = true;
 
 		if (gstate.getShadeMode() == GE_SHADE_FLAT) {
 			// We need to rotate the index buffer to simulate a different provoking vertex.
diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp
index 4728c57083dd..e2d715e3d831 100644
--- a/GPU/GLES/DrawEngineGLES.cpp
+++ b/GPU/GLES/DrawEngineGLES.cpp
@@ -351,8 +351,6 @@ void DrawEngineGLES::Flush() {
 		params.texCache = textureCache_;
 		params.allowClear = true;  // Clear in OpenGL respects scissor rects, so we'll use it.
 		params.allowSeparateAlphaClear = true;
-		params.flippedY = framebufferManager_->UseBufferedRendering();
-		params.usesHalfZ = false;
 
 		// We need correct viewport values in gstate_c already.
 		if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index 7ba0c615a5d4..dbaaac7111fa 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -420,8 +420,6 @@ void DrawEngineVulkan::Flush() {
 				IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
 			}
 		}
-		params.flippedY = true;
-		params.usesHalfZ = true;
 
 		// We need to update the viewport early because it's checked for flipping in SoftwareTransform.
 		// We don't have a "DrawStateEarly" in vulkan, so...

From 0387cc44971b066ee4a75ac88f889c02b5f73a14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Sun, 11 Jan 2026 15:02:38 +0100
Subject: [PATCH 3/4] Remove a confusing operator overload from Lin::Vec3

---
 Common/Math/lin/matrix4x4.cpp |  6 +++---
 Common/Math/lin/vec3.cpp      |  1 -
 Common/Math/lin/vec3.h        | 21 ++++++++-------------
 3 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/Common/Math/lin/matrix4x4.cpp b/Common/Math/lin/matrix4x4.cpp
index 44a689ff7c3e..701de04aaa03 100644
--- a/Common/Math/lin/matrix4x4.cpp
+++ b/Common/Math/lin/matrix4x4.cpp
@@ -37,9 +37,9 @@ void Matrix4x4::setViewFrame(const Vec3 &pos, const Vec3 &vRight, const Vec3 &vV
 	yx = vRight.y; yy = vUp.y; yz=vView.y; yw = 0.0f;
 	zx = vRight.z; zy = vUp.z; zz=vView.z; zw = 0.0f;
 
-	wx = -pos * vRight;
-	wy = -pos * vUp;
-	wz = -pos * vView;
+	wx = dot(-pos, vRight);
+	wy = dot(-pos, vUp);
+	wz = dot(-pos, vView);
 	ww = 1.0f;
 }
 
diff --git a/Common/Math/lin/vec3.cpp b/Common/Math/lin/vec3.cpp
index 8329e2191e7c..6692a6164f20 100644
--- a/Common/Math/lin/vec3.cpp
+++ b/Common/Math/lin/vec3.cpp
@@ -10,7 +10,6 @@ Vec3 Vec3::operator *(const Matrix4x4 &m) const {
 		x*m.xy + y*m.yy + z*m.zy + m.wy,
 		x*m.xz + y*m.yz + z*m.zz + m.wz);
 }
-
 Vec3 Vec3::rotatedBy(const Matrix4x4 &m) const {
 	return Vec3(x*m.xx + y*m.yx + z*m.zx,
 		x*m.xy + y*m.yy + z*m.zy,
diff --git a/Common/Math/lin/vec3.h b/Common/Math/lin/vec3.h
index 988a2d08b5a6..e338457ca5d4 100644
--- a/Common/Math/lin/vec3.h
+++ b/Common/Math/lin/vec3.h
@@ -39,7 +39,7 @@ class Vec3 {
 		x+=other.x; y+=other.y; z+=other.z;
 	}
 	Vec3 operator -(const Vec3 &v) const {
-		return Vec3(x-v.x,y-v.y,z-v.z);
+		return Vec3(x-v.x, y-v.y, z-v.z);
 	}
 	void operator -= (const Vec3 &other)
 	{
@@ -48,9 +48,8 @@ class Vec3 {
 	Vec3 operator -() const {
 		return Vec3(-x,-y,-z);
 	}
-
-	Vec3 operator * (const float f) const {
-		return Vec3(x*f,y*f,z*f);
+	Vec3 operator *(const float f) const {
+		return Vec3(x * f, y * f, z * f);
 	}
 	Vec3 operator / (const float f) const {
 		float invf = (1.0f/f);
@@ -60,9 +59,6 @@ class Vec3 {
 	{
 		*this = *this / f;
 	}
-	float operator * (const Vec3 &other) const {
-		return x*other.x + y*other.y + z*other.z;
-	}
 	void operator *= (const float f) {
 		*this = *this * f;
 	}
@@ -72,9 +68,6 @@ class Vec3 {
 	Vec3 scaledBy(const Vec3 &other) const {
 		return Vec3(x*other.x, y*other.y, z*other.z);
 	}
-	Vec3 scaledByInv(const Vec3 &other) const {
-		return Vec3(x/other.x, y/other.y, z/other.z);
-	}
 	Vec3 operator *(const Matrix4x4 &m) const;
 	void operator *=(const Matrix4x4 &m) {
 		*this = *this * m;
@@ -90,7 +83,7 @@ class Vec3 {
 		return sqrtf(length2());
 	}
 	void setLength(const float l) {
-		(*this) *= l/length();
+		(*this) *= l / length();
 	}
 	Vec3 withLength(const float l) const {
 		return (*this) * l / length();
@@ -116,11 +109,13 @@ class Vec3 {
 		return (*this)*(1-t) + other*t;
 	}
 	void setZero() {
-		memset((void *)this,0,sizeof(float)*3);
+		x = 0.0f;
+		y = 0.0f;
+		z = 0.0f;
 	}
 };
 
-inline Vec3 operator * (const float f, const Vec3 &v) {return v * f;}
+inline Vec3 operator * (const float f, const Vec3 &v) { return v * f; }
 
 // In new code, prefer these to the operators.
 

From 736d054b8f1282dc0d9bcd1cc1daf51ffa485bbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= <hrydgard@gmail.com>
Date: Tue, 19 May 2026 15:41:35 +0200
Subject: [PATCH 4/4] Remove the u_proj_through matrix from the uniform buffer

---
 GPU/Common/ShaderUniforms.cpp        | 14 ++++----------
 GPU/Common/ShaderUniforms.h          |  6 +++---
 GPU/Common/VertexShaderGenerator.cpp |  9 +++++++--
 GPU/GLES/ShaderManagerGLES.cpp       | 17 +++++++----------
 GPU/GLES/ShaderManagerGLES.h         |  2 +-
 5 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp
index 75e9fd134445..e8b290d2acb1 100644
--- a/GPU/Common/ShaderUniforms.cpp
+++ b/GPU/Common/ShaderUniforms.cpp
@@ -151,16 +151,10 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
 	}
 
 	if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
-		Matrix4x4 proj_through;
-		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
-
-		// Negative RT offsets come from split framebuffers (Killzone)
-		if (gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0) {
-			proj_through.wx += 2.0f * (float)gstate_c.curRTOffsetX / (float)gstate_c.curRTWidth;
-			proj_through.wy += 2.0f * (float)gstate_c.curRTOffsetY / (float)gstate_c.curRTHeight;
-		}
-
-		CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());
+		ub->xywh[0] = (float)gstate_c.curRTOffsetX;
+		ub->xywh[1] = (float)gstate_c.curRTOffsetY;
+		ub->xywh[2] = (float)gstate_c.curRTWidth;
+		ub->xywh[3] = (float)gstate_c.curRTHeight;
 
 		ub->rotation = useBufferedRendering ? 0 : (float)g_display.rotation;
 	}
diff --git a/GPU/Common/ShaderUniforms.h b/GPU/Common/ShaderUniforms.h
index a63b12e2e09d..a9f37551e322 100644
--- a/GPU/Common/ShaderUniforms.h
+++ b/GPU/Common/ShaderUniforms.h
@@ -21,7 +21,7 @@ enum : uint64_t {
 // Every line here is a 4-float.
 struct alignas(16) UB_VS_FS_Base {
 	float proj[16];
-	float proj_through[16];
+	float xywh[4];  // later, we could invert w and h here to avoid division.
 	float view[12];
 	float world[12];
 	float tex[12];
@@ -43,11 +43,11 @@ struct alignas(16) UB_VS_FS_Base {
 	// VR stuff is to go here, later. For normal drawing, we can then get away
 	// with just uploading the first 448 bytes of the struct (up to and including fogCoef).
 };
-static_assert(sizeof(UB_VS_FS_Base) == 480, "UB_VS_FS_Base should be 480 bytes");
+static_assert(sizeof(UB_VS_FS_Base) == 432, "UB_VS_FS_Base should be 432 bytes");
 
 static const char * const ub_baseStr =
 R"(  mat4 u_proj;
-  mat4 u_proj_through;
+  vec4 u_xywh;
   mat3x4 u_view;
   mat3x4 u_world;
   mat3x4 u_texmtx;
diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp
index 1272b8911025..646173972128 100644
--- a/GPU/Common/VertexShaderGenerator.cpp
+++ b/GPU/Common/VertexShaderGenerator.cpp
@@ -410,7 +410,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
 		}
 
 		if (isModeThrough) {
-			WRITE(p, "uniform mat4 u_proj_through;\n");
+			WRITE(p, "uniform vec4 u_xywh;\n");
 			*uniformMask |= DIRTY_PROJTHROUGHMATRIX;
 		} else if (useHWTransform) {
 			if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY)) {
@@ -746,7 +746,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
 		WRITE(p, "  %sv_fogdepth = fog;\n", compat.vsOutPrefix);
 		if (isModeThrough)	{
 			// The proj_through matrix already has the rotation, if needed.
-			WRITE(p, "  vec4 outPos = mul(u_proj_through, vec4(position.xyz, 1.0));\n");
+			// NOTE: In through mode, we can ignore W, it's always 1.0. However,
+			// this transform will later be applied in all modes.
+			WRITE(p, "  vec4 outPos;\n");
+			WRITE(p, "  outPos.xy = ((position.xy - u_xywh.xy * position.w) / u_xywh.zw) * 2.0 - 1.0;\n");
+			WRITE(p, "  outPos.zw = position.zw;\n");
+			// WRITE(p, "  vec4 outPos = mul(u_proj_through, vec4(position.xyz, 1.0));\n");
 		} else {
 			// The viewport is used in this case, so need to compensate for that.
 			if (gstate_c.Use(GPU_ROUND_DEPTH_TO_16BIT)) {
diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp
index 1d1dcfd35ecd..87d744aaa1ba 100644
--- a/GPU/GLES/ShaderManagerGLES.cpp
+++ b/GPU/GLES/ShaderManagerGLES.cpp
@@ -105,7 +105,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
 
 	queries.push_back({ &u_proj, "u_proj" });
 	queries.push_back({ &u_proj_lens, "u_proj_lens" });
-	queries.push_back({ &u_proj_through, "u_proj_through" });
+	queries.push_back({ &u_xywh, "u_xywh" });
 	queries.push_back({ &u_texenv, "u_texenv" });
 	queries.push_back({ &u_fogcolor, "u_fogcolor" });
 	queries.push_back({ &u_fogcoef, "u_fogcoef" });
@@ -443,15 +443,12 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin
 		render_->SetUniformM4x4(&u_proj, flippedMatrix.m);
 	}
 	if (dirty & DIRTY_PROJTHROUGHMATRIX) {
-		Matrix4x4 proj_through;
-		proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
-
-		// Negative RT offsets come from split framebuffers (Killzone)
-		if (gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0) {
-			proj_through.wx += 2.0f * (float)gstate_c.curRTOffsetX / (float)gstate_c.curRTWidth;
-			proj_through.wy += 2.0f * (float)gstate_c.curRTOffsetY / (float)gstate_c.curRTHeight;
-		}
-		render_->SetUniformM4x4(&u_proj_through, proj_through.getReadPtr());
+		float xywh[4];
+		xywh[0] = (float)gstate_c.curRTOffsetX;
+		xywh[1] = (float)gstate_c.curRTOffsetY;
+		xywh[2] = (float)gstate_c.curRTWidth;
+		xywh[3] = (float)gstate_c.curRTHeight;
+		SetFloatUniform4(render_, &u_xywh, xywh);
 	}
 	if (dirty & DIRTY_TEXENV) {
 		SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h
index 35fc2054e883..5082042ad243 100644
--- a/GPU/GLES/ShaderManagerGLES.h
+++ b/GPU/GLES/ShaderManagerGLES.h
@@ -60,7 +60,7 @@ class LinkedShader {
 	int u_tex;
 	int u_proj;
 	int u_proj_lens;
-	int u_proj_through;
+	int u_xywh;
 	int u_texenv;
 	int u_view;
 	int u_texmtx;