diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md
index 68ecc7749..48f64bf5c 100644
--- a/.agents/skills/debug-openshell-cluster/SKILL.md
+++ b/.agents/skills/debug-openshell-cluster/SKILL.md
@@ -132,10 +132,9 @@ Common findings:
 helm -n openshell status openshell
 helm -n openshell get values openshell
 kubectl -n openshell get deployment,statefulset,pod,svc,pvc
-kubectl -n openshell logs deployment/openshell -c openshell-gateway --tail=200
-kubectl -n openshell logs statefulset/openshell -c openshell-gateway --tail=200
-kubectl -n openshell rollout status deployment/openshell
-kubectl -n openshell rollout status statefulset/openshell
+WORKLOAD="$(kubectl -n openshell get deployment openshell >/dev/null 2>&1 && echo deployment/openshell || echo statefulset/openshell)"
+kubectl -n openshell logs "${WORKLOAD}" -c openshell-gateway --tail=200
+kubectl -n openshell rollout status "${WORKLOAD}"
 ```
 
 Use the log and rollout commands for the workload kind that exists in the
@@ -153,6 +152,32 @@ kubectl -n openshell get deployment,service,pod -l app.kubernetes.io/name=opensh
 kubectl -n openshell logs deployment/openshell-e2e-postgres --tail=200
 ```
 
+For multi-replica gateway installs, supervisor and client session traffic may
+be served by a non-owner gateway replica and relayed to the current supervisor
+owner over the internal `PeerRelay` RPC. Check the headless peer Service,
+projected peer ServiceAccount token volume, and TokenReview RBAC:
+
+```bash
+kubectl -n openshell get svc openshell-peer -o wide
+kubectl -n openshell get endpoints openshell-peer
+kubectl -n openshell get pod -l app.kubernetes.io/instance=openshell \
+  -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{.spec.volumes[?(@.name=="gateway-peer-token")]}{"\n"}{.spec.containers[0].env[?(@.name=="OPENSHELL_PEER_SERVICE_ACCOUNT_TOKEN_FILE")]}{"\n"}{.spec.containers[0].env[?(@.name=="OPENSHELL_PEER_ENDPOINT")]}{"\n"}{end}'
+kubectl auth can-i create tokenreviews.authentication.k8s.io \
+  --as=system:serviceaccount:openshell:openshell
+kubectl auth can-i get pods -n openshell \
+  --as=system:serviceaccount:openshell:openshell
+kubectl -n openshell logs "${WORKLOAD}" -c openshell-gateway --tail=200 | grep -E 'gateway peer|PeerRelay|supervisor owner|owner relay'
+```
+
+Expected gateway startup logs include
+`gateway peer ServiceAccount TokenReview authentication enabled`. If peer relay
+calls fail with `Unauthenticated`, verify the `gateway-peer-token` projected
+volume has audience `openshell-gateway-peer` and that the receiving gateway can
+create TokenReviews. If they fail with `PermissionDenied`, verify the gateway
+ServiceAccount name, release namespace, pod UID, and Helm selector labels match
+the live gateway pods. Deployment-backed gateway pods should also publish
+`OPENSHELL_PEER_ENDPOINT` from their pod IP.
+
 Check required Helm deployment secrets:
 
 ```bash
@@ -199,8 +224,8 @@ label, supervisor env vars `OPENSHELL_K8S_SA_TOKEN_FILE` and
 Check the image references currently used by the gateway deployment:
 
 ```bash
-kubectl -n openshell get deployment openshell -o jsonpath="{.spec.template.spec.containers[*].image}{\"\n\"}{.spec.template.spec.containers[*].env[?(@.name==\"OPENSHELL_SUPERVISOR_IMAGE\")].value}{\"\n\"}"
-kubectl -n openshell get statefulset openshell -o jsonpath="{.spec.template.spec.containers[*].image}{\"\n\"}{.spec.template.spec.containers[*].env[?(@.name==\"OPENSHELL_SUPERVISOR_IMAGE\")].value}{\"\n\"}"
+WORKLOAD="$(kubectl -n openshell get deployment openshell >/dev/null 2>&1 && echo deployment/openshell || echo statefulset/openshell)"
+kubectl -n openshell get "${WORKLOAD}" -o jsonpath="{.spec.template.spec.containers[*].image}{\"\n\"}{.spec.template.spec.containers[*].env[?(@.name==\"OPENSHELL_SUPERVISOR_IMAGE\")].value}{\"\n\"}"
 helm -n openshell get values openshell | grep -E 'repository|tag|supervisorImage|workload'
 ```
 
@@ -244,8 +269,8 @@ If the gateway is healthy but sandbox creation fails:
 ```bash
 kubectl -n openshell get pods
 kubectl -n openshell get events --sort-by=.lastTimestamp | tail -n 50
-kubectl -n openshell logs deployment/openshell -c openshell-gateway --tail=200
-kubectl -n openshell logs statefulset/openshell -c openshell-gateway --tail=200
+WORKLOAD="$(kubectl -n openshell get deployment openshell >/dev/null 2>&1 && echo deployment/openshell || echo statefulset/openshell)"
+kubectl -n openshell logs "${WORKLOAD}" -c openshell-gateway --tail=200
 ```
 
 Check the configured sandbox namespace:
diff --git a/.agents/skills/helm-dev-environment/SKILL.md b/.agents/skills/helm-dev-environment/SKILL.md
index 10813792d..cc3c5c58e 100644
--- a/.agents/skills/helm-dev-environment/SKILL.md
+++ b/.agents/skills/helm-dev-environment/SKILL.md
@@ -66,10 +66,34 @@ generates mTLS secrets on first install. Envoy Gateway opt-in; see the Optional
 
 The gateway Service uses ClusterIP. Access is via Envoy Gateway (port `8080`) or `kubectl port-forward`.
 
-**HA test deploy** (two gateway replicas + external PostgreSQL Secret): uncomment
-`#- ci/values-high-availability.yaml` in `deploy/helm/openshell/skaffold.yaml`,
-create the Secret named `openshell-ha-pg` with a `uri` key, then run
-`mise run helm:skaffold:run` or `mise run helm:skaffold:dev`.
+Skaffold profiles are available for HA and reverse-proxy development. Run these from
+`deploy/helm/openshell/`:
+
+```bash
+# Two gateway replicas + external PostgreSQL Secret.
+KUBECONFIG=../../../kubeconfig skaffold run -p high-availability
+
+# Two gateway replicas + Envoy Gateway + Gateway API route.
+KUBECONFIG=../../../kubeconfig skaffold run -p ha-envoy
+```
+
+The HA profiles expect a Secret named `openshell-ha-pg` in the `openshell`
+namespace with a `uri` key. For local manual testing, either create your own
+PostgreSQL Secret or use the e2e PostgreSQL fixture manifest in
+`e2e/kubernetes/postgres-fixture.yaml`.
+
+For the `ha-envoy` profile, return to the repository root and apply the
+GatewayClass and BackendTrafficPolicy manifest after Skaffold has installed
+Envoy Gateway:
+
+```bash
+KUBECONFIG=kubeconfig mise run helm:gateway:apply
+```
+
+The BackendTrafficPolicy disables Envoy request and stream-duration timeouts for
+OpenShell's `GRPCRoute`. Keep that policy in `deploy/kube/manifests/envoy-gateway-openshell.yaml`,
+not in the Helm chart; it is required for long-lived gRPC create/watch/exec/relay
+streams during gateway rollouts and scale events.
 
 ### TLS behaviour
 
@@ -139,23 +163,76 @@ but will point to a deleted cluster — safe to ignore or clean up manually.
 
 ## Optional Add-ons
 
-Each add-on requires uncommenting the corresponding `valuesFiles` entry in
-`deploy/helm/openshell/skaffold.yaml` before running `helm:skaffold:dev` or `helm:skaffold:run`.
+Some add-ons are enabled by uncommenting their `valuesFiles` entries in
+`deploy/helm/openshell/skaffold.yaml`; prefer a dedicated Skaffold profile when
+one exists, because profiles avoid leaving local manual edits in the worktree.
 
 ### Envoy Gateway (Gateway API / GRPCRoute)
 
-Envoy Gateway is already installed by Skaffold (the `envoy-gateway` Helm release in
-`skaffold.yaml`). To activate routing:
+Use the `ha-envoy` Skaffold profile for HA reverse-proxy testing:
 
-1. Uncomment `#- values-gateway.yaml` in `skaffold.yaml`
-2. Redeploy: `mise run helm:skaffold:run`
-3. Apply the GatewayClass: `mise run helm:gateway:apply`
-4. Access: `http://127.0.0.1:8080`
+```bash
+cd deploy/helm/openshell
+KUBECONFIG=../../../kubeconfig skaffold run -p ha-envoy
+cd ../../..
+KUBECONFIG=kubeconfig mise run helm:gateway:apply
+```
+
+`values-gateway.yaml` creates a `Gateway` (listener on port 80, class `eg`) and a
+`GRPCRoute` in the `openshell` namespace. The `ha-envoy` profile installs the
+Envoy Gateway Helm chart and layers both `ci/values-high-availability.yaml` and
+`values-gateway.yaml` onto the OpenShell release.
 
-`values-gateway.yaml` creates a `Gateway` (listener on port 80, class `eg`) and a
-`GRPCRoute` in the `openshell` namespace. Envoy Gateway provisions a LoadBalancer
-service for the proxy; klipper-lb binds it to hostPort 80, reachable via the
-`8080:80` load balancer port mapping.
+`deploy/kube/manifests/envoy-gateway-openshell.yaml` creates:
+
+- `GatewayClass/eg`
+- `BackendTrafficPolicy/openshell-grpc-timeouts`
+
+The Envoy Gateway proxy Service is usually exposed through the k3d load balancer
+at `http://127.0.0.1:8080`. If the cluster was created with a different
+`HELM_K3S_LB_HOST_PORT`, use that host port instead.
+
+For manual tests against an existing cluster, prefer forwarding the Envoy proxy
+Service rather than `svc/openshell`. That keeps client traffic on the same path
+as a real reverse proxy while gateway pods rotate behind it:
+
+```bash
+KUBECONFIG=kubeconfig kubectl get svc -A \
+  -l gateway.envoyproxy.io/owning-gateway-name=openshell
+KUBECONFIG=kubeconfig kubectl -n <envoy-service-namespace> port-forward \
+  svc/<envoy-service-name> 8080:80
+openshell gateway add http://127.0.0.1:8080 --name openshell --local
+```
+
+When running e2e tests manually through Envoy, register gateway metadata (as
+above) instead of relying only on `OPENSHELL_GATEWAY_ENDPOINT`; some tests call
+`openshell gateway info` and expect metadata for the active gateway.
+
+### Kubernetes E2E Notes
+
+Use `mise run e2e:kubernetes` for the standard Helm-backed Kubernetes suite.
+The kube e2e wrapper creates only one port-forward, to `svc/openshell`; it no
+longer forwards the unauthenticated health listener or runs a `/readyz` e2e
+target. `/readyz` remains covered by server unit/integration tests.
+
+Use `mise run e2e:kubernetes:ha-rebalancing` for full-suite HA coverage. The
+task creates an external PostgreSQL fixture, installs Envoy Gateway, applies
+`deploy/kube/manifests/envoy-gateway-openshell.yaml`, enables the chart
+`GRPCRoute`, and runs the full Kubernetes e2e suite, including
+`kubernetes_ha_rebalancing`. That coverage validates sandbox create/watch and
+exec through the Envoy proxy while gateway replicas scale up, scale down, and
+rotate.
+
+If you reuse an existing Skaffold cluster for the full kube suite, make sure the
+cluster has the Docker Desktop host-gateway alias configured for host-gateway
+tests. The e2e wrapper sets this on chart installs; manual reuse may require:
+
+```bash
+KUBECONFIG=kubeconfig helm upgrade openshell deploy/helm/openshell \
+  --namespace openshell --reuse-values \
+  --set server.hostGatewayIP=192.168.65.254 \
+  --wait --timeout 5m
+```
 
 ### Keycloak OIDC
 
@@ -226,6 +303,6 @@ mise run helm:k3s:status
 | `deploy/helm/openshell/ci/values-spire.yaml` | SPIFFE/SPIRE provider token grant overlay |
 | `deploy/helm/openshell/ci/values-spire-stack.yaml` | SPIRE hardened chart values for local dev |
 | `deploy/helm/openshell/ci/values-tls-disabled.yaml` | Lint-only: TLS + auth disabled (reverse-proxy edge termination) |
-| `deploy/kube/manifests/envoy-gateway-openshell.yaml` | GatewayClass for Envoy Gateway (`mise run helm:gateway:apply`) |
+| `deploy/kube/manifests/envoy-gateway-openshell.yaml` | GatewayClass and BackendTrafficPolicy for Envoy Gateway (`mise run helm:gateway:apply`) |
 | `tasks/scripts/helm-k3s-local.sh` | k3d cluster create/delete/start/stop/status |
 | `tasks/scripts/keycloak-k8s-setup.sh` | Keycloak deploy + realm import |
diff --git a/.github/workflows/branch-e2e.yml b/.github/workflows/branch-e2e.yml
index 8a9e7fe29..5acfb171d 100644
--- a/.github/workflows/branch-e2e.yml
+++ b/.github/workflows/branch-e2e.yml
@@ -123,6 +123,7 @@ jobs:
       job-name: Kubernetes HA E2E (Rust smoke)
       extra-helm-values: deploy/helm/openshell/ci/values-high-availability.yaml
       external-postgres-secret: openshell-ha-pg
+      use-envoy-gateway: true
 
   core-e2e-result:
     name: Core E2E result
diff --git a/.github/workflows/e2e-kubernetes-test.yml b/.github/workflows/e2e-kubernetes-test.yml
index ee9caac6f..76b233e3c 100644
--- a/.github/workflows/e2e-kubernetes-test.yml
+++ b/.github/workflows/e2e-kubernetes-test.yml
@@ -32,6 +32,21 @@ on:
         required: false
         type: string
         default: ""
+      test-name:
+        description: "Rust e2e test target to run (sets OPENSHELL_E2E_KUBE_TEST)"
+        required: false
+        type: string
+        default: ""
+      kubernetes-features:
+        description: "Cargo feature list for the Kubernetes e2e crate"
+        required: false
+        type: string
+        default: ""
+      use-envoy-gateway:
+        description: "Install Envoy Gateway and run the e2e command through the chart GRPCRoute"
+        required: false
+        type: boolean
+        default: false
       mise-version:
         description: "mise version to install on the bare Kubernetes e2e runner"
         required: false
@@ -117,6 +132,9 @@ jobs:
           OPENSHELL_E2E_KUBE_CONTEXT: kind-${{ env.KIND_CLUSTER_NAME }}
           OPENSHELL_E2E_KUBE_EXTRA_VALUES: ${{ inputs.extra-helm-values }}
           OPENSHELL_E2E_KUBE_EXTERNAL_POSTGRES_SECRET: ${{ inputs.external-postgres-secret }}
+          OPENSHELL_E2E_KUBE_TEST: ${{ inputs.test-name }}
+          OPENSHELL_E2E_KUBERNETES_FEATURES: ${{ inputs.kubernetes-features }}
+          OPENSHELL_E2E_KUBE_USE_ENVOY: ${{ inputs.use-envoy-gateway }}
           IMAGE_TAG: ${{ inputs.image-tag }}
           OPENSHELL_REGISTRY: ghcr.io/nvidia/openshell
         run: mise run --no-deps --skip-deps e2e:kubernetes
diff --git a/architecture/gateway.md b/architecture/gateway.md
index a13d58f0a..ad5a46517 100644
--- a/architecture/gateway.md
+++ b/architecture/gateway.md
@@ -91,6 +91,36 @@ authenticated sandbox ID with any sandbox ID or name resolved from the request.
 Supervisor control and relay streams require a matching sandbox principal before
 the gateway registers the session or bridges relay bytes.
 
+## HA Supervisor Ownership
+
+In multi-replica Kubernetes deployments, every gateway pod can accept client
+RPCs, but a sandbox supervisor maintains one active stream to one gateway
+replica at a time. The connected replica publishes a short-lived supervisor
+owner record in the shared Postgres object store with its replica id, peer DNS
+endpoint, supervisor instance id, and connection epoch. Heartbeats renew the
+record, and reconnects from the same supervisor instance with a newer epoch can
+supersede the previous owner before the TTL expires.
+
+Session-bound operations such as exec, TCP forwarding, file sync, and sandbox
+service routing first check the local session registry. If the supervisor is
+owned by another gateway replica, the serving gateway opens an internal
+`PeerRelay` stream to that owner and asks it to open the supervisor relay. This
+keeps client traffic working when a Kubernetes Service routes the client to a
+non-owner gateway pod. If a peer owner is stale or unreachable during a rollout,
+the serving gateway retries the ownership lookup until the normal relay wait
+deadline expires.
+
+File upload and download use tar-over-SSH through the same relay path. A gateway
+pod termination drops the active SSH proxy byte stream, so the CLI retries the
+whole sync operation with a fresh SSH session instead of attempting mid-stream
+resume.
+
+Gateway peer RPCs authenticate with Kubernetes ServiceAccount identity rather
+than a shared secret. Helm mounts a projected, pod-bound token with audience
+`openshell-gateway-peer`; the receiving gateway validates it through
+TokenReview, checks the live pod UID and chart selector labels, and authorizes
+only the internal peer relay method.
+
 ## API Surface
 
 The gateway API is organized around platform objects and operational streams:
diff --git a/crates/openshell-cli/src/ssh.rs b/crates/openshell-cli/src/ssh.rs
index f5986a1d8..874a14564 100644
--- a/crates/openshell-cli/src/ssh.rs
+++ b/crates/openshell-cli/src/ssh.rs
@@ -4,7 +4,7 @@
 //! SSH connection and proxy utilities.
 
 use crate::tls::{TlsOptions, grpc_client};
-use miette::{IntoDiagnostic, Result, WrapErr};
+use miette::{IntoDiagnostic, Report, Result, WrapErr};
 #[cfg(unix)]
 use nix::sys::signal::{SaFlags, SigAction, SigHandler, SigSet, Signal, sigaction};
 use openshell_core::ObjectId;
@@ -18,6 +18,7 @@ use openshell_core::proto::{
 };
 use owo_colors::OwoColorize;
 use std::fs;
+use std::future::Future;
 use std::io::{IsTerminal, Write};
 #[cfg(unix)]
 use std::os::unix::process::CommandExt;
@@ -29,6 +30,8 @@ use tokio::process::Command as TokioCommand;
 use tokio_stream::wrappers::ReceiverStream;
 
 const FOREGROUND_FORWARD_STARTUP_GRACE_PERIOD: Duration = Duration::from_secs(2);
+const SYNC_RETRY_ATTEMPTS: usize = 4;
+const SYNC_RETRY_DELAY: Duration = Duration::from_secs(2);
 
 #[derive(Clone, Copy, Debug)]
 pub enum Editor {
@@ -462,6 +465,7 @@ pub(crate) async fn sandbox_exec_without_exec(
 }
 
 /// What to pack into the tar archive streamed to the sandbox.
+#[derive(Clone)]
 enum UploadSource {
     /// A single local file or directory.  `tar_name` controls the entry name
     /// inside the archive (e.g. the target basename for file-to-file uploads).
@@ -827,17 +831,15 @@ pub async fn sandbox_sync_up_files(
     if files.is_empty() {
         return Ok(());
     }
-    ssh_tar_upload(
-        server,
-        name,
-        dest,
-        UploadSource::FileList {
-            base_dir: base_dir.to_path_buf(),
-            files: files.to_vec(),
-            archive_prefix: file_list_archive_prefix(local_path),
-        },
-        tls,
-    )
+    let source = UploadSource::FileList {
+        base_dir: base_dir.to_path_buf(),
+        files: files.to_vec(),
+        archive_prefix: file_list_archive_prefix(local_path),
+    };
+    retry_sandbox_sync("upload", || {
+        let source = source.clone();
+        async move { ssh_tar_upload(server, name, dest, source, tls).await }
+    })
     .await
 }
 
@@ -871,16 +873,14 @@ pub async fn sandbox_sync_up(
     {
         let (parent, target_name) = split_sandbox_path(path);
         if parent != "/" {
-            return ssh_tar_upload(
-                server,
-                name,
-                Some(parent),
-                UploadSource::SinglePath {
-                    local_path: local_path.to_path_buf(),
-                    tar_name: target_name.into(),
-                },
-                tls,
-            )
+            let source = UploadSource::SinglePath {
+                local_path: local_path.to_path_buf(),
+                tar_name: target_name.into(),
+            };
+            return retry_sandbox_sync("upload", || {
+                let source = source.clone();
+                async move { ssh_tar_upload(server, name, Some(parent), source, tls).await }
+            })
             .await;
         }
     }
@@ -898,16 +898,14 @@ pub async fn sandbox_sync_up(
         directory_upload_prefix(local_path)
     };
 
-    ssh_tar_upload(
-        server,
-        name,
-        sandbox_path,
-        UploadSource::SinglePath {
-            local_path: local_path.to_path_buf(),
-            tar_name,
-        },
-        tls,
-    )
+    let source = UploadSource::SinglePath {
+        local_path: local_path.to_path_buf(),
+        tar_name,
+    };
+    retry_sandbox_sync("upload", || {
+        let source = source.clone();
+        async move { ssh_tar_upload(server, name, sandbox_path, source, tls).await }
+    })
     .await
 }
 
@@ -1014,6 +1012,19 @@ pub async fn sandbox_sync_down(
     sandbox_path: &str,
     dest: &str,
     tls: &TlsOptions,
+) -> Result<()> {
+    retry_sandbox_sync("download", || async {
+        sandbox_sync_down_once(server, name, sandbox_path, dest, tls).await
+    })
+    .await
+}
+
+async fn sandbox_sync_down_once(
+    server: &str,
+    name: &str,
+    sandbox_path: &str,
+    dest: &str,
+    tls: &TlsOptions,
 ) -> Result<()> {
     let sandbox_path = validate_sandbox_source_path(sandbox_path)?;
     let session = ssh_session_config(server, name, tls).await?;
@@ -1028,6 +1039,54 @@ pub async fn sandbox_sync_down(
     }
 }
 
+async fn retry_sandbox_sync<F, Fut>(operation: &str, mut run: F) -> Result<()>
+where
+    F: FnMut() -> Fut, // called once per attempt to build a fresh future
+    Fut: Future<Output = Result<()>>,
+{
+    let mut attempt = 1; // 1-based number of the attempt about to run
+    loop {
+        match run().await {
+            Ok(()) => return Ok(()),
+            Err(err) if attempt < SYNC_RETRY_ATTEMPTS && sync_error_is_retryable(&err) => {
+                tracing::warn!(
+                    operation,
+                    attempt,
+                    max_attempts = SYNC_RETRY_ATTEMPTS,
+                    error = %err,
+                    "sandbox sync operation failed; retrying"
+                );
+                tokio::time::sleep(SYNC_RETRY_DELAY).await; // fixed delay, no backoff
+                attempt += 1;
+            }
+            Err(err) => return Err(err), // non-retryable, or SYNC_RETRY_ATTEMPTS reached
+        }
+    }
+}
+
+/// True when any cause in `err`'s chain names a known transient transport/SSH failure.
+/// Matches each cause's `Display` text, so report-handler wrapping cannot split a needle.
+fn sync_error_is_retryable(err: &Report) -> bool {
+    const NEEDLES: &[&str] = &[
+        "broken pipe",
+        "connection",
+        "early eof",
+        "http2",
+        "h2 protocol",
+        "reset before headers",
+        "transport error",
+        "unexpected eof",
+        "unavailable", // also covers "service is currently unavailable"
+        "upstream connect error",
+        "ssh probe exited with status exit status: 255",
+        "ssh tar create exited",
+        "ssh tar extract exited",
+        "failed to extract tar archive from sandbox",
+    ];
+    let hit = |text: String| NEEDLES.iter().any(|needle| text.contains(needle));
+    err.chain().any(|cause| hit(cause.to_string().to_ascii_lowercase()))
+}
+
 /// Stream a tar archive from the sandbox and extract it into a fresh
 /// destination directory. The source is always wrapped on the sandbox side so
 /// the host can pick a basename when needed.
@@ -1529,6 +1588,28 @@ mod tests {
         assert_eq!(output.matches("Host openshell-demo").count(), 1);
     }
 
+    #[test]
+    fn sync_error_retry_filter_accepts_transport_failures() {
+        let err = miette::miette!("transport error: connection reset by peer");
+        assert!(sync_error_is_retryable(&err)); // hits "transport error" and "connection"
+    }
+
+    #[test]
+    fn sync_error_retry_filter_accepts_transient_ssh_probe_failures() {
+        let err = Err::<(), _>(miette::miette!(
+            "ssh probe exited with status exit status: 255"
+        ))
+        .wrap_err("failed to resolve sandbox source path '/sandbox/ha-sync/ha-sync-upload'")
+        .unwrap_err(); // retryable text lives in the inner cause, not the outer context
+        assert!(sync_error_is_retryable(&err));
+    }
+
+    #[test]
+    fn sync_error_retry_filter_rejects_validation_failures() {
+        let err = miette::miette!("sandbox source path '/etc/passwd' resolves outside /sandbox");
+        assert!(!sync_error_is_retryable(&err)); // message contains none of the retry needles
+    }
+
     #[test]
     #[allow(unsafe_code)] // Test-only: env vars require unsafe in Rust 2024.
     fn install_ssh_config_adds_include_once_and_updates_managed_file() {
diff --git a/crates/openshell-cli/tests/ensure_providers_integration.rs b/crates/openshell-cli/tests/ensure_providers_integration.rs
index ea2d5a465..92e5aa081 100644
--- a/crates/openshell-cli/tests/ensure_providers_integration.rs
+++ b/crates/openshell-cli/tests/ensure_providers_integration.rs
@@ -566,6 +566,17 @@ impl OpenShell for TestOpenShell {
         Err(Status::unimplemented("not implemented in test"))
     }
 
+    type PeerRelayStream = tokio_stream::wrappers::ReceiverStream<
+        Result<openshell_core::proto::PeerRelayFrame, Status>,
+    >;
+
+    async fn peer_relay(
+        &self,
+        _request: tonic::Request<tonic::Streaming<openshell_core::proto::PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        Err(Status::unimplemented("not implemented in test")) // test stub: always UNIMPLEMENTED
+    }
+
     type ForwardTcpStream = tokio_stream::wrappers::ReceiverStream<
         Result<openshell_core::proto::TcpForwardFrame, Status>,
     >;
diff --git a/crates/openshell-cli/tests/mtls_integration.rs b/crates/openshell-cli/tests/mtls_integration.rs
index 7cb9e1e76..9874de20a 100644
--- a/crates/openshell-cli/tests/mtls_integration.rs
+++ b/crates/openshell-cli/tests/mtls_integration.rs
@@ -459,6 +459,17 @@ impl OpenShell for TestOpenShell {
         Err(Status::unimplemented("not implemented in test"))
     }
 
+    type PeerRelayStream = tokio_stream::wrappers::ReceiverStream<
+        Result<openshell_core::proto::PeerRelayFrame, Status>,
+    >;
+
+    async fn peer_relay(
+        &self,
+        _request: tonic::Request<tonic::Streaming<openshell_core::proto::PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        Err(Status::unimplemented("not implemented in test")) // test stub: always UNIMPLEMENTED
+    }
+
     type ForwardTcpStream = tokio_stream::wrappers::ReceiverStream<
         Result<openshell_core::proto::TcpForwardFrame, Status>,
     >;
diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs
index b287b4ea0..b09a02590 100644
--- a/crates/openshell-cli/tests/provider_commands_integration.rs
+++ b/crates/openshell-cli/tests/provider_commands_integration.rs
@@ -898,6 +898,17 @@ impl OpenShell for TestOpenShell {
         Err(Status::unimplemented("not implemented in test"))
     }
 
+    type PeerRelayStream = tokio_stream::wrappers::ReceiverStream<
+        Result<openshell_core::proto::PeerRelayFrame, Status>,
+    >;
+
+    async fn peer_relay(
+        &self,
+        _request: tonic::Request<tonic::Streaming<openshell_core::proto::PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        Err(Status::unimplemented("not implemented in test")) // test stub: always UNIMPLEMENTED
+    }
+
     type ForwardTcpStream = tokio_stream::wrappers::ReceiverStream<
         Result<openshell_core::proto::TcpForwardFrame, Status>,
     >;
diff --git a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
index 7061614cb..74fbbfe71 100644
--- a/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
+++ b/crates/openshell-cli/tests/sandbox_create_lifecycle_integration.rs
@@ -637,6 +637,17 @@ impl OpenShell for TestOpenShell {
         Err(Status::unimplemented("not implemented in test"))
     }
 
+    type PeerRelayStream = tokio_stream::wrappers::ReceiverStream<
+        Result<openshell_core::proto::PeerRelayFrame, Status>,
+    >;
+
+    async fn peer_relay(
+        &self,
+        _request: tonic::Request<tonic::Streaming<openshell_core::proto::PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        Err(Status::unimplemented("not implemented in test")) // test stub: always UNIMPLEMENTED
+    }
+
     type ForwardTcpStream = tokio_stream::wrappers::ReceiverStream<
         Result<openshell_core::proto::TcpForwardFrame, Status>,
     >;
diff --git a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs
index 5e753eff9..0a4b1fc2f 100644
--- a/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs
+++ b/crates/openshell-cli/tests/sandbox_name_fallback_integration.rs
@@ -526,6 +526,17 @@ impl OpenShell for TestOpenShell {
         Err(Status::unimplemented("not implemented in test"))
     }
 
+    type PeerRelayStream = tokio_stream::wrappers::ReceiverStream<
+        Result<openshell_core::proto::PeerRelayFrame, Status>,
+    >;
+
+    async fn peer_relay(
+        &self,
+        _request: tonic::Request<tonic::Streaming<openshell_core::proto::PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        Err(Status::unimplemented("not implemented in test")) // test stub: always UNIMPLEMENTED
+    }
+
     type ForwardTcpStream = tokio_stream::wrappers::ReceiverStream<
         Result<openshell_core::proto::TcpForwardFrame, Status>,
     >;
diff --git a/crates/openshell-sandbox/src/supervisor_session.rs b/crates/openshell-sandbox/src/supervisor_session.rs
index 4d7392ee3..4764bc68d 100644
--- a/crates/openshell-sandbox/src/supervisor_session.rs
+++ b/crates/openshell-sandbox/src/supervisor_session.rs
@@ -252,11 +252,21 @@ async fn run_session_loop(
 ) {
     let mut backoff = INITIAL_BACKOFF;
     let mut attempt: u64 = 0;
+    let instance_id = uuid::Uuid::new_v4().to_string();
 
     loop {
         attempt += 1;
 
-        match run_single_session(&endpoint, &sandbox_id, &ssh_socket_path, netns_fd).await {
+        match run_single_session(
+            &endpoint,
+            &sandbox_id,
+            &ssh_socket_path,
+            netns_fd,
+            &instance_id,
+            attempt,
+        )
+        .await
+        {
             Ok(()) => {
                 let event = session_closed_event(crate::ocsf_ctx(), &endpoint, &sandbox_id);
                 ocsf_emit!(event);
@@ -278,6 +288,8 @@ async fn run_single_session(
     sandbox_id: &str,
     ssh_socket_path: &std::path::Path,
     netns_fd: Option<i32>,
+    instance_id: &str,
+    connection_epoch: u64,
 ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
     // Connect to the gateway. The same `Channel` is used for both the
     // long-lived control stream and all data-plane `RelayStream` calls, so
@@ -293,11 +305,11 @@ async fn run_single_session(
     let outbound = tokio_stream::wrappers::ReceiverStream::new(rx);
 
     // Send hello as the first message.
-    let instance_id = uuid::Uuid::new_v4().to_string();
     tx.send(SupervisorMessage {
         payload: Some(supervisor_message::Payload::Hello(SupervisorHello {
             sandbox_id: sandbox_id.to_string(),
-            instance_id: instance_id.clone(),
+            instance_id: instance_id.to_string(),
+            connection_epoch,
         })),
     })
     .await
diff --git a/crates/openshell-server-macros/src/lib.rs b/crates/openshell-server-macros/src/lib.rs
index a698ae662..a19d1bfc8 100644
--- a/crates/openshell-server-macros/src/lib.rs
+++ b/crates/openshell-server-macros/src/lib.rs
@@ -56,6 +56,7 @@ enum AuthMode {
     Sandbox,
     Bearer,
     Dual,
+    Peer,
 }
 
 #[derive(Clone, Copy)]
@@ -116,11 +117,11 @@ impl RpcAuth {
         };
 
         match mode {
-            AuthMode::Unauthenticated | AuthMode::Sandbox => {
+            AuthMode::Unauthenticated | AuthMode::Sandbox | AuthMode::Peer => {
                 if let Some(ref s) = scope {
                     return Err(Error::new(
                         s.span(),
-                        "`scope` is only valid for `auth = \"bearer\"` or `auth = \"dual\"` (sandbox principals don't carry scopes)",
+                        "`scope` is only valid for `auth = \"bearer\"` or `auth = \"dual\"`",
                     ));
                 }
                 if role.is_some() {
@@ -156,10 +157,11 @@ fn parse_auth_mode(value: &LitStr) -> Result<AuthMode> {
         "sandbox" => Ok(AuthMode::Sandbox),
         "bearer" => Ok(AuthMode::Bearer),
         "dual" => Ok(AuthMode::Dual),
+        "peer" => Ok(AuthMode::Peer),
         other => Err(Error::new(
             value.span(),
             format!(
-                "invalid auth mode `{other}`; expected one of `unauthenticated`, `sandbox`, `bearer`, `dual`"
+                "invalid auth mode `{other}`; expected one of `unauthenticated`, `sandbox`, `bearer`, `dual`, `peer`"
             ),
         )),
     }
@@ -290,6 +292,7 @@ fn expand(args: &AuthzArgs, item: &mut ItemImpl) -> Result<proc_macro2::TokenStr
             }
             AuthMode::Bearer => quote! { crate::auth::method_authz::AuthMode::Bearer },
             AuthMode::Dual => quote! { crate::auth::method_authz::AuthMode::Dual },
+            AuthMode::Peer => quote! { crate::auth::method_authz::AuthMode::Peer },
         };
 
         let scope_tokens = match &auth.scope {
diff --git a/crates/openshell-server/src/auth/guard.rs b/crates/openshell-server/src/auth/guard.rs
index edcd6bc01..a3d55eac7 100644
--- a/crates/openshell-server/src/auth/guard.rs
+++ b/crates/openshell-server/src/auth/guard.rs
@@ -30,6 +30,9 @@ use tracing::info;
 pub fn ensure_sandbox_scope(principal: &Principal, claimed_sandbox_id: &str) -> Result<(), Status> {
     match principal {
         Principal::User(_) => Ok(()),
+        Principal::Peer(_) => Err(Status::permission_denied(
+            "gateway peer principals may not call sandbox-scoped methods",
+        )),
         Principal::Sandbox(p) => {
             if p.sandbox_id == claimed_sandbox_id {
                 Ok(())
@@ -84,7 +87,7 @@ pub fn ensure_sandbox_principal_scope(
             ensure_sandbox_scope(principal, claimed_sandbox_id)?;
             Ok(p.clone())
         }
-        Principal::User(_) => Err(Status::permission_denied(
+        Principal::User(_) | Principal::Peer(_) => Err(Status::permission_denied(
             "supervisor RPCs require a sandbox principal",
         )),
         Principal::Anonymous => Err(Status::unauthenticated(
diff --git a/crates/openshell-server/src/auth/method_authz.rs b/crates/openshell-server/src/auth/method_authz.rs
index ec8dc5bca..91116f3ee 100644
--- a/crates/openshell-server/src/auth/method_authz.rs
+++ b/crates/openshell-server/src/auth/method_authz.rs
@@ -39,6 +39,8 @@ pub enum AuthMode {
     /// Either sandbox principal or Bearer; scope and role apply on
     /// the Bearer path only.
     Dual,
+    /// Only callable by a gateway peer principal.
+    Peer,
 }
 
 /// Coarse role mapping. Maps to the configured `admin_role` /
@@ -119,11 +121,17 @@ pub fn is_sandbox_callable(method: &str) -> bool {
 #[must_use]
 pub fn is_user_callable(method: &str) -> bool {
     match lookup(method).map(|m| m.mode) {
-        Some(AuthMode::Sandbox | AuthMode::Unauthenticated) => false,
+        Some(AuthMode::Sandbox | AuthMode::Unauthenticated | AuthMode::Peer) => false,
         Some(AuthMode::Bearer | AuthMode::Dual) | None => true,
     }
 }
 
+/// Reports whether `method` is registered with [`AuthMode::Peer`]; unknown methods are not.
+#[must_use]
+pub fn is_peer_callable(method: &str) -> bool {
+    lookup(method).is_some_and(|entry| matches!(entry.mode, AuthMode::Peer))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -238,6 +246,8 @@ mod tests {
             "/openshell.v1.OpenShell/ConnectSupervisor"
         ));
         assert!(!is_user_callable("/openshell.v1.OpenShell/RelayStream"));
+        assert!(!is_user_callable("/openshell.v1.OpenShell/PeerRelay"));
+        assert!(is_peer_callable("/openshell.v1.OpenShell/PeerRelay"));
         assert!(!is_user_callable(
             "/openshell.inference.v1.Inference/GetInferenceBundle"
         ));
diff --git a/crates/openshell-server/src/auth/mod.rs b/crates/openshell-server/src/auth/mod.rs
index cbf3b94d9..2b6eeb5c2 100644
--- a/crates/openshell-server/src/auth/mod.rs
+++ b/crates/openshell-server/src/auth/mod.rs
@@ -16,6 +16,7 @@ pub mod identity;
 pub mod k8s_sa;
 pub mod method_authz;
 pub mod oidc;
+pub mod peer;
 pub mod principal;
 pub mod sandbox_jwt;
 pub mod sandbox_methods;
diff --git a/crates/openshell-server/src/auth/peer.rs b/crates/openshell-server/src/auth/peer.rs
new file mode 100644
index 000000000..ac9bb38bc
--- /dev/null
+++ b/crates/openshell-server/src/auth/peer.rs
@@ -0,0 +1,596 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Gateway peer authentication for internal replica-to-replica RPCs.
+//!
+//! Peer calls use Kubernetes projected `ServiceAccount` tokens, not an
+//! OpenShell-managed shared secret. The caller presents its pod-bound gateway
+//! `ServiceAccount` token with the peer audience; the receiver validates it with
+//! the apiserver `TokenReview` API, checks the live pod UID and required labels,
+//! and only then produces a [`Principal::Peer`].
+
+use super::authenticator::Authenticator;
+use super::principal::{PeerPrincipal, Principal};
+use async_trait::async_trait;
+use k8s_openapi::api::{
+    authentication::v1::{TokenReview, TokenReviewSpec, TokenReviewStatus, UserInfo},
+    core::v1::Pod,
+};
+use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
+use kube::api::{Api, PostParams};
+use std::path::PathBuf;
+use std::sync::Arc;
+use tonic::Status;
+use tracing::{debug, info, warn};
+
+/// gRPC path for internal gateway relay forwarding.
+pub const PEER_RELAY_PATH: &str = "/openshell.v1.OpenShell/PeerRelay";
+/// Audience used for gateway-to-gateway projected `ServiceAccount` tokens.
+pub const DEFAULT_PEER_TOKEN_AUDIENCE: &str = "openshell-gateway-peer";
+/// Environment variable overriding the expected peer token audience.
+pub const PEER_TOKEN_AUDIENCE_ENV: &str = "OPENSHELL_PEER_TOKEN_AUDIENCE";
+/// Environment variable carrying the projected peer `ServiceAccount` token path.
+pub const PEER_SA_TOKEN_FILE_ENV: &str = "OPENSHELL_PEER_SERVICE_ACCOUNT_TOKEN_FILE";
+/// Default mount path for the projected peer `ServiceAccount` token.
+pub const DEFAULT_PEER_SA_TOKEN_FILE: &str = "/var/run/secrets/openshell-peer/token";
+/// Environment variable with comma-separated `key=value` pod labels required
+/// on authenticated gateway peer pods.
+pub const PEER_REQUIRED_POD_LABELS_ENV: &str = "OPENSHELL_PEER_POD_LABELS";
+const POD_NAME_EXTRA: &str = "authentication.kubernetes.io/pod-name";
+const POD_UID_EXTRA: &str = "authentication.kubernetes.io/pod-uid";
+
+#[derive(Debug, Clone)]
+pub struct ResolvedGatewayPeerIdentity {
+    pub pod_name: String,
+    pub pod_uid: String,
+}
+
+#[async_trait]
+pub trait GatewayPeerIdentityResolver: Send + Sync + 'static {
+    async fn resolve(&self, token: &str) -> Result<Option<ResolvedGatewayPeerIdentity>, Status>;
+}
+
+#[derive(Debug)]
+struct PeerTokenReviewIdentity {
+    pod_name: String,
+    pod_uid: String,
+}
+
+pub struct PeerServiceAccountAuthenticator {
+    resolver: Arc<dyn GatewayPeerIdentityResolver>,
+}
+
+impl std::fmt::Debug for PeerServiceAccountAuthenticator {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("PeerServiceAccountAuthenticator")
+            .finish_non_exhaustive()
+    }
+}
+
+impl PeerServiceAccountAuthenticator {
+    pub fn new(resolver: Arc<dyn GatewayPeerIdentityResolver>) -> Self {
+        Self { resolver }
+    }
+}
+
+#[async_trait]
+impl Authenticator for PeerServiceAccountAuthenticator {
+    /// Authenticates `PEER_RELAY_PATH` calls only; other paths return `Ok(None)`.
+    async fn authenticate(
+        &self,
+        headers: &http::HeaderMap,
+        path: &str,
+    ) -> Result<Option<Principal>, Status> {
+        if path != PEER_RELAY_PATH {
+            return Ok(None);
+        }
+        // An empty bearer token can never authenticate, so skip the resolver call entirely.
+        let Some(token) = headers
+            .get("authorization")
+            .and_then(|v| v.to_str().ok())
+            .and_then(|v| v.strip_prefix("Bearer "))
+            .filter(|token| !token.is_empty())
+        else {
+            return Ok(None);
+        };
+        let Some(resolved) = self.resolver.resolve(token).await? else {
+            debug!("K8s gateway peer token did not authenticate; falling through");
+            return Ok(None);
+        };
+        // The header is only cross-checked; identity always comes from the resolved token.
+        if let Some(claimed_replica) = headers
+            .get("x-openshell-peer-replica")
+            .and_then(|v| v.to_str().ok())
+            .filter(|v| !v.is_empty())
+            && claimed_replica != resolved.pod_name.as_str()
+        {
+            warn!(
+                claimed_replica,
+                pod_name = %resolved.pod_name,
+                "gateway peer replica header does not match authenticated pod"
+            );
+            return Err(Status::permission_denied(
+                "gateway peer replica does not match authenticated pod",
+            ));
+        }
+        Ok(Some(Principal::Peer(PeerPrincipal {
+            replica_id: resolved.pod_name,
+            pod_uid: resolved.pod_uid,
+        })))
+    }
+}
+
+/// Resolver backed by Kubernetes `TokenReview` and a live Pod lookup.
+pub struct LiveGatewayPeerResolver {
+    token_reviews_api: Api<TokenReview>,
+    pods_api: Api<Pod>,
+    expected_audience: String,
+    namespace: String,
+    expected_service_account: String,
+    required_pod_labels: Vec<(String, String)>,
+}
+
+impl LiveGatewayPeerResolver {
+    pub fn new(
+        client: kube::Client,
+        namespace: &str,
+        expected_audience: String,
+        expected_service_account: String,
+        required_pod_labels: Vec<(String, String)>,
+    ) -> Self {
+        let token_reviews_api: Api<TokenReview> = Api::all(client.clone());
+        let pods_api: Api<Pod> = Api::namespaced(client, namespace);
+        Self {
+            token_reviews_api,
+            pods_api,
+            expected_audience,
+            namespace: namespace.to_string(),
+            expected_service_account,
+            required_pod_labels,
+        }
+    }
+}
+
+#[async_trait]
+impl GatewayPeerIdentityResolver for LiveGatewayPeerResolver {
+    /// Validates `token` via `TokenReview`, then verifies the live pod it is bound to.
+    async fn resolve(&self, token: &str) -> Result<Option<ResolvedGatewayPeerIdentity>, Status> {
+        let review = TokenReview {
+            metadata: ObjectMeta::default(),
+            spec: TokenReviewSpec {
+                audiences: Some(vec![self.expected_audience.clone()]),
+                token: Some(token.to_string()),
+            },
+            status: None,
+        };
+        let review = self
+            .token_reviews_api
+            .create(&PostParams::default(), &review)
+            .await
+            .map_err(|err| {
+                warn!(error = %err, "K8s TokenReview failed for gateway peer");
+                Status::internal(format!("peer tokenreview failed: {err}"))
+            })?;
+        let status = review
+            .status
+            .ok_or_else(|| Status::internal("TokenReview response missing status"))?;
+        let Some(identity) = peer_token_review_identity(
+            &status,
+            &self.expected_audience,
+            &self.namespace,
+            &self.expected_service_account,
+        )?
+        else {
+            return Ok(None);
+        };
+        // Fetch the pod named in the token so its claims are checked against the live object.
+        let pod = self
+            .pods_api
+            .get_opt(&identity.pod_name)
+            .await
+            .map_err(|err| {
+                warn!(
+                    pod = %identity.pod_name,
+                    error = %err,
+                    "failed to fetch gateway peer pod"
+                );
+                Status::internal(format!("gateway peer pod GET failed: {err}"))
+            })?;
+        let Some(pod) = pod else {
+            warn!(
+                pod = %identity.pod_name,
+                "gateway peer pod referenced by SA token not found"
+            );
+            return Err(Status::not_found("gateway peer pod not found"));
+        };
+        // Same name but a different UID means the token was bound to another pod object.
+        let actual_uid = pod.metadata.uid.as_deref().unwrap_or_default();
+        if actual_uid != identity.pod_uid {
+            warn!(
+                pod = %identity.pod_name,
+                claimed_uid = %identity.pod_uid,
+                actual_uid,
+                "gateway peer SA token pod UID does not match live pod"
+            );
+            return Err(Status::permission_denied(
+                "gateway peer SA token pod UID mismatch",
+            ));
+        }
+        // A pod with no `service_account_name` set is compared as `default`.
+        let actual_service_account = pod
+            .spec
+            .as_ref()
+            .and_then(|spec| spec.service_account_name.as_deref())
+            .unwrap_or("default");
+        if actual_service_account != self.expected_service_account {
+            warn!(
+                pod = %identity.pod_name,
+                service_account = %actual_service_account,
+                expected_service_account = %self.expected_service_account,
+                "gateway peer pod service account does not match TokenReview principal"
+            );
+            return Err(Status::permission_denied(
+                "gateway peer pod service account mismatch",
+            ));
+        }
+        // Every configured label must match before the peer is accepted.
+        validate_required_pod_labels(&pod, &self.required_pod_labels)?;
+
+        info!(
+            pod_name = %identity.pod_name,
+            pod_uid = %identity.pod_uid,
+            service_account = %self.expected_service_account,
+            "validated gateway peer ServiceAccount token via TokenReview"
+        );
+
+        Ok(Some(ResolvedGatewayPeerIdentity {
+            pod_name: identity.pod_name,
+            pod_uid: identity.pod_uid,
+        }))
+    }
+}
+
+/// Extracts the pod binding from a `TokenReview` status; `Ok(None)` if it did not authenticate.
+#[allow(clippy::result_large_err)]
+fn peer_token_review_identity(
+    status: &TokenReviewStatus,
+    expected_audience: &str,
+    namespace: &str,
+    expected_service_account: &str,
+) -> Result<Option<PeerTokenReviewIdentity>, Status> {
+    if status.authenticated != Some(true) {
+        debug!(
+            error = status.error.as_deref().unwrap_or_default(),
+            "K8s TokenReview did not authenticate gateway peer token"
+        );
+        return Ok(None);
+    }
+    // The expected audience must be among those the apiserver reports for the token.
+    let audiences = status.audiences.as_deref().unwrap_or_default();
+    if !audiences.iter().any(|aud| aud == expected_audience) {
+        warn!(
+            expected_audience,
+            audiences = ?audiences,
+            "K8s TokenReview authenticated gateway peer token without expected audience"
+        );
+        return Err(Status::unauthenticated(
+            "gateway peer token audience not accepted",
+        ));
+    }
+    // The username must be exactly the configured ServiceAccount in `namespace`.
+    let user = status
+        .user
+        .as_ref()
+        .ok_or_else(|| Status::permission_denied("TokenReview response missing user info"))?;
+    let username = user
+        .username
+        .as_deref()
+        .ok_or_else(|| Status::permission_denied("TokenReview response missing username"))?;
+    let expected_username = format!("system:serviceaccount:{namespace}:{expected_service_account}");
+    if username != expected_username {
+        warn!(
+            username,
+            namespace,
+            service_account = %expected_service_account,
+            "K8s TokenReview principal is not the configured gateway service account"
+        );
+        return Err(Status::permission_denied(
+            "gateway peer token is not from the configured service account",
+        ));
+    }
+    let pod_name = user_extra_one(user, POD_NAME_EXTRA)?;
+    let pod_uid = user_extra_one(user, POD_UID_EXTRA)?;
+    Ok(Some(PeerTokenReviewIdentity { pod_name, pod_uid }))
+}
+
+/// Returns the single non-empty `extra` value stored under `key` for the reviewed user.
+#[allow(clippy::result_large_err)]
+fn user_extra_one(user: &UserInfo, key: &str) -> Result<String, Status> {
+    let found = user.extra.as_ref().and_then(|extra| extra.get(key));
+    match found.map(Vec::as_slice) {
+        None => Err(Status::permission_denied(
+            "gateway peer token is not pod-bound",
+        )),
+        Some([only]) if !only.is_empty() => Ok(only.clone()),
+        Some(_) => Err(Status::permission_denied(
+            "gateway peer token has invalid pod binding",
+        )),
+    }
+}
+
+/// Confirms `pod` carries every `required_labels` pair; an absent label is compared as empty.
+#[allow(clippy::result_large_err)]
+fn validate_required_pod_labels(
+    pod: &Pod,
+    required_labels: &[(String, String)],
+) -> Result<(), Status> {
+    for (key, expected) in required_labels {
+        let present = pod.metadata.labels.as_ref().and_then(|map| map.get(key));
+        let actual = present.map_or("", String::as_str);
+        if actual == expected {
+            continue;
+        }
+        // The first label that is absent or different rejects the pod.
+        warn!(
+            pod = %pod.metadata.name.as_deref().unwrap_or_default(),
+            label = %key,
+            expected,
+            actual,
+            "gateway peer pod missing required label"
+        );
+        return Err(Status::permission_denied(
+            "gateway peer pod labels do not match",
+        ));
+    }
+    Ok(())
+}
+
+/// Peer token audience from the environment, trimmed; unset or blank selects the default.
+pub fn peer_token_audience_from_env() -> String {
+    let raw = std::env::var(PEER_TOKEN_AUDIENCE_ENV).unwrap_or_default();
+    let audience = Some(raw.trim()).filter(|value| !value.is_empty());
+    audience.unwrap_or(DEFAULT_PEER_TOKEN_AUDIENCE).to_string()
+}
+
+/// Peer token path: the trimmed env override when non-blank, else the default mount if present.
+pub fn peer_service_account_token_file_from_env() -> Option<PathBuf> {
+    let configured = std::env::var(PEER_SA_TOKEN_FILE_ENV).unwrap_or_default();
+    let explicit = Some(configured.trim()).filter(|path| !path.is_empty());
+    explicit.map(PathBuf::from).or_else(|| {
+        // Without an override, use the default mount only when it exists.
+        let fallback = PathBuf::from(DEFAULT_PEER_SA_TOKEN_FILE);
+        fallback.exists().then_some(fallback)
+    })
+}
+
+/// Reads and trims the peer token file when a token path is available; blank contents error.
+pub fn load_peer_service_account_token_from_env() -> Result<Option<String>, String> {
+    let Some(path) = peer_service_account_token_file_from_env() else {
+        return Ok(None);
+    };
+
+    let contents = std::fs::read_to_string(&path)
+        .map_err(|err| format!("failed to read {}: {err}", path.display()))?;
+    let token = contents.trim();
+    if token.is_empty() {
+        return Err(format!(
+            "peer ServiceAccount token file {} is empty",
+            path.display()
+        ));
+    }
+    Ok(Some(token.to_string()))
+}
+
+pub fn required_pod_labels_from_env() -> Result<Vec<(String, String)>, String> {
+    let raw = std::env::var(PEER_REQUIRED_POD_LABELS_ENV).unwrap_or_default();
+    parse_required_pod_labels(&raw)
+}
+
+/// Parses a comma-separated `key=value` list into label pairs.
+///
+/// Entries, keys and values are trimmed; blank entries are skipped; the first bad entry errors.
+fn parse_required_pod_labels(raw: &str) -> Result<Vec<(String, String)>, String> {
+    raw.split(',')
+        .map(str::trim)
+        .filter(|entry| !entry.is_empty())
+        .map(|entry| {
+            // Split on the first `=` only, so a value may itself contain `=`.
+            let (key, value) = entry.split_once('=').ok_or_else(|| {
+                format!("{PEER_REQUIRED_POD_LABELS_ENV} entry {entry:?} must be key=value")
+            })?;
+            let (key, value) = (key.trim(), value.trim());
+            // Whitespace-only halves are rejected rather than accepted as empty labels.
+            if key.is_empty() || value.is_empty() {
+                return Err(format!(
+                    "{PEER_REQUIRED_POD_LABELS_ENV} entry {entry:?} must have non-empty key and value"
+                ));
+            }
+            Ok((key.to_string(), value.to_string()))
+        })
+        .collect()
+}
+
+#[cfg(test)]
+pub mod test_support {
+    use super::*;
+    use std::sync::Mutex;
+
+    pub struct FakeGatewayPeerResolver {
+        pub outcome: Result<Option<ResolvedGatewayPeerIdentity>, Status>,
+        pub seen_tokens: Mutex<Vec<String>>,
+    }
+
+    impl FakeGatewayPeerResolver {
+        pub fn returning(outcome: Result<Option<ResolvedGatewayPeerIdentity>, Status>) -> Self {
+            Self {
+                outcome,
+                seen_tokens: Mutex::new(Vec::new()),
+            }
+        }
+    }
+
+    #[async_trait]
+    impl GatewayPeerIdentityResolver for FakeGatewayPeerResolver {
+        async fn resolve(
+            &self,
+            token: &str,
+        ) -> Result<Option<ResolvedGatewayPeerIdentity>, Status> {
+            self.seen_tokens.lock().unwrap().push(token.to_string());
+            match &self.outcome {
+                Ok(opt) => Ok(opt.clone()),
+                Err(status) => Err(Status::new(status.code(), status.message())),
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::test_support::FakeGatewayPeerResolver;
+    use super::*;
+    use std::collections::BTreeMap;
+
+    fn bearer_headers(token: &str) -> http::HeaderMap {
+        let mut headers = http::HeaderMap::new();
+        headers.insert(
+            "authorization",
+            http::HeaderValue::from_str(&format!("Bearer {token}")).unwrap(),
+        );
+        headers
+    }
+
+    fn token_review_status(
+        authenticated: bool,
+        audiences: Vec<&str>,
+        username: &str,
+        extra: Vec<(&str, &str)>,
+    ) -> TokenReviewStatus {
+        TokenReviewStatus {
+            authenticated: Some(authenticated),
+            audiences: Some(audiences.into_iter().map(str::to_string).collect()),
+            error: None,
+            user: Some(UserInfo {
+                username: Some(username.to_string()),
+                uid: Some("sa-uid".to_string()),
+                groups: Some(vec![
+                    "system:serviceaccounts".to_string(),
+                    "system:serviceaccounts:openshell".to_string(),
+                    "system:authenticated".to_string(),
+                ]),
+                extra: Some(
+                    extra
+                        .into_iter()
+                        .map(|(key, value)| (key.to_string(), vec![value.to_string()]))
+                        .collect(),
+                ),
+            }),
+        }
+    }
+
+    #[test]
+    fn peer_token_review_identity_extracts_pod_binding() {
+        let status = token_review_status(
+            true,
+            vec![DEFAULT_PEER_TOKEN_AUDIENCE],
+            "system:serviceaccount:openshell:openshell",
+            vec![(POD_NAME_EXTRA, "openshell-0"), (POD_UID_EXTRA, "uid-a")],
+        );
+
+        let identity = peer_token_review_identity(
+            &status,
+            DEFAULT_PEER_TOKEN_AUDIENCE,
+            "openshell",
+            "openshell",
+        )
+        .unwrap()
+        .expect("authenticated token should resolve");
+
+        assert_eq!(identity.pod_name, "openshell-0");
+        assert_eq!(identity.pod_uid, "uid-a");
+    }
+
+    #[test]
+    fn peer_token_review_identity_rejects_wrong_service_account() {
+        let status = token_review_status(
+            true,
+            vec![DEFAULT_PEER_TOKEN_AUDIENCE],
+            "system:serviceaccount:openshell:default",
+            vec![(POD_NAME_EXTRA, "openshell-0"), (POD_UID_EXTRA, "uid-a")],
+        );
+
+        let err = peer_token_review_identity(
+            &status,
+            DEFAULT_PEER_TOKEN_AUDIENCE,
+            "openshell",
+            "openshell",
+        )
+        .expect_err("wrong service account must fail closed");
+        assert_eq!(err.code(), tonic::Code::PermissionDenied);
+    }
+
+    #[test]
+    fn validate_required_pod_labels_rejects_mismatch() {
+        let pod = Pod {
+            metadata: ObjectMeta {
+                name: Some("openshell-0".to_string()),
+                labels: Some(BTreeMap::from([(
+                    "app.kubernetes.io/name".to_string(),
+                    "openshell".to_string(),
+                )])),
+                ..Default::default()
+            },
+            ..Default::default()
+        };
+
+        let err = validate_required_pod_labels(
+            &pod,
+            &[(
+                "app.kubernetes.io/instance".to_string(),
+                "release-a".to_string(),
+            )],
+        )
+        .expect_err("missing required label must fail");
+        assert_eq!(err.code(), tonic::Code::PermissionDenied);
+    }
+
+    #[tokio::test]
+    async fn authenticator_uses_resolved_pod_name_as_replica() {
+        let resolver = Arc::new(FakeGatewayPeerResolver::returning(Ok(Some(
+            ResolvedGatewayPeerIdentity {
+                pod_name: "openshell-0".to_string(),
+                pod_uid: "uid-a".to_string(),
+            },
+        ))));
+        let auth = PeerServiceAccountAuthenticator::new(resolver);
+
+        let principal = auth
+            .authenticate(&bearer_headers("token-a"), PEER_RELAY_PATH)
+            .await
+            .unwrap()
+            .expect("principal");
+
+        let Principal::Peer(peer) = principal else {
+            panic!("expected peer principal");
+        };
+        assert_eq!(peer.replica_id, "openshell-0");
+        assert_eq!(peer.pod_uid, "uid-a");
+    }
+
+    #[test]
+    fn parse_required_pod_labels_accepts_comma_list() {
+        let labels = parse_required_pod_labels(
+            "app.kubernetes.io/name=openshell,app.kubernetes.io/instance=dev",
+        )
+        .unwrap();
+        assert_eq!(
+            labels,
+            vec![
+                (
+                    "app.kubernetes.io/name".to_string(),
+                    "openshell".to_string()
+                ),
+                ("app.kubernetes.io/instance".to_string(), "dev".to_string())
+            ]
+        );
+    }
+}
diff --git a/crates/openshell-server/src/auth/principal.rs b/crates/openshell-server/src/auth/principal.rs
index 1d4cb7276..ead3179a5 100644
--- a/crates/openshell-server/src/auth/principal.rs
+++ b/crates/openshell-server/src/auth/principal.rs
@@ -28,6 +28,8 @@ pub enum Principal {
     /// sandbox UUID. The wrapped `sandbox_id` MUST match any sandbox referenced
     /// in the request body for sandbox-class methods.
     Sandbox(#[allow(dead_code)] SandboxPrincipal),
+    /// Gateway replica authenticated for internal peer RPCs.
+    Peer(PeerPrincipal),
     /// Truly unauthenticated caller (health probes, reflection). Sandbox-class
     /// and user-class methods reject this variant.
     #[allow(dead_code)]
@@ -57,6 +59,15 @@ pub struct SandboxPrincipal {
     pub trust_domain: Option<String>,
 }
 
+/// Gateway peer caller.
+#[derive(Debug, Clone)]
+pub struct PeerPrincipal {
+    /// Replica id; the peer authenticator sets this to the pod name from the validated token.
+    pub replica_id: String,
+    /// UID of the authenticated Kubernetes pod.
+    pub pod_uid: String,
+}
+
 /// How a [`SandboxPrincipal`] was authenticated.
 ///
 /// Variant fields are populated by the producing authenticator and consumed
diff --git a/crates/openshell-server/src/auth/sandbox_methods.rs b/crates/openshell-server/src/auth/sandbox_methods.rs
index 76d5e1324..6e33ba24f 100644
--- a/crates/openshell-server/src/auth/sandbox_methods.rs
+++ b/crates/openshell-server/src/auth/sandbox_methods.rs
@@ -26,6 +26,7 @@ mod tests {
             "/openshell.v1.OpenShell/ConnectSupervisor"
         ));
         assert!(is_sandbox_callable("/openshell.v1.OpenShell/RelayStream"));
+        assert!(!is_sandbox_callable("/openshell.v1.OpenShell/PeerRelay"));
         assert!(is_sandbox_callable(
             "/openshell.v1.OpenShell/GetSandboxConfig"
         ));
diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs
index 6d687fb7c..147bc1b72 100644
--- a/crates/openshell-server/src/compute/mod.rs
+++ b/crates/openshell-server/src/compute/mod.rs
@@ -13,6 +13,7 @@ use crate::grpc::policy::SANDBOX_SETTINGS_OBJECT_TYPE;
 use crate::persistence::{ObjectId, ObjectName, ObjectRecord, ObjectType, Store, WriteCondition};
 use crate::sandbox_index::SandboxIndex;
 use crate::sandbox_watch::SandboxWatchBus;
+use crate::supervisor_owner::{OWNER_TTL, SupervisorOwnerIndex};
 use crate::supervisor_session::SupervisorSessionRegistry;
 use crate::tracing_bus::TracingLogBus;
 use futures::{Stream, StreamExt};
@@ -997,24 +998,21 @@ impl ComputeRuntime {
             use crate::persistence::WriteCondition;
             let now_ms = openshell_core::time::now_ms();
 
-            let session_connected = self.supervisor_sessions.has_session(&incoming.id);
+            let session_connected = self.supervisor_session_ready(&incoming.id).await?;
             let mut phase = derive_phase(incoming.status.as_ref());
             let sandbox_name = incoming.name.clone();
 
-            let supervisor_promoted = session_connected
-                && matches!(phase, SandboxPhase::Provisioning | SandboxPhase::Unknown);
-            if supervisor_promoted {
-                phase = SandboxPhase::Ready;
-            }
-
             let mut status = incoming
                 .status
                 .as_ref()
                 .map(|s| public_status_from_driver(s, phase, 0));
             rewrite_user_facing_conditions(&mut status, None);
-            if supervisor_promoted {
-                ensure_supervisor_ready_status(&mut status, &sandbox_name);
-            }
+            gate_phase_on_supervisor_session(
+                &mut phase,
+                &mut status,
+                &sandbox_name,
+                session_connected,
+            );
             let mut sandbox = Sandbox {
                 metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta {
                     id: incoming.id.clone(),
@@ -1055,7 +1053,7 @@ impl ComputeRuntime {
         }
 
         // Single-attempt CAS: on conflict, the next watch event will naturally retry
-        let session_connected = self.supervisor_sessions.has_session(&incoming.id);
+        let session_connected = self.supervisor_session_ready(&incoming.id).await?;
         let sandbox_name = incoming.name.clone();
 
         let sandbox = self
@@ -1067,11 +1065,6 @@ impl ComputeRuntime {
                     .status
                     .as_ref()
                     .map_or(old_phase, |status| derive_phase(Some(status)));
-                let supervisor_promoted = session_connected
-                    && matches!(phase, SandboxPhase::Provisioning | SandboxPhase::Unknown);
-                if supervisor_promoted {
-                    phase = SandboxPhase::Ready;
-                }
 
                 let cpv = sandbox.current_policy_version();
                 let mut status = incoming
@@ -1080,9 +1073,12 @@ impl ComputeRuntime {
                     .map(|s| public_status_from_driver(s, phase, cpv))
                     .or_else(|| sandbox.status.clone());
                 rewrite_user_facing_conditions(&mut status, sandbox.spec.as_ref());
-                if supervisor_promoted {
-                    ensure_supervisor_ready_status(&mut status, &sandbox_name);
-                }
+                gate_phase_on_supervisor_session(
+                    &mut phase,
+                    &mut status,
+                    &sandbox_name,
+                    session_connected,
+                );
 
                 if let Some(s) = status.as_mut()
                     && s.sandbox_name.is_empty()
@@ -1152,6 +1148,25 @@ impl ComputeRuntime {
         self.set_supervisor_session_state(sandbox_id, false).await
     }
 
+    /// True if `supervisor_sessions` has it, or a stored owner record is within `OWNER_TTL`.
+    async fn supervisor_session_ready(&self, sandbox_id: &str) -> Result<bool, String> {
+        if self.supervisor_sessions.has_session(sandbox_id) {
+            return Ok(true);
+        }
+        let owner_index = SupervisorOwnerIndex::new(self.store.clone(), OWNER_TTL);
+        let Some(owner) = owner_index
+            .read(sandbox_id)
+            .await
+            .map_err(|err| err.to_string())?
+        else {
+            return Ok(false);
+        };
+        // Saturate so a corrupt timestamp cannot overflow and pass as a fresh record.
+        let age_ms = openshell_core::time::now_ms().saturating_sub(owner.updated_at_ms);
+        let ttl_ms = i64::try_from(OWNER_TTL.as_millis()).unwrap_or(i64::MAX);
+        Ok(age_ms < ttl_ms)
+    }
+
     async fn set_supervisor_session_state(
         &self,
         sandbox_id: &str,
@@ -1697,6 +1712,21 @@ fn ensure_supervisor_ready_status(status: &mut Option<SandboxStatus>, sandbox_na
     );
 }
 
+fn gate_phase_on_supervisor_session(
+    phase: &mut SandboxPhase,
+    status: &mut Option<SandboxStatus>,
+    sandbox_name: &str,
+    session_connected: bool,
+) {
+    if session_connected && matches!(*phase, SandboxPhase::Provisioning | SandboxPhase::Unknown) {
+        *phase = SandboxPhase::Ready;
+        ensure_supervisor_ready_status(status, sandbox_name);
+    } else if !session_connected && *phase == SandboxPhase::Ready {
+        *phase = SandboxPhase::Provisioning;
+        ensure_supervisor_not_ready_status(status, sandbox_name);
+    }
+}
+
 fn ensure_supervisor_not_ready_status(status: &mut Option<SandboxStatus>, sandbox_name: &str) {
     upsert_ready_condition(
         status,
@@ -1705,7 +1735,7 @@ fn ensure_supervisor_not_ready_status(status: &mut Option<SandboxStatus>, sandbo
             r#type: "Ready".to_string(),
             status: "False".to_string(),
             reason: "DependenciesNotReady".to_string(),
-            message: "Supervisor session disconnected".to_string(),
+            message: "Supervisor session not connected".to_string(),
             last_transition_time: String::new(),
         },
     );
@@ -2542,6 +2572,114 @@ mod tests {
         let sandbox = sandbox_record("sb-1", "sandbox-a", SandboxPhase::Deleting);
         runtime.store.put_message(&sandbox).await.unwrap();
 
+        runtime
+            .apply_sandbox_update(DriverSandbox {
+                id: "sb-1".to_string(),
+                name: "sandbox-a".to_string(),
+                namespace: "default".to_string(),
+                spec: None,
+                status: Some(DriverSandboxStatus {
+                    sandbox_name: "sandbox-a".to_string(),
+                    instance_id: "agent-pod".to_string(),
+                    agent_fd: String::new(),
+                    sandbox_fd: String::new(),
+                    conditions: vec![DriverCondition {
+                        r#type: "Ready".to_string(),
+                        status: "True".to_string(),
+                        reason: "DependenciesReady".to_string(),
+                        message: "Pod is Ready".to_string(),
+                        last_transition_time: String::new(),
+                    }],
+                    deleting: false,
+                }),
+            })
+            .await
+            .unwrap();
+
+        let stored = runtime
+            .store
+            .get_message::<Sandbox>("sb-1")
+            .await
+            .unwrap()
+            .unwrap();
+        assert_eq!(
+            SandboxPhase::try_from(stored.phase()).unwrap(),
+            SandboxPhase::Provisioning
+        );
+    }
+
+    #[tokio::test]
+    async fn apply_sandbox_update_keeps_driver_ready_provisioning_until_supervisor_connects() {
+        let runtime = test_runtime(Arc::new(TestDriver::default())).await;
+        let sandbox = sandbox_record("sb-1", "sandbox-a", SandboxPhase::Provisioning);
+        runtime.store.put_message(&sandbox).await.unwrap();
+
+        runtime
+            .apply_sandbox_update(DriverSandbox {
+                id: "sb-1".to_string(),
+                name: "sandbox-a".to_string(),
+                namespace: "default".to_string(),
+                spec: None,
+                status: Some(DriverSandboxStatus {
+                    sandbox_name: "sandbox-a".to_string(),
+                    instance_id: "agent-pod".to_string(),
+                    agent_fd: String::new(),
+                    sandbox_fd: String::new(),
+                    conditions: vec![DriverCondition {
+                        r#type: "Ready".to_string(),
+                        status: "True".to_string(),
+                        reason: "DependenciesReady".to_string(),
+                        message: "Pod is Ready".to_string(),
+                        last_transition_time: String::new(),
+                    }],
+                    deleting: false,
+                }),
+            })
+            .await
+            .unwrap();
+
+        let stored = runtime
+            .store
+            .get_message::<Sandbox>("sb-1")
+            .await
+            .unwrap()
+            .unwrap();
+        assert_eq!(
+            SandboxPhase::try_from(stored.phase()).unwrap(),
+            SandboxPhase::Provisioning
+        );
+        let ready = stored
+            .status
+            .as_ref()
+            .and_then(|status| {
+                status
+                    .conditions
+                    .iter()
+                    .find(|condition| condition.r#type == "Ready")
+            })
+            .unwrap();
+        assert_eq!(ready.status, "False");
+        assert_eq!(ready.reason, "DependenciesNotReady");
+        assert_eq!(ready.message, "Supervisor session not connected");
+    }
+
+    #[tokio::test]
+    async fn apply_sandbox_update_treats_fresh_remote_supervisor_owner_as_ready() {
+        let runtime = test_runtime(Arc::new(TestDriver::default())).await;
+        let sandbox = sandbox_record("sb-1", "sandbox-a", SandboxPhase::Provisioning);
+        runtime.store.put_message(&sandbox).await.unwrap();
+        SupervisorOwnerIndex::new(runtime.store.clone(), OWNER_TTL)
+            .publish(
+                "sb-1",
+                "session-remote",
+                "instance-remote",
+                1,
+                "openshell-1",
+                "http://openshell-1.openshell-peer.openshell.svc.cluster.local:8080",
+            )
+            .await
+            .unwrap();
+
         runtime
             .apply_sandbox_update(DriverSandbox {
                 id: "sb-1".to_string(),
@@ -2576,10 +2714,23 @@ mod tests {
             SandboxPhase::try_from(stored.phase()).unwrap(),
             SandboxPhase::Ready
         );
+        let ready = stored
+            .status
+            .as_ref()
+            .and_then(|status| {
+                status
+                    .conditions
+                    .iter()
+                    .find(|condition| condition.r#type == "Ready")
+            })
+            .unwrap();
+        assert_eq!(ready.status, "True");
+        assert_eq!(ready.reason, "DependenciesReady");
+        assert_eq!(ready.message, "Pod is Ready");
     }
 
     #[tokio::test]
-    async fn apply_sandbox_update_without_status_preserves_existing_status() {
+    async fn apply_sandbox_update_without_status_preserves_policy_and_gates_ready() {
         let runtime = test_runtime(Arc::new(TestDriver::default())).await;
         let mut sandbox = sandbox_record("sb-1", "sandbox-a", SandboxPhase::Ready);
         sandbox.status = Some(SandboxStatus {
@@ -2616,7 +2767,7 @@ mod tests {
             .unwrap();
         assert_eq!(
             SandboxPhase::try_from(stored.phase()).unwrap(),
-            SandboxPhase::Ready
+            SandboxPhase::Provisioning
         );
         assert_eq!(stored.current_policy_version(), 7);
         let ready = stored
@@ -2629,9 +2780,9 @@ mod tests {
                     .find(|condition| condition.r#type == "Ready")
             })
             .unwrap();
-        assert_eq!(ready.status, "True");
-        assert_eq!(ready.reason, "DependenciesReady");
-        assert_eq!(ready.message, "Pod is Ready");
+        assert_eq!(ready.status, "False");
+        assert_eq!(ready.reason, "DependenciesNotReady");
+        assert_eq!(ready.message, "Supervisor session not connected");
     }
 
     #[tokio::test]
@@ -2746,7 +2897,7 @@ mod tests {
             .unwrap();
         assert_eq!(ready.status, "False");
         assert_eq!(ready.reason, "DependenciesNotReady");
-        assert_eq!(ready.message, "Supervisor session disconnected");
+        assert_eq!(ready.message, "Supervisor session not connected");
     }
 
     #[tokio::test]
@@ -2818,7 +2969,7 @@ mod tests {
             .unwrap();
         assert_eq!(
             SandboxPhase::try_from(stored.phase()).unwrap(),
-            SandboxPhase::Ready
+            SandboxPhase::Provisioning
         );
         assert!(stored.spec.as_ref().is_some_and(|spec| spec.gpu));
     }
@@ -2911,7 +3062,7 @@ mod tests {
             .unwrap();
         assert_eq!(
             SandboxPhase::try_from(stored.phase()).unwrap(),
-            SandboxPhase::Ready
+            SandboxPhase::Provisioning
         );
     }
 
diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs
index 5947bb334..952b51833 100644
--- a/crates/openshell-server/src/grpc/mod.rs
+++ b/crates/openshell-server/src/grpc/mod.rs
@@ -33,15 +33,15 @@ use openshell_core::proto::{
     ListProviderProfilesResponse, ListProvidersRequest, ListProvidersResponse,
     ListSandboxPoliciesRequest, ListSandboxPoliciesResponse, ListSandboxProvidersRequest,
     ListSandboxProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, ListServicesRequest,
-    ListServicesResponse, ProviderProfileResponse, ProviderResponse, PushSandboxLogsRequest,
-    PushSandboxLogsResponse, RefreshSandboxTokenRequest, RefreshSandboxTokenResponse,
-    RejectDraftChunkRequest, RejectDraftChunkResponse, RelayFrame, ReportPolicyStatusRequest,
-    ReportPolicyStatusResponse, RevokeSshSessionRequest, RevokeSshSessionResponse,
-    RotateProviderCredentialRequest, RotateProviderCredentialResponse, SandboxResponse,
-    SandboxStreamEvent, ServiceEndpointResponse, ServiceStatus, SubmitPolicyAnalysisRequest,
-    SubmitPolicyAnalysisResponse, SupervisorMessage, TcpForwardFrame, UndoDraftChunkRequest,
-    UndoDraftChunkResponse, UpdateConfigRequest, UpdateConfigResponse, UpdateProviderRequest,
-    WatchSandboxRequest, open_shell_server::OpenShell,
+    ListServicesResponse, PeerRelayFrame, ProviderProfileResponse, ProviderResponse,
+    PushSandboxLogsRequest, PushSandboxLogsResponse, RefreshSandboxTokenRequest,
+    RefreshSandboxTokenResponse, RejectDraftChunkRequest, RejectDraftChunkResponse, RelayFrame,
+    ReportPolicyStatusRequest, ReportPolicyStatusResponse, RevokeSshSessionRequest,
+    RevokeSshSessionResponse, RotateProviderCredentialRequest, RotateProviderCredentialResponse,
+    SandboxResponse, SandboxStreamEvent, ServiceEndpointResponse, ServiceStatus,
+    SubmitPolicyAnalysisRequest, SubmitPolicyAnalysisResponse, SupervisorMessage, TcpForwardFrame,
+    UndoDraftChunkRequest, UndoDraftChunkResponse, UpdateConfigRequest, UpdateConfigResponse,
+    UpdateProviderRequest, WatchSandboxRequest, open_shell_server::OpenShell,
 };
 use serde::{Deserialize, Serialize};
 use std::collections::BTreeMap;
@@ -660,6 +660,17 @@ impl OpenShell for OpenShellService {
         crate::supervisor_session::handle_relay_stream(&self.state.supervisor_sessions, request)
             .await
     }
+
+    type PeerRelayStream =
+        Pin<Box<dyn tokio_stream::Stream<Item = Result<PeerRelayFrame, Status>> + Send + 'static>>;
+
+    #[rpc_auth(auth = "peer")]
+    async fn peer_relay(
+        &self,
+        request: Request<tonic::Streaming<PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        crate::supervisor_session::handle_peer_relay(&self.state, request).await
+    }
 }
 
 // ---------------------------------------------------------------------------
diff --git a/crates/openshell-server/src/grpc/policy.rs b/crates/openshell-server/src/grpc/policy.rs
index 2e2210f44..f86e76379 100644
--- a/crates/openshell-server/src/grpc/policy.rs
+++ b/crates/openshell-server/src/grpc/policy.rs
@@ -1077,6 +1077,9 @@ async fn resolve_sandbox_by_name_for_principal(
             Ok(sandbox)
         }
         Principal::User(_) => sandbox.ok_or_else(|| Status::not_found("sandbox not found")),
+        Principal::Peer(_) => Err(Status::permission_denied(
+            "gateway peer principals may not resolve sandboxes by name",
+        )),
         Principal::Anonymous => Err(Status::unauthenticated(
             "sandbox-scoped methods require an authenticated caller",
         )),
diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs
index e60ce3995..5c135d7d0 100644
--- a/crates/openshell-server/src/grpc/sandbox.rs
+++ b/crates/openshell-server/src/grpc/sandbox.rs
@@ -629,9 +629,17 @@ pub(super) async fn handle_watch_sandbox(
             None
         };
 
+        let mut last_sandbox_resource_version: Option<u64>;
+
         // Re-read the snapshot now that we have subscriptions active.
         match state.store.get_message::<Sandbox>(&sandbox_id).await {
             Ok(Some(sandbox)) => {
+                last_sandbox_resource_version = Some(
+                    sandbox
+                        .metadata
+                        .as_ref()
+                        .map_or(0, |metadata| metadata.resource_version),
+                );
                 state.sandbox_index.update_from_sandbox(&sandbox);
                 let _ = tx
                     .send(Ok(SandboxStreamEvent {
@@ -698,6 +706,9 @@ pub(super) async fn handle_watch_sandbox(
             }
         }
 
+        let mut store_poll = tokio::time::interval(std::time::Duration::from_secs(1));
+        store_poll.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
+
         loop {
             tokio::select! {
                 res = async {
@@ -710,6 +721,10 @@ pub(super) async fn handle_watch_sandbox(
                         Ok(()) => {
                             match state.store.get_message::<Sandbox>(&sandbox_id).await {
                                 Ok(Some(sandbox)) => {
+                                    last_sandbox_resource_version = Some(sandbox
+                                        .metadata
+                                        .as_ref()
+                                        .map_or(0, |metadata| metadata.resource_version));
                                     state.sandbox_index.update_from_sandbox(&sandbox);
                                     if tx.send(Ok(SandboxStreamEvent { payload: Some(openshell_core::proto::sandbox_stream_event::Payload::Sandbox(sandbox.clone()))})).await.is_err() {
                                         return;
@@ -736,6 +751,39 @@ pub(super) async fn handle_watch_sandbox(
                         }
                     }
                 }
+                _ = store_poll.tick(), if follow_status => {
+                    match state.store.get_message::<Sandbox>(&sandbox_id).await {
+                        Ok(Some(sandbox)) => {
+                            let resource_version = sandbox
+                                .metadata
+                                .as_ref()
+                                .map_or(0, |metadata| metadata.resource_version);
+                            if last_sandbox_resource_version == Some(resource_version) {
+                                continue;
+                            }
+                            last_sandbox_resource_version = Some(resource_version);
+                            state.sandbox_index.update_from_sandbox(&sandbox);
+                            if tx.send(Ok(SandboxStreamEvent {
+                                payload: Some(openshell_core::proto::sandbox_stream_event::Payload::Sandbox(sandbox.clone())),
+                            })).await.is_err() {
+                                return;
+                            }
+                            if stop_on_terminal {
+                                let phase = SandboxPhase::try_from(sandbox.phase()).unwrap_or(SandboxPhase::Unknown);
+                                if phase == SandboxPhase::Ready {
+                                    return;
+                                }
+                            }
+                        }
+                        Ok(None) => {
+                            return;
+                        }
+                        Err(e) => {
+                            let _ = tx.send(Err(Status::internal(format!("fetch sandbox failed: {e}")))).await;
+                            return;
+                        }
+                    }
+                }
                 res = async {
                     match log_rx.as_mut() {
                         Some(rx) => rx.recv().await,
@@ -825,11 +873,15 @@ pub(super) async fn handle_exec_sandbox(
     // Open a relay channel through the supervisor session. Use a 15s
     // session-wait timeout, enough to cover a transient supervisor reconnect
     // while still failing quickly during normal operation.
-    let (channel_id, relay_rx) = state
-        .supervisor_sessions
-        .open_relay(sandbox.object_id(), std::time::Duration::from_secs(15))
-        .await
-        .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?;
+    let (channel_id, relay_rx) = crate::supervisor_session::open_routed_relay_with_target(
+        state,
+        sandbox.object_id(),
+        relay_open::Target::Ssh(SshRelayTarget {}),
+        String::new(),
+        std::time::Duration::from_secs(15),
+    )
+    .await
+    .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?;
 
     let command_str = build_remote_exec_command(&req)
         .map_err(|e| Status::invalid_argument(format!("command construction failed: {e}")))?;
@@ -937,16 +989,15 @@ pub(super) async fn handle_forward_tcp(
     }
 
     let connection_guard = acquire_forward_connection_guard(state, &init, &sandbox).await?;
-    let (channel_id, relay_rx) = state
-        .supervisor_sessions
-        .open_relay_with_target(
-            sandbox.object_id(),
-            target,
-            init.service_id.clone(),
-            std::time::Duration::from_secs(15),
-        )
-        .await
-        .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?;
+    let (channel_id, relay_rx) = crate::supervisor_session::open_routed_relay_with_target(
+        state,
+        sandbox.object_id(),
+        target,
+        init.service_id.clone(),
+        std::time::Duration::from_secs(15),
+    )
+    .await
+    .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?;
 
     let sandbox_id = sandbox.object_id().to_string();
     let (tx, rx) = mpsc::channel::<Result<TcpForwardFrame, Status>>(256);
@@ -1265,11 +1316,15 @@ pub(super) async fn handle_exec_sandbox_interactive(
         return Err(Status::failed_precondition("sandbox is not ready"));
     }
 
-    let (channel_id, relay_rx) = state
-        .supervisor_sessions
-        .open_relay(sandbox.object_id(), std::time::Duration::from_secs(15))
-        .await
-        .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?;
+    let (channel_id, relay_rx) = crate::supervisor_session::open_routed_relay_with_target(
+        state,
+        sandbox.object_id(),
+        relay_open::Target::Ssh(SshRelayTarget {}),
+        String::new(),
+        std::time::Duration::from_secs(15),
+    )
+    .await
+    .map_err(|e| Status::unavailable(format!("supervisor relay failed: {e}")))?;
 
     let command_str = build_remote_exec_command(&req)
         .map_err(|e| Status::invalid_argument(format!("command construction failed: {e}")))?;
@@ -1940,6 +1995,7 @@ mod tests {
     use crate::grpc::test_support::test_server_state;
     use openshell_core::proto::datamodel::v1::ObjectMeta;
     use std::collections::HashMap;
+    use tokio_stream::StreamExt;
 
     // ---- shell_escape ----
 
@@ -2221,6 +2277,72 @@ mod tests {
         sandbox
     }
 
+    #[tokio::test]
+    async fn watch_sandbox_polls_store_updates_without_local_bus_notification() {
+        let state = test_server_state().await;
+        let mut sandbox = test_sandbox("watch-poll", Vec::new());
+        sandbox.set_phase(SandboxPhase::Provisioning as i32);
+        let sandbox_id = sandbox.object_id().to_string();
+        state.store.put_message(&sandbox).await.unwrap();
+
+        let response = handle_watch_sandbox(
+            &state,
+            Request::new(WatchSandboxRequest {
+                id: sandbox_id.clone(),
+                follow_status: true,
+                ..Default::default()
+            }),
+        )
+        .await
+        .unwrap();
+        let mut stream = response.into_inner();
+
+        let first = tokio::time::timeout(std::time::Duration::from_secs(1), stream.next())
+            .await
+            .expect("initial watch snapshot should arrive")
+            .expect("stream should remain open")
+            .expect("initial watch snapshot should be ok");
+        let Some(openshell_core::proto::sandbox_stream_event::Payload::Sandbox(first)) =
+            first.payload
+        else {
+            panic!("expected initial sandbox snapshot");
+        };
+        assert_eq!(
+            SandboxPhase::try_from(first.phase()).ok(),
+            Some(SandboxPhase::Provisioning)
+        );
+
+        state
+            .store
+            .update_message_cas::<Sandbox, _>(&sandbox_id, 0, |sandbox| {
+                sandbox.set_phase(SandboxPhase::Ready as i32);
+            })
+            .await
+            .unwrap();
+
+        let ready =
+            tokio::time::timeout(std::time::Duration::from_secs(3), async {
+                loop {
+                    let event = stream
+                        .next()
+                        .await
+                        .expect("stream should remain open")
+                        .expect("watch event should be ok");
+                    if let Some(openshell_core::proto::sandbox_stream_event::Payload::Sandbox(
+                        sandbox,
+                    )) = event.payload
+                        && SandboxPhase::try_from(sandbox.phase()).ok() == Some(SandboxPhase::Ready)
+                    {
+                        break sandbox;
+                    }
+                }
+            })
+            .await
+            .expect("watch should observe cross-replica store update by polling");
+
+        assert_eq!(ready.object_id(), sandbox_id);
+    }
+
     #[tokio::test]
     async fn attach_sandbox_provider_persists_current_provider_list() {
         let state = test_server_state().await;
diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs
index 13496cd99..39147237f 100644
--- a/crates/openshell-server/src/inference.rs
+++ b/crates/openshell-server/src/inference.rs
@@ -815,7 +815,9 @@ fn authorize_inference_bundle(
 ) -> Result<(), Status> {
     match principal {
         Some(crate::auth::principal::Principal::Sandbox(_)) => Ok(()),
-        Some(crate::auth::principal::Principal::User(_)) => Err(Status::permission_denied(
+        Some(
+            crate::auth::principal::Principal::User(_) | crate::auth::principal::Principal::Peer(_),
+        ) => Err(Status::permission_denied(
             "GetInferenceBundle requires a sandbox principal",
         )),
         Some(crate::auth::principal::Principal::Anonymous) | None => Err(Status::unauthenticated(
diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs
index 9f1127d0e..eef6a20b0 100644
--- a/crates/openshell-server/src/lib.rs
+++ b/crates/openshell-server/src/lib.rs
@@ -37,6 +37,7 @@ mod sandbox_index;
 mod sandbox_watch;
 mod service_routing;
 mod ssh_sessions;
+mod supervisor_owner;
 pub mod supervisor_session;
 mod telemetry;
 mod tls;
@@ -114,6 +115,12 @@ pub struct ServerState {
     /// query session state to surface supervisor readiness.
     pub supervisor_sessions: Arc<supervisor_session::SupervisorSessionRegistry>,
 
+    /// Stable identity for this gateway process.
+    pub replica_id: String,
+
+    /// Internal endpoint other gateway replicas can dial for peer RPCs.
+    pub peer_endpoint: Option<String>,
+
     /// OIDC JWKS cache for JWT validation. `None` when OIDC is not configured.
     pub oidc_cache: Option<Arc<auth::oidc::JwksCache>>,
 
@@ -135,6 +142,9 @@ pub struct ServerState {
 
     /// Gateway-wide gRPC request rate limiter shared by every multiplex path.
     pub(crate) grpc_rate_limiter: Option<multiplex::GrpcRateLimiter>,
+
+    /// Optional K8s `ServiceAccount` authenticator for gateway peer RPCs.
+    pub peer_authenticator: Option<Arc<auth::peer::PeerServiceAccountAuthenticator>>,
 }
 
 fn is_benign_tls_handshake_failure(error: &std::io::Error) -> bool {
@@ -167,6 +177,8 @@ impl ServerState {
         supervisor_sessions: Arc<supervisor_session::SupervisorSessionRegistry>,
         oidc_cache: Option<Arc<auth::oidc::JwksCache>>,
     ) -> Self {
+        let replica_id = compute::lease::replica_id();
+        let peer_endpoint = derive_peer_endpoint(&config);
         let grpc_rate_limiter = multiplex::GrpcRateLimiter::from_config(&config);
         Self {
             config,
@@ -180,15 +192,46 @@ impl ServerState {
             ssh_connections_by_sandbox: Mutex::new(HashMap::new()),
             settings_mutex: tokio::sync::Mutex::new(()),
             supervisor_sessions,
+            replica_id,
+            peer_endpoint,
             oidc_cache,
             sandbox_jwt_issuer: None,
             sandbox_jwt_authenticator: None,
             k8s_sa_authenticator: None,
+            peer_authenticator: None,
             grpc_rate_limiter,
         }
     }
 }
 
+/// Resolve the endpoint other gateway replicas should dial for peer RPCs.
+///
+/// `OPENSHELL_PEER_ENDPOINT` wins when set and non-blank; otherwise the pod
+/// DNS name is built from the pod, peer Service, and namespace variables.
+fn derive_peer_endpoint(config: &Config) -> Option<String> {
+    // Trimmed value of an env var; unset, non-Unicode, or blank yields None.
+    let env_trimmed = |key: &str| {
+        std::env::var(key)
+            .ok()
+            .map(|raw| raw.trim().to_string())
+            .filter(|value| !value.is_empty())
+    };
+
+    if let Some(endpoint) = env_trimmed("OPENSHELL_PEER_ENDPOINT") {
+        return Some(endpoint);
+    }
+    let pod = env_trimmed("OPENSHELL_POD_NAME")?;
+    let namespace = env_trimmed("OPENSHELL_POD_NAMESPACE")?;
+    let service = env_trimmed("OPENSHELL_PEER_SERVICE_NAME")?;
+    let scheme = if config.tls.is_some() {
+        "https"
+    } else {
+        "http"
+    };
+    let port = config.bind_address.port();
+    Some(format!("{scheme}://{pod}.{service}.{namespace}.svc.cluster.local:{port}"))
+}
+
 /// Run the `OpenShell` server.
 ///
 /// This starts a multiplexed gRPC/HTTP server on the configured bind address.
@@ -340,6 +383,51 @@ pub async fn run_server(
         }
     }
 
+    if std::env::var_os("KUBERNETES_SERVICE_HOST").is_some() {
+        let namespace = std::env::var("OPENSHELL_POD_NAMESPACE").ok();
+        let service_account = std::env::var("OPENSHELL_SERVICE_ACCOUNT_NAME").ok();
+        match (namespace, service_account) {
+            (Some(namespace), Some(service_account))
+                if !namespace.trim().is_empty() && !service_account.trim().is_empty() =>
+            {
+                let required_labels =
+                    auth::peer::required_pod_labels_from_env().map_err(Error::config)?;
+                match kube::Client::try_default().await {
+                    Ok(client) => {
+                        let audience = auth::peer::peer_token_audience_from_env();
+                        let resolver = Arc::new(auth::peer::LiveGatewayPeerResolver::new(
+                            client,
+                            namespace.trim(),
+                            audience.clone(),
+                            service_account.trim().to_string(),
+                            required_labels,
+                        ));
+                        let authenticator =
+                            auth::peer::PeerServiceAccountAuthenticator::new(resolver);
+                        state.peer_authenticator = Some(Arc::new(authenticator));
+                        info!(
+                            namespace = %namespace.trim(),
+                            service_account = %service_account.trim(),
+                            audience,
+                            "gateway peer ServiceAccount TokenReview authentication enabled"
+                        );
+                    }
+                    Err(err) => warn!(
+                        error = %err,
+                        "in-cluster K8s client construction failed; \
+                         gateway peer ServiceAccount authentication is disabled"
+                    ),
+                }
+            }
+            _ => {
+                debug!(
+                    "OPENSHELL_POD_NAMESPACE or OPENSHELL_SERVICE_ACCOUNT_NAME missing; \
+                     gateway peer ServiceAccount authentication disabled"
+                );
+            }
+        }
+    }
+
     let state = Arc::new(state);
 
     let (shutdown_tx, shutdown_rx) = watch::channel(false);
diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs
index f4faa0867..8f5fed831 100644
--- a/crates/openshell-server/src/multiplex.rs
+++ b/crates/openshell-server/src/multiplex.rs
@@ -417,13 +417,16 @@ where
 /// Assemble the authenticator chain for the gateway.
 ///
 /// Chain order (first-match-wins):
-/// 1. `K8sServiceAccountAuthenticator` (path-scoped to `IssueSandboxToken`)
+/// 1. `PeerServiceAccountAuthenticator` (path-scoped to `PeerRelay`)
+///    — validates gateway replica projected `ServiceAccount` tokens with
+///    `TokenReview` for internal peer relay calls. No-op on every other path.
+/// 2. `K8sServiceAccountAuthenticator` (path-scoped to `IssueSandboxToken`)
 ///    — exchanges a projected SA token for a `Principal::Sandbox` so the
 ///    `IssueSandboxToken` handler can mint a gateway JWT. No-op on every
 ///    other path; only present when the gateway runs in-cluster.
-/// 2. `SandboxJwtAuthenticator` — validates gateway-minted JWTs. Recognized
+/// 3. `SandboxJwtAuthenticator` — validates gateway-minted JWTs. Recognized
 ///    via a distinctive `kid` so non-matching Bearer tokens fall through.
-/// 3. `OidcAuthenticator` — validates user Bearer tokens against the
+/// 4. `OidcAuthenticator` — validates user Bearer tokens against the
 ///    configured OIDC issuer. Returns `Unauthenticated` for missing
 ///    Bearer headers so non-OIDC clients can't sneak through.
 ///
@@ -438,6 +441,9 @@ where
 /// to pass-through unless mTLS or local unauthenticated users are enabled.
 fn build_authenticator_chain(state: &ServerState) -> Option<AuthenticatorChain> {
     let mut authenticators: Vec<Arc<dyn crate::auth::authenticator::Authenticator>> = Vec::new();
+    if let Some(peer) = state.peer_authenticator.clone() {
+        authenticators.push(peer);
+    }
     if let Some(k8s) = state.k8s_sa_authenticator.clone() {
         authenticators.push(k8s);
     }
@@ -604,6 +610,13 @@ where
                         )));
                     }
                 }
+                Principal::Peer(_) => {
+                    if !crate::auth::method_authz::is_peer_callable(&path) {
+                        return Ok(status_response(tonic::Status::permission_denied(
+                            "gateway peer principals may not call this method",
+                        )));
+                    }
+                }
                 Principal::Anonymous => {
                     return Ok(status_response(tonic::Status::unauthenticated(
                         "anonymous callers may not call authenticated methods",
diff --git a/crates/openshell-server/src/service_routing.rs b/crates/openshell-server/src/service_routing.rs
index 7ebd6dba9..930c80494 100644
--- a/crates/openshell-server/src/service_routing.rs
+++ b/crates/openshell-server/src/service_routing.rs
@@ -304,17 +304,16 @@ async fn proxy_to_endpoint(
     let websocket_upgrade = is_websocket_upgrade(&req);
     let downstream_upgrade = websocket_upgrade.then(|| hyper::upgrade::on(&mut req));
 
-    let (_channel_id, relay_rx) = state
-        .supervisor_sessions
-        .open_relay_with_target(
-            sandbox.object_id(),
-            relay_open::Target::Tcp(TcpRelayTarget {
-                host: RELAY_TARGET_HOST.to_string(),
-                port: u32::from(target_port),
-            }),
-            endpoint.object_id().to_string(),
-            Duration::from_secs(15),
-        )
+    let (_channel_id, relay_rx) = crate::supervisor_session::open_routed_relay_with_target(
+        &state,
+        sandbox.object_id(),
+        relay_open::Target::Tcp(TcpRelayTarget {
+            host: RELAY_TARGET_HOST.to_string(),
+            port: u32::from(target_port),
+        }),
+        endpoint.object_id().to_string(),
+        Duration::from_secs(15),
+    )
         .await
         .map_err(|err| {
             warn!(error = %err, sandbox_id = %endpoint.sandbox_id, "sandbox service routing: supervisor relay unavailable");
diff --git a/crates/openshell-server/src/supervisor_owner.rs b/crates/openshell-server/src/supervisor_owner.rs
new file mode 100644
index 000000000..5dd33b4db
--- /dev/null
+++ b/crates/openshell-server/src/supervisor_owner.rs
@@ -0,0 +1,343 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Shared supervisor-session ownership index for HA gateway replicas.
+
+use crate::persistence::{PersistenceError, Store, WriteCondition};
+use openshell_core::time::now_ms;
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+use std::time::Duration;
+use thiserror::Error;
+
+const OWNER_OBJECT_TYPE: &str = "supervisor_session_owner";
+
+pub const OWNER_TTL: Duration = Duration::from_secs(45);
+
+fn owner_object_id(sandbox_id: &str) -> String {
+    ["supervisor-owner:", sandbox_id].concat() // prefixed key, distinct from the bare sandbox id
+}
+
+#[derive(Debug, Error)]
+pub enum OwnerError {
+    #[error("supervisor session is owned by another active gateway replica")]
+    AlreadyOwned, // existing record may not be superseded, or a create hit a uniqueness violation
+    #[error("supervisor owner record CAS conflict")]
+    Conflict, // a version-conditioned write or delete found the record changed
+    #[error("persistence error: {0}")]
+    Store(#[from] PersistenceError), // any other store, encode or decode failure
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct OwnerPayload { // JSON body persisted for each owner record
+    sandbox_id: String,
+    session_id: String,
+    supervisor_instance_id: String,
+    connection_epoch: u64, // compared in `can_supersede` for same-instance reconnects
+    owner_replica_id: String,
+    owner_peer_endpoint: String,
+    connected_at_ms: i64, // stamped in `publish`, carried unchanged through `renew`
+}
+
+#[derive(Debug, Clone)]
+pub struct OwnerRecord { // decoded view returned by `SupervisorOwnerIndex::read`
+    pub session_id: String,
+    pub supervisor_instance_id: String,
+    pub connection_epoch: u64,
+    pub owner_replica_id: String,
+    pub owner_peer_endpoint: String, // endpoint other replicas dial to reach the owner
+    #[allow(dead_code)]
+    pub connected_at_ms: i64,
+    pub updated_at_ms: i64, // copied from the store row; basis of the TTL / freshness checks
+    pub resource_version: u64, // copied from the store row; used for conditional writes
+}
+
+#[derive(Debug, Clone)]
+pub struct OwnerGuard { // returned by `publish`; input to `renew` and `release_if_current`
+    pub sandbox_id: String,
+    pub session_id: String,
+    pub supervisor_instance_id: String,
+    pub connection_epoch: u64,
+    pub owner_replica_id: String,
+    pub owner_peer_endpoint: String,
+    connected_at_ms: i64, // private: replayed into the payload on every renew
+    resource_version: u64, // private: last version written; advanced by `renew`
+}
+
+pub struct SupervisorOwnerIndex {
+    store: Arc<Store>, // persistence backend holding the owner records
+    ttl: Duration, // record age from which `publish` may supersede an existing owner
+}
+
+impl SupervisorOwnerIndex {
+    pub fn new(store: Arc<Store>, ttl: Duration) -> Self {
+        Self { store, ttl } // `ttl` is handed to `can_supersede` by `publish`
+    }
+
+    /// Claims ownership of `sandbox_id`'s supervisor session for a replica.
+    ///
+    /// Creates the record when none exists, or replaces an existing one when
+    /// `can_supersede` allows it; otherwise fails with `AlreadyOwned`.
+    pub async fn publish(
+        &self,
+        sandbox_id: &str,
+        session_id: &str,
+        supervisor_instance_id: &str,
+        connection_epoch: u64,
+        owner_replica_id: &str,
+        owner_peer_endpoint: &str,
+    ) -> Result<OwnerGuard, OwnerError> {
+        let connected_at_ms = now_ms();
+        let payload = OwnerPayload {
+            sandbox_id: sandbox_id.to_string(),
+            session_id: session_id.to_string(),
+            supervisor_instance_id: supervisor_instance_id.to_string(),
+            connection_epoch,
+            owner_replica_id: owner_replica_id.to_string(),
+            owner_peer_endpoint: owner_peer_endpoint.to_string(),
+            connected_at_ms,
+        };
+
+        // The write precondition is derived from what is stored right now.
+        let condition = match self.read(sandbox_id).await? {
+            None => WriteCondition::MustCreate,
+            Some(prev) => {
+                if !can_supersede(&prev, supervisor_instance_id, connection_epoch, self.ttl) {
+                    return Err(OwnerError::AlreadyOwned);
+                }
+                WriteCondition::MatchResourceVersion(prev.resource_version)
+            }
+        };
+
+        let written = self.write_payload(sandbox_id, &payload, condition).await?;
+        Ok(OwnerGuard {
+            sandbox_id: payload.sandbox_id,
+            session_id: payload.session_id,
+            supervisor_instance_id: payload.supervisor_instance_id,
+            connection_epoch,
+            owner_replica_id: payload.owner_replica_id,
+            owner_peer_endpoint: payload.owner_peer_endpoint,
+            connected_at_ms,
+            resource_version: written.resource_version,
+        })
+    }
+
+    /// Rewrites the owner record for `guard`, conditioned on the resource
+    /// version the guard last observed, and stores the new version in `guard`.
+    ///
+    /// Returns `OwnerError::Conflict` when the record changed underneath us;
+    /// `write_payload` already performs that mapping, so errors pass through.
+    pub async fn renew(&self, guard: &mut OwnerGuard) -> Result<(), OwnerError> {
+        let payload = OwnerPayload {
+            sandbox_id: guard.sandbox_id.clone(),
+            session_id: guard.session_id.clone(),
+            supervisor_instance_id: guard.supervisor_instance_id.clone(),
+            connection_epoch: guard.connection_epoch,
+            owner_replica_id: guard.owner_replica_id.clone(),
+            owner_peer_endpoint: guard.owner_peer_endpoint.clone(),
+            connected_at_ms: guard.connected_at_ms,
+        };
+
+        let result = self
+            .write_payload(
+                &guard.sandbox_id,
+                &payload,
+                WriteCondition::MatchResourceVersion(guard.resource_version),
+            )
+            .await?;
+        // Later renewals must be conditioned on the version just written.
+        guard.resource_version = result.resource_version;
+        Ok(())
+    }
+
+    /// Deletes the owner record only while it still names `guard`'s session and
+    /// replica; a missing or superseded record is left untouched and is `Ok`.
+    pub async fn release_if_current(&self, guard: &OwnerGuard) -> Result<(), OwnerError> {
+        let Some(current) = self.read(&guard.sandbox_id).await? else {
+            return Ok(());
+        };
+        let same_session = current.session_id == guard.session_id;
+        let same_replica = current.owner_replica_id == guard.owner_replica_id;
+        if !(same_session && same_replica) {
+            return Ok(());
+        }
+        // The delete is conditioned on the version read above.
+        let object_id = owner_object_id(&guard.sandbox_id);
+        let deleted = self
+            .store
+            .delete_if(OWNER_OBJECT_TYPE, &object_id, current.resource_version)
+            .await;
+        match deleted {
+            Ok(_) => Ok(()),
+            Err(PersistenceError::Conflict { .. }) => Err(OwnerError::Conflict),
+            Err(other) => Err(OwnerError::Store(other)),
+        }
+    }
+
+    /// Loads and decodes the owner record for `sandbox_id`; `Ok(None)` when absent.
+    pub async fn read(&self, sandbox_id: &str) -> Result<Option<OwnerRecord>, OwnerError> {
+        let object_id = owner_object_id(sandbox_id);
+        let stored = self.store.get(OWNER_OBJECT_TYPE, &object_id).await?;
+        let Some(stored) = stored else {
+            return Ok(None);
+        };
+
+        // Session fields come from the JSON payload; timestamp and version from the row.
+        let decoded: OwnerPayload = serde_json::from_slice(&stored.payload)
+            .map_err(|err| PersistenceError::Decode(err.to_string()))?;
+        let record = OwnerRecord {
+            session_id: decoded.session_id,
+            supervisor_instance_id: decoded.supervisor_instance_id,
+            connection_epoch: decoded.connection_epoch,
+            owner_replica_id: decoded.owner_replica_id,
+            owner_peer_endpoint: decoded.owner_peer_endpoint,
+            connected_at_ms: decoded.connected_at_ms,
+            updated_at_ms: stored.updated_at_ms,
+            resource_version: stored.resource_version,
+        };
+        Ok(Some(record))
+    }
+
+    /// Serializes `payload` as JSON and writes it under `condition`, mapping
+    /// uniqueness and version failures onto `AlreadyOwned` / `Conflict`.
+    async fn write_payload(
+        &self,
+        sandbox_id: &str,
+        payload: &OwnerPayload,
+        condition: WriteCondition,
+    ) -> Result<crate::persistence::WriteResult, OwnerError> {
+        let encoded = serde_json::to_vec(payload)
+            .map_err(|err| OwnerError::Store(PersistenceError::Encode(err.to_string())))?;
+        let outcome = self
+            .store
+            .put_if(
+                OWNER_OBJECT_TYPE,
+                &owner_object_id(sandbox_id),
+                sandbox_id,
+                &encoded,
+                None,
+                condition,
+            )
+            .await;
+        outcome.map_err(|err| match err {
+            PersistenceError::UniqueViolation { .. } => OwnerError::AlreadyOwned,
+            PersistenceError::Conflict { .. } => OwnerError::Conflict,
+            other => OwnerError::Store(other),
+        })
+    }
+}
+
+/// Decides whether a new session may take over `existing`.
+///
+/// Takeover is allowed once the record is at least `ttl` old, or earlier when
+/// the same supervisor instance reconnects with a strictly higher epoch.
+fn can_supersede(
+    existing: &OwnerRecord,
+    supervisor_instance_id: &str,
+    connection_epoch: u64,
+    ttl: Duration,
+) -> bool {
+    let ttl_ms = i64::try_from(ttl.as_millis()).unwrap_or(i64::MAX);
+    let expired = now_ms() - existing.updated_at_ms >= ttl_ms;
+    let same_instance = existing.supervisor_instance_id == supervisor_instance_id;
+    expired || (same_instance && connection_epoch > existing.connection_epoch)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    async fn test_index(ttl: Duration) -> SupervisorOwnerIndex { // index over a fresh test store
+        let store = Arc::new(crate::persistence::test_store().await);
+        SupervisorOwnerIndex::new(store, ttl)
+    }
+
+    #[tokio::test]
+    async fn publish_creates_owner() {
+        let index = test_index(OWNER_TTL).await;
+        let guard = index
+            .publish("sbx", "s1", "inst", 1, "gw-1", "http://gw-1")
+            .await
+            .unwrap();
+        let record = index.read("sbx").await.unwrap().unwrap();
+        assert_eq!(record.session_id, guard.session_id);
+        assert_eq!(record.owner_replica_id, "gw-1");
+    }
+
+    #[tokio::test]
+    async fn publish_does_not_collide_with_sandbox_object_id() {
+        let index = test_index(OWNER_TTL).await;
+        index // pre-seed a `sandbox` object whose id equals the raw sandbox id
+            .store
+            .put("sandbox", "sbx", "sandbox-a", br"{}", None)
+            .await
+            .unwrap();
+
+        index
+            .publish("sbx", "s1", "inst", 1, "gw-1", "http://gw-1")
+            .await
+            .unwrap();
+
+        let record = index.read("sbx").await.unwrap().unwrap();
+        assert_eq!(record.session_id, "s1");
+        assert_eq!(record.owner_replica_id, "gw-1");
+    }
+
+    #[tokio::test]
+    async fn publish_rejects_active_different_instance() {
+        let index = test_index(OWNER_TTL).await;
+        index
+            .publish("sbx", "s1", "inst-a", 1, "gw-1", "http://gw-1")
+            .await
+            .unwrap();
+        let err = index // different instance id while the first record is within its TTL
+            .publish("sbx", "s2", "inst-b", 1, "gw-2", "http://gw-2")
+            .await
+            .unwrap_err();
+        assert!(matches!(err, OwnerError::AlreadyOwned));
+    }
+
+    #[tokio::test]
+    async fn publish_supersedes_same_instance_higher_epoch() {
+        let index = test_index(OWNER_TTL).await;
+        index
+            .publish("sbx", "s1", "inst", 1, "gw-1", "http://gw-1")
+            .await
+            .unwrap();
+        let guard = index // same instance id, epoch 2 > 1
+            .publish("sbx", "s2", "inst", 2, "gw-2", "http://gw-2")
+            .await
+            .unwrap();
+        let record = index.read("sbx").await.unwrap().unwrap();
+        assert_eq!(record.session_id, guard.session_id);
+        assert_eq!(record.owner_replica_id, "gw-2");
+    }
+
+    #[tokio::test]
+    async fn release_if_current_ignores_stale_guard() {
+        let index = test_index(OWNER_TTL).await;
+        let old = index
+            .publish("sbx", "s1", "inst", 1, "gw-1", "http://gw-1")
+            .await
+            .unwrap();
+        let new = index
+            .publish("sbx", "s2", "inst", 2, "gw-2", "http://gw-2")
+            .await
+            .unwrap();
+        index.release_if_current(&old).await.unwrap(); // `old` was superseded, so nothing is deleted
+        let record = index.read("sbx").await.unwrap().unwrap();
+        assert_eq!(record.session_id, new.session_id);
+    }
+
+    #[tokio::test]
+    async fn renew_updates_resource_version() {
+        let index = test_index(OWNER_TTL).await;
+        let mut guard = index
+            .publish("sbx", "s1", "inst", 1, "gw-1", "http://gw-1")
+            .await
+            .unwrap();
+        let before = guard.resource_version;
+        index.renew(&mut guard).await.unwrap();
+        assert!(guard.resource_version > before);
+    }
+}
diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs
index 4adf9e8b6..4c25facb9 100644
--- a/crates/openshell-server/src/supervisor_session.rs
+++ b/crates/openshell-server/src/supervisor_session.rs
@@ -8,17 +8,21 @@ use std::time::{Duration, Instant};
 
 use tokio::sync::{mpsc, oneshot};
 use tokio_stream::wrappers::ReceiverStream;
+use tonic::metadata::{Ascii, MetadataValue};
+use tonic::transport::{Channel, ClientTlsConfig, Endpoint};
 use tonic::{Request, Response, Status};
 use tracing::{info, warn};
 use uuid::Uuid;
 
 use openshell_core::proto::{
-    GatewayMessage, RelayFrame, RelayInit, RelayOpen, Sandbox, SessionAccepted, SshRelayTarget,
-    SupervisorMessage, gateway_message, relay_open, supervisor_message,
+    GatewayMessage, PeerRelayFrame, PeerRelayInit, RelayFrame, RelayInit, RelayOpen, Sandbox,
+    SessionAccepted, SshRelayTarget, SupervisorMessage, gateway_message, open_shell_client,
+    peer_relay_frame, relay_open, supervisor_message,
 };
 
 use crate::ServerState;
 use crate::auth::principal::Principal;
+use crate::supervisor_owner::{OWNER_TTL, OwnerError, OwnerGuard, SupervisorOwnerIndex};
 
 const HEARTBEAT_INTERVAL_SECS: u32 = 15;
 const RELAY_PENDING_TIMEOUT: Duration = Duration::from_secs(10);
@@ -266,16 +270,36 @@ impl SupervisorSessionRegistry {
         ),
         Status,
     > {
-        let tx = self
-            .wait_for_session(sandbox_id, session_wait_timeout)
-            .await?;
-
         let channel_id = Uuid::new_v4().to_string();
         let relay_open = RelayOpen {
             channel_id: channel_id.clone(),
             target: Some(target),
             service_id,
         };
+        self.open_relay_with_message(sandbox_id, relay_open, session_wait_timeout)
+            .await
+    }
+
+    pub async fn open_relay_with_message(
+        &self,
+        sandbox_id: &str,
+        relay_open: RelayOpen,
+        session_wait_timeout: Duration,
+    ) -> Result<
+        (
+            String,
+            oneshot::Receiver<Result<tokio::io::DuplexStream, Status>>,
+        ),
+        Status,
+    > {
+        if relay_open.channel_id.is_empty() {
+            return Err(Status::invalid_argument("relay channel_id is required"));
+        }
+        let tx = self
+            .wait_for_session(sandbox_id, session_wait_timeout)
+            .await?;
+
+        let channel_id = relay_open.channel_id.clone();
 
         // Register the pending relay before sending RelayOpen to avoid a race.
         // Both caps are checked and the insert happens under a single lock hold
@@ -434,6 +458,18 @@ pub fn spawn_relay_reaper(state: Arc<ServerState>, interval: Duration) {
     });
 }
 
+/// Translates an owner-index error into the gRPC `Status` surfaced to callers.
+fn owner_error_to_status(err: OwnerError) -> Status {
+    let owned_elsewhere = "supervisor session owned by another gateway replica";
+    match err {
+        OwnerError::AlreadyOwned => Status::unavailable(owned_elsewhere),
+        OwnerError::Conflict => Status::aborted("supervisor owner record changed concurrently"),
+        OwnerError::Store(cause) => {
+            Status::internal(format!("supervisor owner persistence failed: {cause}"))
+        }
+    }
+}
+
 async fn require_persisted_sandbox(
     store: &Arc<crate::persistence::Store>,
     sandbox_id: &str,
@@ -566,6 +602,414 @@ pub async fn handle_relay_stream(
     Ok(Response::new(stream))
 }
 
+// ---------------------------------------------------------------------------
+// PeerRelay gRPC handler and client-side forwarding
+// ---------------------------------------------------------------------------
+
+#[derive(Clone)]
+struct PeerAuthInterceptor {
+    bearer: MetadataValue<Ascii>, // `authorization` value, marked sensitive in `new`
+    replica_id: MetadataValue<Ascii>, // `x-openshell-peer-replica` value
+}
+
+impl PeerAuthInterceptor {
+    fn new(token: &str, replica_id: &str) -> Result<Self, Status> {
+        let mut bearer = MetadataValue::try_from(format!("Bearer {token}"))
+            .map_err(|_| Status::internal("invalid gateway peer SA token header value"))?;
+        bearer.set_sensitive(true); // embeds the SA token: keep it out of Debug output
+        let replica_id = MetadataValue::try_from(replica_id.to_string())
+            .map_err(|_| Status::internal("invalid gateway replica id header value"))?;
+        Ok(Self { bearer, replica_id })
+    }
+}
+
+impl tonic::service::Interceptor for PeerAuthInterceptor {
+    fn call(&mut self, mut req: Request<()>) -> Result<Request<()>, Status> {
+        let metadata = req.metadata_mut();
+        metadata.insert("authorization", self.bearer.clone());
+        metadata.insert("x-openshell-peer-replica", self.replica_id.clone());
+        Ok(req)
+    }
+}
+
+/// Dials `endpoint` with HTTP/2 keep-alives enabled; `https://` endpoints
+/// additionally get a TLS config built with `with_native_roots`.
+async fn build_peer_channel(endpoint: &str) -> Result<Channel, Status> {
+    let base = Endpoint::from_shared(endpoint.to_string())
+        .map_err(|err| Status::internal(format!("invalid gateway peer endpoint: {err}")))?
+        .connect_timeout(Duration::from_secs(10))
+        .http2_keep_alive_interval(Duration::from_secs(10))
+        .keep_alive_while_idle(true)
+        .keep_alive_timeout(Duration::from_secs(20))
+        .http2_adaptive_window(true);
+    let configured = if endpoint.starts_with("https://") {
+        base.tls_config(ClientTlsConfig::new().with_native_roots())
+            .map_err(|err| Status::internal(format!("failed to configure peer TLS: {err}")))?
+    } else {
+        base
+    };
+    let dialed = configured.connect().await;
+    dialed.map_err(|err| Status::unavailable(format!("gateway peer connection failed: {err}")))
+}
+
+/// Wraps `target` in a `RelayOpen` with a fresh channel id and opens it via the routed path.
+pub async fn open_routed_relay_with_target(
+    state: &Arc<ServerState>,
+    sandbox_id: &str,
+    target: relay_open::Target,
+    service_id: String,
+    session_wait_timeout: Duration,
+) -> Result<
+    (
+        String,
+        oneshot::Receiver<Result<tokio::io::DuplexStream, Status>>,
+    ),
+    Status,
+> {
+    let message = RelayOpen {
+        channel_id: Uuid::new_v4().to_string(),
+        target: Some(target),
+        service_id,
+    };
+    open_routed_relay_with_message(state, sandbox_id, message, session_wait_timeout).await
+}
+
+/// Opens a relay to the supervisor of `sandbox_id`, wherever its session lives.
+///
+/// A session registered on this replica is used directly. Otherwise the owner
+/// index is polled with exponential backoff until `session_wait_timeout` has
+/// elapsed:
+///
+/// - a fresh record naming another replica is dialed via `open_peer_relay`;
+/// - a fresh record naming this replica only logs a warning and waits for the
+///   local session to be registered;
+/// - a missing or stale record simply waits for the next poll.
+///
+/// Owner-index read failures abort immediately; failed peer dials are retried.
+/// Returns `Status::unavailable` once the next backoff would pass the deadline.
+pub async fn open_routed_relay_with_message(
+    state: &Arc<ServerState>,
+    sandbox_id: &str,
+    relay_open: RelayOpen,
+    session_wait_timeout: Duration,
+) -> Result<
+    (
+        String,
+        oneshot::Receiver<Result<tokio::io::DuplexStream, Status>>,
+    ),
+    Status,
+> {
+    let deadline = Instant::now() + session_wait_timeout;
+    let mut backoff = SESSION_WAIT_INITIAL_BACKOFF;
+    let owner_index = SupervisorOwnerIndex::new(state.store.clone(), OWNER_TTL);
+    loop {
+        // A local session always wins, including one that appears mid-wait.
+        if state.supervisor_sessions.has_session(sandbox_id) {
+            return state
+                .supervisor_sessions
+                .open_relay_with_message(sandbox_id, relay_open, session_wait_timeout)
+                .await;
+        }
+
+        // Stale records are treated exactly like missing ones.
+        let fresh_owner = owner_index
+            .read(sandbox_id)
+            .await
+            .map_err(owner_error_to_status)?
+            .filter(owner_is_fresh);
+        match fresh_owner {
+            Some(owner) if owner.owner_replica_id == state.replica_id => {
+                warn!(
+                    sandbox_id,
+                    owner_replica_id = %owner.owner_replica_id,
+                    "supervisor owner record points at this replica but no local session is registered; retrying"
+                );
+            }
+            Some(owner) => {
+                let endpoint = owner.owner_peer_endpoint.clone();
+                match open_peer_relay(state, endpoint, sandbox_id, relay_open.clone()).await {
+                    Ok(relay) => return Ok(relay),
+                    Err(status) => {
+                        warn!(
+                            sandbox_id,
+                            owner_replica_id = %owner.owner_replica_id,
+                            owner_peer_endpoint = %owner.owner_peer_endpoint,
+                            error = %status,
+                            "gateway peer owner relay open failed; retrying until session wait timeout"
+                        );
+                    }
+                }
+            }
+            None => {}
+        }
+
+        // Shared wait step for every branch that did not return above.
+        if Instant::now() + backoff > deadline {
+            return Err(Status::unavailable("supervisor session not connected"));
+        }
+        tokio::time::sleep(backoff).await;
+        backoff = (backoff * 2).min(SESSION_WAIT_MAX_BACKOFF);
+    }
+}
+
+/// True while the record's last update is younger than `OWNER_TTL`.
+fn owner_is_fresh(owner: &crate::supervisor_owner::OwnerRecord) -> bool {
+    let ttl_ms = i64::try_from(OWNER_TTL.as_millis()).unwrap_or(i64::MAX);
+    openshell_core::time::now_ms() - owner.updated_at_ms < ttl_ms
+}
+
+async fn open_peer_relay( // dials the owner replica; same return shape as a local relay open
+    state: &Arc<ServerState>,
+    owner_peer_endpoint: String,
+    sandbox_id: &str,
+    relay_open: RelayOpen,
+) -> Result<
+    (
+        String,
+        oneshot::Receiver<Result<tokio::io::DuplexStream, Status>>,
+    ),
+    Status,
+> {
+    let channel_id = relay_open.channel_id.clone(); // saved before `relay_open` is moved below
+    let (relay_tx, relay_rx) = oneshot::channel();
+    let stream = connect_peer_relay(state, &owner_peer_endpoint, sandbox_id, relay_open).await?;
+    let _ = relay_tx.send(Ok(stream)); // receiver is returned already holding the connected stream
+    Ok((channel_id, relay_rx))
+}
+
+async fn connect_peer_relay( // opens a PeerRelay stream to the owner and bridges it to a duplex
+    state: &Arc<ServerState>,
+    owner_peer_endpoint: &str,
+    sandbox_id: &str,
+    relay_open: RelayOpen,
+) -> Result<tokio::io::DuplexStream, Status> {
+    let token = crate::auth::peer::load_peer_service_account_token_from_env() // loaded on every call
+        .map_err(|err| {
+            Status::failed_precondition(format!("gateway peer token load failed: {err}"))
+        })?
+        .ok_or_else(|| {
+            Status::failed_precondition("gateway peer ServiceAccount token is not configured")
+        })?;
+    let channel = build_peer_channel(owner_peer_endpoint).await?; // a new channel per relay
+    let interceptor = PeerAuthInterceptor::new(&token, &state.replica_id)?;
+    let mut client = open_shell_client::OpenShellClient::with_interceptor(channel, interceptor);
+
+    let (out_tx, out_rx) = mpsc::channel::<PeerRelayFrame>(16);
+    out_tx // queue the init frame before the RPC starts so it is first on the stream
+        .send(PeerRelayFrame {
+            payload: Some(peer_relay_frame::Payload::Init(PeerRelayInit {
+                sandbox_id: sandbox_id.to_string(),
+                relay_open: Some(relay_open),
+                requester_replica_id: state.replica_id.clone(),
+            })),
+        })
+        .await
+        .map_err(|_| Status::internal("failed to initialize peer relay stream"))?;
+
+    let response = client
+        .peer_relay(ReceiverStream::new(out_rx))
+        .await
+        .map_err(|err| Status::unavailable(format!("gateway peer relay RPC failed: {err}")))?;
+    let inbound = response.into_inner();
+    let (gateway_stream, bridge_stream) = tokio::io::duplex(64 * 1024); // caller end / bridge end
+    spawn_peer_bridge(bridge_stream, inbound, out_tx, sandbox_id.to_string());
+    Ok(gateway_stream)
+}
+
+pub async fn handle_peer_relay( // serves a PeerRelay stream by opening a relay on this replica
+    state: &Arc<ServerState>,
+    request: Request<tonic::Streaming<PeerRelayFrame>>,
+) -> Result<
+    Response<
+        Pin<Box<dyn tokio_stream::Stream<Item = Result<PeerRelayFrame, Status>> + Send + 'static>>,
+    >,
+    Status,
+> {
+    let peer = match request.extensions().get::<Principal>() { // only `Principal::Peer` is accepted
+        Some(Principal::Peer(peer)) => peer.clone(),
+        _ => {
+            return Err(Status::permission_denied(
+                "gateway peer principal is required",
+            ));
+        }
+    };
+    let mut inbound = request.into_inner();
+
+    let first = inbound // the first frame must carry the init payload
+        .message()
+        .await?
+        .ok_or_else(|| Status::invalid_argument("empty PeerRelay stream"))?;
+    let Some(peer_relay_frame::Payload::Init(init)) = first.payload else {
+        return Err(Status::invalid_argument(
+            "first PeerRelayFrame must be init",
+        ));
+    };
+    if init.sandbox_id.is_empty() {
+        return Err(Status::invalid_argument("sandbox_id is required"));
+    }
+    let relay_open = init
+        .relay_open
+        .ok_or_else(|| Status::invalid_argument("relay_open is required"))?;
+    if relay_open.channel_id.is_empty() {
+        return Err(Status::invalid_argument("relay channel_id is required"));
+    }
+
+    info!(
+        sandbox_id = %init.sandbox_id,
+        channel_id = %relay_open.channel_id,
+        requester = %peer.replica_id,
+        "gateway peer relay: opening local supervisor relay"
+    );
+
+    let (channel_id, relay_rx) = state // local registry only: no further peer hop from here
+        .supervisor_sessions
+        .open_relay_with_message(&init.sandbox_id, relay_open, Duration::from_secs(5))
+        .await?;
+    let supervisor_stream = match tokio::time::timeout(Duration::from_secs(10), relay_rx).await {
+        Ok(Ok(Ok(stream))) => stream,
+        Ok(Ok(Err(status))) => return Err(status), // relay open reported a failure status
+        Ok(Err(_)) => return Err(Status::unavailable("relay channel dropped")), // sender dropped
+        Err(_) => return Err(Status::deadline_exceeded("relay open timed out")), // 10s elapsed
+    };
+
+    let (out_tx, out_rx) = mpsc::channel::<Result<PeerRelayFrame, Status>>(16);
+    spawn_peer_owner_bridge(
+        supervisor_stream,
+        inbound,
+        out_tx,
+        init.sandbox_id,
+        channel_id,
+    );
+    let stream: Pin<
+        Box<dyn tokio_stream::Stream<Item = Result<PeerRelayFrame, Status>> + Send + 'static>,
+    > = Box::pin(ReceiverStream::new(out_rx));
+    Ok(Response::new(stream))
+}
+
+fn spawn_peer_bridge( // requester side: pumps bytes between the local duplex and the peer stream
+    bridge_stream: tokio::io::DuplexStream,
+    mut inbound: tonic::Streaming<PeerRelayFrame>,
+    out_tx: mpsc::Sender<PeerRelayFrame>,
+    sandbox_id: String,
+) {
+    let (mut read_half, mut write_half) = tokio::io::split(bridge_stream);
+    let sandbox_id_in = sandbox_id.clone();
+    tokio::spawn(async move { // task 1: peer frames -> local duplex
+        loop {
+            match inbound.message().await {
+                Ok(Some(frame)) => {
+                    let Some(peer_relay_frame::Payload::Data(data)) = frame.payload else {
+                        warn!(sandbox_id = %sandbox_id_in, "gateway peer relay: non-data frame after init");
+                        break;
+                    };
+                    if data.is_empty() { // empty data frames are skipped, not treated as EOF
+                        continue;
+                    }
+                    if let Err(err) =
+                        tokio::io::AsyncWriteExt::write_all(&mut write_half, &data).await
+                    {
+                        warn!(sandbox_id = %sandbox_id_in, error = %err, "gateway peer relay: write to duplex failed");
+                        break;
+                    }
+                }
+                Ok(None) => break, // peer closed its side of the stream
+                Err(err) => {
+                    warn!(sandbox_id = %sandbox_id_in, error = %err, "gateway peer relay: inbound errored");
+                    break;
+                }
+            }
+        }
+        let _ = tokio::io::AsyncWriteExt::shutdown(&mut write_half).await; // best-effort shutdown
+    });
+
+    tokio::spawn(async move { // task 2: local duplex -> peer frames
+        let mut buf = vec![0u8; RELAY_STREAM_CHUNK_SIZE];
+        loop {
+            match tokio::io::AsyncReadExt::read(&mut read_half, &mut buf).await {
+                Ok(0) => break, // EOF; `out_tx` is dropped when this task ends
+                Ok(n) => {
+                    if out_tx
+                        .send(PeerRelayFrame {
+                            payload: Some(peer_relay_frame::Payload::Data(buf[..n].to_vec())),
+                        })
+                        .await
+                        .is_err()
+                    {
+                        break;
+                    }
+                }
+                Err(err) => {
+                    warn!(sandbox_id = %sandbox_id, error = %err, "gateway peer relay: read from duplex failed");
+                    break;
+                }
+            }
+        }
+    });
+}
+
+fn spawn_peer_owner_bridge( // owner side: pumps bytes between the supervisor relay and the peer
+    supervisor_stream: tokio::io::DuplexStream,
+    mut inbound: tonic::Streaming<PeerRelayFrame>,
+    out_tx: mpsc::Sender<Result<PeerRelayFrame, Status>>,
+    sandbox_id: String,
+    channel_id: String,
+) {
+    let (mut read_half, mut write_half) = tokio::io::split(supervisor_stream);
+    let sandbox_id_in = sandbox_id.clone();
+    let channel_id_in = channel_id.clone();
+    tokio::spawn(async move { // task 1: peer frames -> supervisor relay
+        loop {
+            match inbound.message().await {
+                Ok(Some(frame)) => {
+                    let Some(peer_relay_frame::Payload::Data(data)) = frame.payload else {
+                        warn!(sandbox_id = %sandbox_id_in, channel_id = %channel_id_in, "gateway peer relay owner: non-data frame after init");
+                        break;
+                    };
+                    if data.is_empty() { // empty data frames are skipped, not treated as EOF
+                        continue;
+                    }
+                    if let Err(err) =
+                        tokio::io::AsyncWriteExt::write_all(&mut write_half, &data).await
+                    {
+                        warn!(sandbox_id = %sandbox_id_in, channel_id = %channel_id_in, error = %err, "gateway peer relay owner: write to supervisor relay failed");
+                        break;
+                    }
+                }
+                Ok(None) => break, // requesting replica closed its side of the stream
+                Err(err) => {
+                    warn!(sandbox_id = %sandbox_id_in, channel_id = %channel_id_in, error = %err, "gateway peer relay owner: inbound errored");
+                    break;
+                }
+            }
+        }
+        let _ = tokio::io::AsyncWriteExt::shutdown(&mut write_half).await; // best-effort shutdown
+    });
+
+    tokio::spawn(async move { // task 2: supervisor relay -> peer frames
+        let mut buf = vec![0u8; RELAY_STREAM_CHUNK_SIZE];
+        loop {
+            match tokio::io::AsyncReadExt::read(&mut read_half, &mut buf).await {
+                Ok(0) => break, // EOF; `out_tx` is dropped when this task ends
+                Ok(n) => {
+                    if out_tx
+                        .send(Ok(PeerRelayFrame {
+                            payload: Some(peer_relay_frame::Payload::Data(buf[..n].to_vec())),
+                        }))
+                        .await
+                        .is_err()
+                    {
+                        break;
+                    }
+                }
+                Err(err) => {
+                    warn!(sandbox_id = %sandbox_id, channel_id = %channel_id, error = %err, "gateway peer relay owner: read from supervisor relay failed");
+                    break;
+                }
+            }
+        }
+    });
+}
+
 // ---------------------------------------------------------------------------
 // ConnectSupervisor gRPC handler
 // ---------------------------------------------------------------------------
@@ -601,10 +1045,35 @@ pub async fn handle_connect_supervisor(
     require_persisted_sandbox(&state.store, &sandbox_id).await?;
 
     let session_id = Uuid::new_v4().to_string();
+    let owner_peer_endpoint = state.peer_endpoint.clone().unwrap_or_default();
+    if !state.store.is_single_replica() && owner_peer_endpoint.is_empty() {
+        return Err(Status::failed_precondition(
+            "gateway peer endpoint is required for multi-replica supervisor ownership",
+        ));
+    }
+    let owner_peer_endpoint = if owner_peer_endpoint.is_empty() {
+        format!("local://{}", state.replica_id)
+    } else {
+        owner_peer_endpoint
+    };
+    let owner_index = SupervisorOwnerIndex::new(state.store.clone(), OWNER_TTL);
+    let owner_guard = owner_index
+        .publish(
+            &sandbox_id,
+            &session_id,
+            &hello.instance_id,
+            hello.connection_epoch,
+            &state.replica_id,
+            &owner_peer_endpoint,
+        )
+        .await
+        .map_err(owner_error_to_status)?;
     info!(
         sandbox_id = %sandbox_id,
         session_id = %session_id,
         instance_id = %hello.instance_id,
+        connection_epoch = hello.connection_epoch,
+        replica_id = %state.replica_id,
         "supervisor session: accepted"
     );
 
@@ -638,6 +1107,9 @@ pub async fn handle_connect_supervisor(
         state
             .supervisor_sessions
             .remove_if_current(&sandbox_id, &session_id);
+        if let Err(err) = owner_index.release_if_current(&owner_guard).await {
+            warn!(sandbox_id = %sandbox_id, session_id = %session_id, error = %err, "supervisor session: failed to release owner after accept send failure");
+        }
         return Err(Status::internal("failed to send session accepted"));
     }
 
@@ -667,6 +1139,7 @@ pub async fn handle_connect_supervisor(
     let state_clone = Arc::clone(state);
     let sandbox_id_clone = sandbox_id.clone();
     tokio::spawn(async move {
+        let mut owner_guard = owner_guard;
         run_session_loop(
             &state_clone,
             &sandbox_id_clone,
@@ -674,11 +1147,16 @@ pub async fn handle_connect_supervisor(
             &tx,
             &mut inbound,
             shutdown_rx,
+            &mut owner_guard,
         )
         .await;
         let still_ours = state_clone
             .supervisor_sessions
             .remove_if_current(&sandbox_id_clone, &session_id);
+        let owner_index = SupervisorOwnerIndex::new(state_clone.store.clone(), OWNER_TTL);
+        if let Err(err) = owner_index.release_if_current(&owner_guard).await {
+            warn!(sandbox_id = %sandbox_id_clone, session_id = %session_id, error = %err, "supervisor session: failed to release owner record");
+        }
         if still_ours {
             info!(sandbox_id = %sandbox_id_clone, session_id = %session_id, "supervisor session: ended");
             state_clone
@@ -717,6 +1195,7 @@ async fn run_session_loop(
     tx: &mpsc::Sender<GatewayMessage>,
     inbound: &mut tonic::Streaming<SupervisorMessage>,
     mut shutdown_rx: oneshot::Receiver<()>,
+    owner_guard: &mut OwnerGuard,
 ) {
     let heartbeat_interval = Duration::from_secs(u64::from(HEARTBEAT_INTERVAL_SECS));
     let mut heartbeat_timer = tokio::time::interval(heartbeat_interval);
@@ -732,7 +1211,9 @@ async fn run_session_loop(
             msg = inbound.message() => {
                 match msg {
                     Ok(Some(msg)) => {
-                        handle_supervisor_message(state, sandbox_id, session_id, msg);
+                        if !handle_supervisor_message(state, sandbox_id, session_id, msg, owner_guard).await {
+                            break;
+                        }
                     }
                     Ok(None) => {
                         info!(sandbox_id = %sandbox_id, session_id = %session_id, "supervisor session: stream closed by supervisor");
@@ -759,15 +1240,25 @@ async fn run_session_loop(
     }
 }
 
-fn handle_supervisor_message(
+async fn handle_supervisor_message(
     state: &Arc<ServerState>,
     sandbox_id: &str,
     session_id: &str,
     msg: SupervisorMessage,
-) {
+    owner_guard: &mut OwnerGuard,
+) -> bool {
     match msg.payload {
         Some(supervisor_message::Payload::Heartbeat(_)) => {
-            // Heartbeat received — nothing to do for now.
+            let owner_index = SupervisorOwnerIndex::new(state.store.clone(), OWNER_TTL);
+            if let Err(err) = owner_index.renew(owner_guard).await {
+                warn!(
+                    sandbox_id = %sandbox_id,
+                    session_id = %session_id,
+                    error = %err,
+                    "supervisor session: owner renewal failed; closing session"
+                );
+                return false;
+            }
         }
         Some(supervisor_message::Payload::RelayOpenResult(result)) => {
             if result.success {
@@ -808,6 +1299,7 @@ fn handle_supervisor_message(
             );
         }
     }
+    true
 }
 
 // ---------------------------------------------------------------------------
diff --git a/crates/openshell-server/tests/common/mod.rs b/crates/openshell-server/tests/common/mod.rs
index 00228b043..03478b863 100644
--- a/crates/openshell-server/tests/common/mod.rs
+++ b/crates/openshell-server/tests/common/mod.rs
@@ -21,10 +21,10 @@ use openshell_core::proto::{
     GetSandboxConfigResponse, GetSandboxProviderEnvironmentRequest,
     GetSandboxProviderEnvironmentResponse, GetSandboxRequest, HealthRequest, HealthResponse,
     IssueSandboxTokenRequest, IssueSandboxTokenResponse, ListProvidersRequest,
-    ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse,
-    RefreshSandboxTokenRequest, RefreshSandboxTokenResponse, RelayFrame, RevokeSshSessionRequest,
-    RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus,
-    SupervisorMessage, TcpForwardFrame, UpdateProviderRequest, WatchSandboxRequest,
+    ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, PeerRelayFrame,
+    ProviderResponse, RefreshSandboxTokenRequest, RefreshSandboxTokenResponse, RelayFrame,
+    RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent,
+    ServiceStatus, SupervisorMessage, TcpForwardFrame, UpdateProviderRequest, WatchSandboxRequest,
     open_shell_client::OpenShellClient,
     open_shell_server::{OpenShell, OpenShellServer},
 };
@@ -457,6 +457,15 @@ impl OpenShell for TestOpenShell {
         Err(Status::unimplemented("not implemented in test"))
     }
 
+    type PeerRelayStream = ReceiverStream<Result<PeerRelayFrame, Status>>;
+
+    async fn peer_relay(
+        &self,
+        _request: tonic::Request<tonic::Streaming<PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        Err(Status::unimplemented("not implemented in test"))
+    }
+
     type ForwardTcpStream =
         std::pin::Pin<Box<dyn tokio_stream::Stream<Item = Result<TcpForwardFrame, Status>> + Send>>;
 
diff --git a/crates/openshell-server/tests/supervisor_relay_integration.rs b/crates/openshell-server/tests/supervisor_relay_integration.rs
index dadb8b384..ce4c4bdd3 100644
--- a/crates/openshell-server/tests/supervisor_relay_integration.rs
+++ b/crates/openshell-server/tests/supervisor_relay_integration.rs
@@ -23,7 +23,7 @@ use hyper_util::{
     server::conn::auto::Builder,
 };
 use openshell_core::proto::{
-    GatewayMessage, RelayFrame, RelayInit, SupervisorMessage, TcpForwardFrame,
+    GatewayMessage, PeerRelayFrame, RelayFrame, RelayInit, SupervisorMessage, TcpForwardFrame,
     open_shell_client::OpenShellClient,
     open_shell_server::{OpenShell, OpenShellServer},
 };
@@ -61,6 +61,16 @@ impl OpenShell for RelayGateway {
 
     // ------ unused stubs ------
 
+    type PeerRelayStream =
+        std::pin::Pin<Box<dyn tokio_stream::Stream<Item = Result<PeerRelayFrame, Status>> + Send>>;
+
+    async fn peer_relay(
+        &self,
+        _: tonic::Request<tonic::Streaming<PeerRelayFrame>>,
+    ) -> Result<Response<Self::PeerRelayStream>, Status> {
+        Err(Status::unimplemented("unused"))
+    }
+
     type ConnectSupervisorStream = ReceiverStream<Result<GatewayMessage, Status>>;
     async fn connect_supervisor(
         &self,
diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md
index e6d539592..edfca0553 100644
--- a/deploy/helm/openshell/README.md
+++ b/deploy/helm/openshell/README.md
@@ -109,6 +109,22 @@ Append these flags to any of the PostgreSQL commands above for OpenShift:
 --set securityContext.runAsUser=null
 ```
 
+### High availability
+
+Set `replicaCount` above `1` only with `server.externalDbSecret`; the default
+SQLite database is per pod and cannot coordinate multiple gateway replicas.
+The chart creates a headless peer Service for gateway-to-gateway relay traffic.
+StatefulSet pods use stable pod DNS names through that headless Service.
+Deployment pods advertise their pod IP with `OPENSHELL_PEER_ENDPOINT`, because
+Kubernetes does not assign stable per-pod DNS names to Deployment replicas.
+
+Gateway peer traffic uses Kubernetes ServiceAccount identity. Each gateway pod
+mounts a projected, pod-bound ServiceAccount token with audience
+`openshell-gateway-peer`; receiving replicas validate that token with the
+Kubernetes TokenReview API, verify the live pod UID and Helm selector labels,
+and authorize only the internal `PeerRelay` RPC. The chart does not create or
+accept a shared gateway peer Secret.
+
 ## Secret bootstrap
 
 By default, a pre-install/pre-upgrade hook Job runs `openshell-gateway generate-certs`
diff --git a/deploy/helm/openshell/README.md.gotmpl b/deploy/helm/openshell/README.md.gotmpl
index e246ca67b..3dcdc457f 100644
--- a/deploy/helm/openshell/README.md.gotmpl
+++ b/deploy/helm/openshell/README.md.gotmpl
@@ -109,6 +109,22 @@ Append these flags to any of the PostgreSQL commands above for OpenShift:
 --set securityContext.runAsUser=null
 ```
 
+### High availability
+
+Set `replicaCount` above `1` only with `server.externalDbSecret`; the default
+SQLite database is per pod and cannot coordinate multiple gateway replicas.
+The chart creates a headless peer Service for gateway-to-gateway relay traffic.
+StatefulSet pods use stable pod DNS names through that headless Service.
+Deployment pods advertise their pod IP with `OPENSHELL_PEER_ENDPOINT`, because
+Kubernetes does not assign stable per-pod DNS names to Deployment replicas.
+
+Gateway peer traffic uses Kubernetes ServiceAccount identity. Each gateway pod
+mounts a projected, pod-bound ServiceAccount token with audience
+`openshell-gateway-peer`; receiving replicas validate that token with the
+Kubernetes TokenReview API, verify the live pod UID and Helm selector labels,
+and authorize only the internal `PeerRelay` RPC. The chart does not create or
+accept a shared gateway peer Secret.
+
 ## Secret bootstrap
 
 By default, a pre-install/pre-upgrade hook Job runs `openshell-gateway generate-certs`
diff --git a/deploy/helm/openshell/skaffold.yaml b/deploy/helm/openshell/skaffold.yaml
index d4608da5e..95ebe75d2 100644
--- a/deploy/helm/openshell/skaffold.yaml
+++ b/deploy/helm/openshell/skaffold.yaml
@@ -127,3 +127,26 @@ deploy:
           image.tag: '{{.IMAGE_TAG_openshell_gateway}}'
           supervisor.image.repository: '{{.IMAGE_REPO_openshell_supervisor}}'
           supervisor.image.tag: '{{.IMAGE_TAG_openshell_supervisor}}'
+profiles:
+  - name: high-availability
+    patches:
+      - op: add
+        path: /deploy/helm/releases/0/valuesFiles/-
+        value: ci/values-high-availability.yaml
+  - name: ha-envoy
+    patches:
+      - op: add
+        path: /deploy/helm/releases/0
+        value:
+          name: envoy-gateway
+          remoteChart: oci://docker.io/envoyproxy/gateway-helm
+          version: v1.7.2
+          namespace: envoy-gateway-system
+          createNamespace: true
+          wait: true
+      - op: add
+        path: /deploy/helm/releases/1/valuesFiles/-
+        value: ci/values-high-availability.yaml
+      - op: add
+        path: /deploy/helm/releases/1/valuesFiles/-
+        value: ci/values-gateway.yaml
diff --git a/deploy/helm/openshell/templates/_gateway-workload.tpl b/deploy/helm/openshell/templates/_gateway-workload.tpl
index 5931047e5..8cdea6455 100644
--- a/deploy/helm/openshell/templates/_gateway-workload.tpl
+++ b/deploy/helm/openshell/templates/_gateway-workload.tpl
@@ -50,6 +50,36 @@ spec:
         - {{ .Values.server.dbUrl | quote }}
         {{- end }}
       env:
+        - name: OPENSHELL_REPLICA_ID
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: OPENSHELL_POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: OPENSHELL_POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        {{- if eq (include "openshell.workloadKind" .) "deployment" }}
+        - name: OPENSHELL_POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+        - name: OPENSHELL_PEER_ENDPOINT
+          value: {{ printf "%s://$(OPENSHELL_POD_IP):%d" (ternary "http" "https" (default false .Values.server.disableTls)) (int .Values.service.port) | quote }}
+        {{- end }}
+        - name: OPENSHELL_SERVICE_ACCOUNT_NAME
+          value: {{ include "openshell.serviceAccountName" . | quote }}
+        - name: OPENSHELL_PEER_SERVICE_NAME
+          value: {{ include "openshell.peerServiceName" . | quote }}
+        - name: OPENSHELL_PEER_TOKEN_AUDIENCE
+          value: "openshell-gateway-peer"
+        - name: OPENSHELL_PEER_SERVICE_ACCOUNT_TOKEN_FILE
+          value: /var/run/secrets/openshell-peer/token
+        - name: OPENSHELL_PEER_POD_LABELS
+          value: {{ printf "app.kubernetes.io/name=%s,app.kubernetes.io/instance=%s" (include "openshell.name" .) .Release.Name | quote }}
         {{- if .Values.server.externalDbSecret }}
         - name: OPENSHELL_DB_URL
           valueFrom:
@@ -80,6 +110,9 @@ spec:
         - name: sandbox-jwt
           mountPath: /etc/openshell-jwt
           readOnly: true
+        - name: gateway-peer-token
+          mountPath: /var/run/secrets/openshell-peer
+          readOnly: true
         {{- if not .Values.server.disableTls }}
         - name: tls-cert
           mountPath: /etc/openshell-tls/server
@@ -140,6 +173,14 @@ spec:
       secret:
         secretName: {{ include "openshell.sandboxJwtSecretName" . }}
         defaultMode: {{ .Values.server.sandboxJwt.secretDefaultMode | default 0400 }}
+    - name: gateway-peer-token
+      projected:
+        defaultMode: 0400
+        sources:
+          - serviceAccountToken:
+              path: token
+              audience: openshell-gateway-peer
+              expirationSeconds: 3600
     {{- if not .Values.server.disableTls }}
     - name: tls-cert
       secret:
diff --git a/deploy/helm/openshell/templates/_helpers.tpl b/deploy/helm/openshell/templates/_helpers.tpl
index 30c027576..0dff39d67 100644
--- a/deploy/helm/openshell/templates/_helpers.tpl
+++ b/deploy/helm/openshell/templates/_helpers.tpl
@@ -109,6 +109,10 @@ Name of the Secret holding gateway-minted sandbox JWT signing material.
 {{- .Values.server.sandboxJwt.signingSecretName | default (printf "%s-jwt-keys" (include "openshell.fullname" .)) -}}
 {{- end }}
 
+{{- define "openshell.peerServiceName" -}}
+{{- printf "%s-peer" (include "openshell.fullname" .) -}}
+{{- end }}
+
 {{/*
 gRPC endpoint sandbox pods use to call back into the gateway. An explicit
 .Values.server.grpcEndpoint is used verbatim. Otherwise it is derived from
diff --git a/deploy/helm/openshell/templates/clusterrole.yaml b/deploy/helm/openshell/templates/clusterrole.yaml
index 30a192fc3..2f15d923c 100644
--- a/deploy/helm/openshell/templates/clusterrole.yaml
+++ b/deploy/helm/openshell/templates/clusterrole.yaml
@@ -8,8 +8,8 @@ metadata:
   labels:
     {{- include "openshell.labels" . | nindent 4 }}
 rules:
-  # Validate projected sandbox ServiceAccount tokens during the
-  # IssueSandboxToken bootstrap exchange.
+  # Validate projected ServiceAccount tokens during sandbox bootstrap and
+  # internal gateway peer authentication.
   - apiGroups:
       - authentication.k8s.io
     resources:
diff --git a/deploy/helm/openshell/templates/peer-role.yaml b/deploy/helm/openshell/templates/peer-role.yaml
new file mode 100644
index 000000000..59ae7479a
--- /dev/null
+++ b/deploy/helm/openshell/templates/peer-role.yaml
@@ -0,0 +1,35 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: {{ include "openshell.fullname" . }}-peer
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "openshell.labels" . | nindent 4 }}
+rules:
+  # Gateway peer identity: TokenReview authenticates the projected token, then
+  # the receiver resolves the returned pod name and UID to the live gateway pod
+  # in the release namespace.
+  - apiGroups:
+      - ""
+    resources:
+      - pods
+    verbs:
+      - get
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: {{ include "openshell.fullname" . }}-peer
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "openshell.labels" . | nindent 4 }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: {{ include "openshell.fullname" . }}-peer
+subjects:
+  - kind: ServiceAccount
+    name: {{ include "openshell.serviceAccountName" . }}
+    namespace: {{ .Release.Namespace }}
diff --git a/deploy/helm/openshell/templates/peer-service.yaml b/deploy/helm/openshell/templates/peer-service.yaml
new file mode 100644
index 000000000..f5d93af24
--- /dev/null
+++ b/deploy/helm/openshell/templates/peer-service.yaml
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "openshell.peerServiceName" . }}
+  labels:
+    {{- include "openshell.labels" . | nindent 4 }}
+spec:
+  clusterIP: None
+  publishNotReadyAddresses: true
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: grpc
+      protocol: TCP
+      name: grpc
+      appProtocol: grpc
+  selector:
+    {{- include "openshell.selectorLabels" . | nindent 4 }}
diff --git a/deploy/helm/openshell/templates/statefulset.yaml b/deploy/helm/openshell/templates/statefulset.yaml
index 30571f80b..10d0839f6 100644
--- a/deploy/helm/openshell/templates/statefulset.yaml
+++ b/deploy/helm/openshell/templates/statefulset.yaml
@@ -9,7 +9,7 @@ metadata:
   labels:
     {{- include "openshell.labels" . | nindent 4 }}
 spec:
-  serviceName: {{ include "openshell.fullname" . }}
+  serviceName: {{ include "openshell.peerServiceName" . }}
   replicas: {{ .Values.replicaCount }}
   selector:
     matchLabels:
diff --git a/deploy/helm/openshell/tests/gateway_config_test.yaml b/deploy/helm/openshell/tests/gateway_config_test.yaml
index c2708a20f..856e2d25f 100644
--- a/deploy/helm/openshell/tests/gateway_config_test.yaml
+++ b/deploy/helm/openshell/tests/gateway_config_test.yaml
@@ -5,6 +5,8 @@ suite: gateway TOML config shape
 templates:
   - templates/gateway-config.yaml
   - templates/deployment.yaml
+  - templates/peer-role.yaml
+  - templates/peer-service.yaml
   - templates/statefulset.yaml
 release:
   name: openshell
@@ -50,16 +52,16 @@ tests:
       server.oidc.caConfigMapName: openshell-oidc-ca
     asserts:
       - equal:
-          path: spec.template.spec.containers[0].volumeMounts[3].name
+          path: spec.template.spec.containers[0].volumeMounts[4].name
           value: oidc-ca
       - equal:
-          path: spec.template.spec.containers[0].volumeMounts[3].mountPath
+          path: spec.template.spec.containers[0].volumeMounts[4].mountPath
           value: /etc/openshell-tls/oidc-ca
       - equal:
-          path: spec.template.spec.volumes[2].name
+          path: spec.template.spec.volumes[3].name
           value: oidc-ca
       - equal:
-          path: spec.template.spec.volumes[2].configMap.name
+          path: spec.template.spec.volumes[3].configMap.name
           value: openshell-oidc-ca
 
   # Regression for the P1 bug Drew flagged: grpc_endpoint MUST live in the
@@ -252,6 +254,102 @@ tests:
           path: spec.template.spec.containers[0].args
           content: "sqlite:/var/openshell/openshell.db"
 
+  - it: configures gateway peer identity and projected peer token
+    template: templates/statefulset.yaml
+    asserts:
+      - equal:
+          path: spec.serviceName
+          value: openshell-peer
+      - contains:
+          path: spec.template.spec.containers[0].env
+          content:
+            name: OPENSHELL_PEER_SERVICE_NAME
+            value: openshell-peer
+      - contains:
+          path: spec.template.spec.containers[0].env
+          content:
+            name: OPENSHELL_PEER_TOKEN_AUDIENCE
+            value: openshell-gateway-peer
+      - contains:
+          path: spec.template.spec.containers[0].volumeMounts
+          content:
+            name: gateway-peer-token
+            mountPath: /var/run/secrets/openshell-peer
+            readOnly: true
+      - equal:
+          path: spec.template.spec.volumes[2].projected.sources[0].serviceAccountToken.audience
+          value: openshell-gateway-peer
+
+  - it: configures gateway peer identity and projected peer token for Deployment
+    template: templates/deployment.yaml
+    set:
+      workload.kind: deployment
+      server.externalDbSecret: my-pg-secret
+      server.disableTls: true
+    asserts:
+      - contains:
+          path: spec.template.spec.containers[0].env
+          content:
+            name: OPENSHELL_POD_IP
+            valueFrom:
+              fieldRef:
+                fieldPath: status.podIP
+      - contains:
+          path: spec.template.spec.containers[0].env
+          content:
+            name: OPENSHELL_PEER_ENDPOINT
+            value: http://$(OPENSHELL_POD_IP):8080
+      - contains:
+          path: spec.template.spec.containers[0].env
+          content:
+            name: OPENSHELL_PEER_SERVICE_NAME
+            value: openshell-peer
+      - contains:
+          path: spec.template.spec.containers[0].env
+          content:
+            name: OPENSHELL_PEER_TOKEN_AUDIENCE
+            value: openshell-gateway-peer
+      - contains:
+          path: spec.template.spec.containers[0].volumeMounts
+          content:
+            name: gateway-peer-token
+            mountPath: /var/run/secrets/openshell-peer
+            readOnly: true
+      - equal:
+          path: spec.template.spec.volumes[2].projected.sources[0].serviceAccountToken.audience
+          value: openshell-gateway-peer
+
+  - it: renders headless gateway peer service
+    template: templates/peer-service.yaml
+    asserts:
+      - equal:
+          path: metadata.name
+          value: openshell-peer
+      - equal:
+          path: spec.clusterIP
+          value: None
+      - equal:
+          path: spec.publishNotReadyAddresses
+          value: true
+
+  - it: grants release-namespace pod lookup for gateway peer identity validation
+    template: templates/peer-role.yaml
+    asserts:
+      - hasDocuments:
+          count: 2
+      - equal:
+          path: metadata.namespace
+          value: my-namespace
+        documentIndex: 0
+      - equal:
+          path: rules[0].resources[0]
+          value: pods
+        documentIndex: 0
+      - equal:
+          path: subjects[0].name
+          value: openshell
+        documentIndex: 1
+
   - it: fails when legacy postgres.enabled is set
     template: templates/statefulset.yaml
     set:
diff --git a/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml b/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml
index a7b02310c..b1ccc5623 100644
--- a/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml
+++ b/deploy/helm/openshell/tests/statefulset_client_ca_test.yaml
@@ -17,13 +17,13 @@ tests:
       certManager.enabled: false
     asserts:
       - equal:
-          path: spec.template.spec.volumes[3].name
+          path: spec.template.spec.volumes[4].name
           value: tls-client-ca
       - equal:
-          path: spec.template.spec.volumes[3].secret.secretName
+          path: spec.template.spec.volumes[4].secret.secretName
           value: openshell-server-tls
       - equal:
-          path: spec.template.spec.volumes[3].secret.items[0].key
+          path: spec.template.spec.volumes[4].secret.items[0].key
           value: ca.crt
 
   - it: shares the cert-manager server TLS ca.crt when clientCaFromServerTlsSecret is true
@@ -33,13 +33,13 @@ tests:
       certManager.clientCaFromServerTlsSecret: true
     asserts:
       - equal:
-          path: spec.template.spec.volumes[3].name
+          path: spec.template.spec.volumes[4].name
           value: tls-client-ca
       - equal:
-          path: spec.template.spec.volumes[3].secret.secretName
+          path: spec.template.spec.volumes[4].secret.secretName
           value: openshell-server-tls
       - equal:
-          path: spec.template.spec.volumes[3].secret.items[0].key
+          path: spec.template.spec.volumes[4].secret.items[0].key
           value: ca.crt
 
   # Regression: with cert-manager enabled and pkiInitJob left at its default
@@ -55,13 +55,13 @@ tests:
       pkiInitJob.enabled: true
     asserts:
       - equal:
-          path: spec.template.spec.volumes[3].name
+          path: spec.template.spec.volumes[4].name
           value: tls-client-ca
       - equal:
-          path: spec.template.spec.volumes[3].secret.secretName
+          path: spec.template.spec.volumes[4].secret.secretName
           value: openshell-server-client-ca
       - notExists:
-          path: spec.template.spec.volumes[3].secret.items
+          path: spec.template.spec.volumes[4].secret.items
 
   # When cert-manager owns TLS, does not share its CA, and no separate client CA
   # secret is configured, there is no client CA to mount: the volume must not
@@ -76,7 +76,7 @@ tests:
     asserts:
       - lengthEqual:
           path: spec.template.spec.volumes
-          count: 3
+          count: 4
       - notContains:
           path: spec.template.spec.containers[0].volumeMounts
           content:
diff --git a/deploy/kube/manifests/envoy-gateway-openshell.yaml b/deploy/kube/manifests/envoy-gateway-openshell.yaml
index 583f2b41b..68d1ea7d7 100644
--- a/deploy/kube/manifests/envoy-gateway-openshell.yaml
+++ b/deploy/kube/manifests/envoy-gateway-openshell.yaml
@@ -15,3 +15,24 @@ metadata:
   name: eg
 spec:
   controllerName: gateway.envoyproxy.io/gatewayclass-controller
+---
+# OpenShell gRPC streams can remain active across sandbox create, exec, relay,
+# and watch operations. `0s` disables Envoy's backend request and stream
+# duration timeouts for the OpenShell GRPCRoute, so the proxy does not reset
+# long-running HTTP/2 streams while gateway pods rotate behind it.
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: BackendTrafficPolicy
+metadata:
+  name: openshell-grpc-timeouts
+  namespace: openshell
+  labels:
+    app.kubernetes.io/name: openshell
+spec:
+  targetRefs:
+    - group: gateway.networking.k8s.io
+      kind: GRPCRoute
+      name: openshell
+  timeout:
+    http:
+      requestTimeout: 0s
+      maxStreamDuration: 0s
diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml
index 083c622df..1c36048e2 100644
--- a/e2e/rust/Cargo.toml
+++ b/e2e/rust/Cargo.toml
@@ -68,8 +68,8 @@ path = "tests/vm_gateway_resume.rs"
 required-features = ["e2e-vm"]
 
 [[test]]
-name = "readyz_health"
-path = "tests/readyz_health.rs"
+name = "kubernetes_ha_rebalancing"
+path = "tests/kubernetes_ha_rebalancing.rs"
 required-features = ["e2e-kubernetes"]
 
 [[test]]
diff --git a/e2e/rust/tests/kubernetes_ha_rebalancing.rs b/e2e/rust/tests/kubernetes_ha_rebalancing.rs
new file mode 100644
index 000000000..8d241c29e
--- /dev/null
+++ b/e2e/rust/tests/kubernetes_ha_rebalancing.rs
@@ -0,0 +1,636 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+#![cfg(feature = "e2e-kubernetes")]
+
+use std::fs;
+use std::io::Write;
+use std::path::Path;
+use std::process::Stdio;
+use std::time::{Duration, Instant};
+
+use openshell_e2e::harness::binary::openshell_cmd;
+use openshell_e2e::harness::output::strip_ansi;
+use openshell_e2e::harness::port::{find_free_port, wait_for_port};
+use openshell_e2e::harness::sandbox::SandboxGuard;
+use serde_json::Value;
+use sha2::{Digest, Sha256};
+use tokio::process::{Child, Command};
+
+static KUBE_HA_TEST_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(());
+
+const HA_SYNC_PAYLOAD_BYTES: usize = 32 * 1024 * 1024;
+const HA_SYNC_TIMEOUT: Duration = Duration::from_secs(600);
+
+#[derive(Clone)]
+struct KubeTarget {
+    context: String, // kubeconfig context passed to every `kubectl --context`
+    namespace: String, // namespace passed to `kubectl -n`
+    release: String, // release name: gateway workload name and pod label value
+}
+
+impl KubeTarget {
+    /// Reads the target from `OPENSHELL_E2E_KUBE_*`; only the context is required.
+    fn from_env() -> Self {
+        let or_default = |key: &str| std::env::var(key).unwrap_or_else(|_| "openshell".into());
+        Self {
+            context: required_env("OPENSHELL_E2E_KUBE_CONTEXT"),
+            namespace: or_default("OPENSHELL_E2E_KUBE_NAMESPACE"),
+            release: or_default("OPENSHELL_E2E_KUBE_RELEASE"),
+        }
+    }
+
+    /// Runs `kubectl --context <context> <args>`; returns stdout, or an error with both streams.
+    async fn kubectl(&self, args: &[&str]) -> Result<String, String> {
+        let output = Command::new("kubectl")
+            .arg("--context")
+            .arg(&self.context)
+            .args(args)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .output()
+            .await
+            .map_err(|err| format!("failed to spawn kubectl {args:?}: {err}"))?;
+
+        // Keep the streams apart: `gateway_pods` parses the success value as
+        // JSON, and any warning kubectl prints on stderr would corrupt it.
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let stderr = String::from_utf8_lossy(&output.stderr);
+
+        if !output.status.success() {
+            return Err(format!(
+                "kubectl {args:?} failed with exit {:?}:\n{stdout}{stderr}",
+                output.status.code()
+            ));
+        }
+
+        Ok(stdout.into_owned())
+    }
+
+    /// Scales the gateway workload to `replicas` and waits for the rollout.
+    ///
+    /// Issues `kubectl scale --replicas <n>` and then `kubectl rollout status
+    /// --timeout=180s` against the workload reference returned by
+    /// `gateway_workload_resource`; the first failing step is returned.
+    async fn scale_gateway(&self, replicas: usize) -> Result<(), String> {
+        let workload = self.gateway_workload_resource().await?;
+        let count = replicas.to_string();
+        let (ns, target) = (self.namespace.as_str(), workload.as_str());
+
+        let scale_args = ["-n", ns, "scale", target, "--replicas", count.as_str()];
+        self.kubectl(&scale_args).await?;
+
+        let rollout_args = [
+            "-n",
+            ns,
+            "rollout",
+            "status",
+            target,
+            "--timeout=180s",
+        ];
+        self.kubectl(&rollout_args).await?;
+        Ok(())
+    }
+
+    /// Resolves the `<kind>/<name>` reference of the gateway workload.
+    ///
+    /// The gateway workload may be a Deployment or a StatefulSet, so this
+    /// probes `deployment/<release>` first and `statefulset/<release>` second
+    /// with `kubectl get` and returns the first reference whose lookup
+    /// succeeds. Any kubectl failure is treated as "this kind does not
+    /// exist"; when both probes fail an error naming the release and
+    /// namespace is returned.
+    async fn gateway_workload_resource(&self) -> Result<String, String> {
+        for kind in ["deployment", "statefulset"] {
+            let candidate = format!("{kind}/{}", self.release);
+            let lookup = self
+                .kubectl(&["-n", &self.namespace, "get", &candidate])
+                .await;
+            if lookup.is_ok() {
+                return Ok(candidate);
+            }
+        }
+
+        Err(format!(
+            "no gateway Deployment or StatefulSet named {} found in namespace {}",
+            self.release, self.namespace
+        ))
+    }
+
+    /// Deletes gateway pod `pod`, letting kubectl wait up to 90s for removal.
+    async fn delete_gateway_pod(&self, pod: &str) -> Result<(), String> {
+        let args = [
+            "-n",
+            self.namespace.as_str(),
+            "delete",
+            "pod",
+            pod,
+            "--wait=true",
+            "--timeout=90s",
+        ];
+        self.kubectl(&args).await.map(|_| ())
+    }
+
+    /// Deletes `pods` one at a time, in the order given.
+    ///
+    /// After each deletion it waits until `expected` gateway pods are ready
+    /// again before touching the next one; stops at the first error.
+    async fn roll_gateway_pods(&self, pods: Vec<String>, expected: usize) -> Result<(), String> {
+        for name in &pods {
+            self.delete_gateway_pod(name).await?;
+            self.wait_for_gateway_pods(expected).await?;
+        }
+        Ok(())
+    }
+
+    /// Polls every 2s (240s budget) until exactly `expected` pods are all Ready.
+    async fn wait_for_gateway_pods(&self, expected: usize) -> Result<Vec<String>, String> {
+        let deadline = Instant::now() + Duration::from_secs(240);
+        let mut last = String::new();
+
+        while Instant::now() < deadline {
+            last = match self.gateway_pods().await {
+                Ok(pods) if pods.len() == expected && pods.iter().all(|pod| pod.ready) => {
+                    return Ok(pods.into_iter().map(|pod| pod.name).collect());
+                }
+                Ok(pods) => {
+                    let states: Vec<String> = pods
+                        .iter()
+                        .map(|pod| format!("{} ready={}", pod.name, pod.ready))
+                        .collect();
+                    format!("pods={states:?}")
+                }
+                Err(err) => err,
+            };
+            tokio::time::sleep(Duration::from_secs(2)).await;
+        }
+
+        Err(format!(
+            "gateway pods did not reach expected ready count {expected} within 240s; last={last}"
+        ))
+    }
+
+    /// Lists the release's live gateway pods with their readiness.
+    ///
+    /// Selects pods labelled `app.kubernetes.io/instance=<release>`, skips any
+    /// with a `deletionTimestamp` or without a name, and treats a pod as ready
+    /// when it has a `Ready` condition whose status is `True`. The result is
+    /// sorted by pod name.
+    async fn gateway_pods(&self) -> Result<Vec<GatewayPod>, String> {
+        let selector = format!("app.kubernetes.io/instance={}", self.release);
+        let json = self
+            .kubectl(&[
+                "-n",
+                &self.namespace,
+                "get",
+                "pods",
+                "-l",
+                &selector,
+                "-o",
+                "json",
+            ])
+            .await?;
+        let value = serde_json::from_str::<Value>(&json)
+            .map_err(|err| format!("failed to parse gateway pod JSON: {err}\n{json}"))?;
+        let items = value["items"]
+            .as_array()
+            .ok_or_else(|| format!("gateway pod JSON missing items array: {value}"))?;
+
+        let is_ready = |condition: &Value| {
+            condition["type"].as_str() == Some("Ready")
+                && condition["status"].as_str() == Some("True")
+        };
+        let mut pods: Vec<GatewayPod> = items
+            .iter()
+            .filter(|item| item["metadata"]["deletionTimestamp"].is_null())
+            .filter_map(|item| {
+                let name = item["metadata"]["name"].as_str()?.to_string();
+                let ready = item["status"]["conditions"]
+                    .as_array()
+                    .is_some_and(|conditions| conditions.iter().any(|c| is_ready(c)));
+                Some(GatewayPod { name, ready })
+            })
+            .collect();
+        pods.sort_by(|a, b| a.name.cmp(&b.name));
+        Ok(pods)
+    }
+}
+
+#[derive(Debug)]
+struct GatewayPod {
+    name: String, // `metadata.name` of the pod
+    ready: bool, // true when the pod has a `Ready` condition with status `True`
+}
+
+struct PortForward {
+    port: u16, // local port that is forwarded to port 8080 of the pod
+    child: Child, // the spawned `kubectl port-forward` process
+}
+
+impl PortForward {
+    /// Spawns `kubectl port-forward pod/<pod> <free port>:8080` and waits up
+    /// to 30s for the local port to accept connections.
+    async fn start(kube: &KubeTarget, pod: &str) -> Result<Self, String> {
+        let port = find_free_port();
+        let mut command = Command::new("kubectl");
+        command
+            .args(["--context", kube.context.as_str()])
+            .args(["-n", kube.namespace.as_str(), "port-forward"])
+            .arg(format!("pod/{pod}"))
+            .arg(format!("{port}:8080"))
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .kill_on_drop(true);
+        let mut child = command
+            .spawn()
+            .map_err(|err| format!("failed to start kubectl port-forward for {pod}: {err}"))?;
+
+        let Err(err) = wait_for_port("127.0.0.1", port, Duration::from_secs(30)).await else {
+            return Ok(Self { port, child });
+        };
+        // Record the exit status (if the child already died) before killing it.
+        let status = child.try_wait().ok().flatten();
+        let _ = child.kill().await;
+        Err(format!(
+            "port-forward to {pod} did not become ready on {port}: {err}; status={status:?}"
+        ))
+    }
+}
+
+impl Drop for PortForward {
+    fn drop(&mut self) {
+        let _ = self.child.start_kill(); // best effort: a failure to signal the child is ignored
+    }
+}
+
+/// Returns env var `name`; panics with a setup hint when it cannot be read.
+fn required_env(name: &str) -> String {
+    std::env::var(name)
+        .unwrap_or_else(|_| panic!("{name} is not set; run through e2e/rust/e2e-kubernetes.sh"))
+}
+
+/// Runs `printf %s <marker>` inside `sandbox_name` through one specific gateway pod.
+///
+/// A fresh `kubectl port-forward` to `pod` is handed to the CLI as
+/// `--gateway-endpoint`, so the exec request enters through that pod. Fails
+/// unless the CLI exits successfully and `marker` appears in its
+/// ANSI-stripped stdout+stderr.
+async fn exec_through_pod(
+    kube: &KubeTarget,
+    pod: &str,
+    sandbox_name: &str,
+    marker: &str,
+) -> Result<(), String> {
+    // The guard stays alive until this function returns; dropping it kills
+    // the port-forward process.
+    let port_forward = PortForward::start(kube, pod).await?;
+    let endpoint = format!("http://127.0.0.1:{}", port_forward.port);
+
+    let mut cmd = openshell_cmd();
+    cmd.arg("--gateway-endpoint")
+        .arg(&endpoint)
+        .args(["sandbox", "exec"])
+        .args(["--name", sandbox_name])
+        .args(["--no-tty", "--"])
+        .args(["printf", "%s", marker])
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped());
+
+    let output = cmd
+        .output()
+        .await
+        .map_err(|err| format!("failed to spawn openshell exec via {pod}: {err}"))?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    let combined = strip_ansi(&format!("{stdout}{stderr}"));
+    if output.status.success() && combined.contains(marker) {
+        return Ok(());
+    }
+
+    Err(format!(
+        "exec through {pod} ({endpoint}) failed with exit {:?}; expected marker {marker:?}; output:\n{combined}",
+        output.status.code()
+    ))
+}
+
+/// Runs `printf %s <marker>` inside `sandbox_name` through the configured
+/// gateway.
+///
+/// No `--gateway-endpoint` override is passed, so the CLI picks its gateway
+/// from its own configuration. Fails unless the CLI exits successfully and
+/// `marker` appears in its ANSI-stripped stdout+stderr; the error carries
+/// the exit code and the combined output.
+async fn exec_through_configured_gateway(sandbox_name: &str, marker: &str) -> Result<(), String> {
+    let mut cmd = openshell_cmd();
+    cmd.args(["sandbox", "exec"])
+        .args(["--name", sandbox_name])
+        .args(["--no-tty", "--"])
+        .args(["printf", "%s", marker])
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped());
+
+    let output = cmd
+        .output()
+        .await
+        .map_err(|err| format!("failed to spawn openshell exec via configured gateway: {err}"))?;
+
+    // Search stdout and stderr together for the marker.
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    let combined = strip_ansi(&format!("{stdout}{stderr}"));
+    if output.status.success() && combined.contains(marker) {
+        return Ok(());
+    }
+
+    Err(format!(
+        "exec through configured gateway failed with exit {:?}; expected marker {marker:?}; output:\n{combined}",
+        output.status.code()
+    ))
+}
+
+/// Creates a sandbox that prints a phase-specific marker and checks the output.
+///
+/// The marker is `ha-create-watch-<phase>`; creation is rejected when the
+/// ANSI-stripped create output does not contain it.
+async fn create_sandbox_through_configured_gateway(phase: &str) -> Result<SandboxGuard, String> {
+    let marker = format!("ha-create-watch-{phase}");
+    let guard = SandboxGuard::create(&["--", "printf", "%s", &marker]).await?;
+    let output = strip_ansi(&guard.create_output);
+    if output.contains(&marker) {
+        return Ok(guard);
+    }
+    Err(format!(
+        "sandbox create through configured gateway did not include marker {marker:?}; output:\n{output}"
+    ))
+}
+
+/// Execs through every pod in `pods` in turn, stopping at the first failure.
+async fn assert_exec_through_all_pods(
+    kube: &KubeTarget,
+    pods: &[String],
+    sandbox_name: &str,
+    phase: &str,
+) -> Result<(), String> {
+    for pod_name in pods {
+        let tag = format!("ha-rebalance-{phase}-{pod_name}");
+        exec_through_pod(kube, pod_name, sandbox_name, &tag).await?;
+    }
+    Ok(())
+}
+
+/// Writes `size` bytes to `path` where the byte at offset `i` is `i % 251`.
+///
+/// The data is generated and written in chunks of at most 64 KiB, so the
+/// whole payload is never held in memory at once.
+fn write_deterministic_payload(path: &Path, size: usize) {
+    const CHUNK: usize = 64 * 1024;
+    let mut file = fs::File::create(path).expect("create HA sync payload");
+    let mut chunk = Vec::with_capacity(CHUNK);
+
+    for start in (0..size).step_by(CHUNK) {
+        let end = size.min(start.saturating_add(CHUNK));
+        chunk.clear();
+        chunk.extend((start..end).map(|pos| u8::try_from(pos % 251).expect("byte value fits")));
+        file.write_all(&chunk)
+            .expect("write HA sync payload chunk");
+    }
+}
+
+/// Returns the hex-encoded SHA-256 digest of the file at `path`.
+/// Panics when the file cannot be read.
+fn sha256_file(path: &Path) -> String {
+    let bytes = fs::read(path).expect("read file for SHA-256");
+    hex::encode(Sha256::digest(&bytes))
+}
+
+/// Builds the CLI invocation `sandbox upload <sandbox> <local> <dest> --no-git-ignore`.
+/// The command is returned unspawned; the caller configures stdio and runs it.
+fn upload_command(sandbox_name: &str, local_path: &Path, dest: &str) -> Command {
+    let mut upload = openshell_cmd();
+    upload
+        .args(["sandbox", "upload", sandbox_name])
+        .arg(local_path)
+        .args([dest, "--no-git-ignore"]);
+    upload
+}
+
+/// Builds the CLI invocation `sandbox download <sandbox> <sandbox_path> <local_dest>`.
+/// The command is returned unspawned; the caller configures stdio and runs it.
+fn download_command(sandbox_name: &str, sandbox_path: &str, local_dest: &Path) -> Command {
+    let mut download = openshell_cmd();
+    download
+        .args(["sandbox", "download", sandbox_name, sandbox_path])
+        .arg(local_dest);
+    download
+}
+
+/// Runs `cmd` to completion while both gateway pods are deleted one by one.
+///
+/// Waits for two ready gateway pods, spawns the CLI command with piped
+/// output, and after a 250ms head start rolls those pods concurrently with
+/// the command. Everything must finish within `HA_SYNC_TIMEOUT`. Returns the
+/// command's ANSI-stripped stdout+stderr; `operation` only labels errors.
+async fn run_cli_during_gateway_pod_roll(
+    kube: &KubeTarget,
+    mut cmd: Command,
+    operation: &str,
+) -> Result<String, String> {
+    let pods = kube.wait_for_gateway_pods(2).await?;
+
+    let child = cmd
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .kill_on_drop(true)
+        .spawn()
+        .map_err(|err| format!("failed to spawn {operation} command: {err}"))?;
+
+    let roll = async {
+        tokio::time::sleep(Duration::from_millis(250)).await;
+        kube.roll_gateway_pods(pods, 2).await
+    };
+    let both = async { tokio::join!(roll, child.wait_with_output()) };
+    let Ok((roll_result, output_result)) = tokio::time::timeout(HA_SYNC_TIMEOUT, both).await else {
+        return Err(format!(
+            "{operation} command and gateway pod roll did not finish within {HA_SYNC_TIMEOUT:?}"
+        ));
+    };
+
+    roll_result.map_err(|err| {
+        format!("gateway pod roll failed while {operation} command was running: {err}")
+    })?;
+    let output =
+        output_result.map_err(|err| format!("failed to wait for {operation} command: {err}"))?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    let combined = strip_ansi(&format!("{stdout}{stderr}"));
+    if output.status.success() {
+        return Ok(combined);
+    }
+    Err(format!(
+        "{operation} command failed with exit {:?} during gateway pod roll:\n{combined}",
+        output.status.code()
+    ))
+}
+
+#[tokio::test]
+async fn sandbox_exec_rebalances_across_gateway_scale_and_rollout() { // exec + create must keep working across scale 2 -> 3 -> 2 and per-pod deletion
+    let _test_lock = KUBE_HA_TEST_LOCK.lock().await; // held for the whole test; both HA tests take this lock
+    let kube = KubeTarget::from_env();
+
+    let mut pods = kube
+        .wait_for_gateway_pods(2)
+        .await
+        .expect("gateway should start with two ready HA replicas");
+
+    let mut sandbox = create_sandbox_through_configured_gateway("initial") // long-lived sandbox reused by every exec check below
+        .await
+        .expect("sandbox create and readiness watch should succeed through the configured gateway endpoint initially");
+
+    assert_exec_through_all_pods(&kube, &pods, &sandbox.name, "initial")
+        .await
+        .expect("exec should work through every initial gateway pod");
+    exec_through_configured_gateway(&sandbox.name, "ha-rebalance-client-initial")
+        .await
+        .expect("exec should work through the configured client gateway endpoint initially");
+
+    kube.scale_gateway(3)
+        .await
+        .expect("scale gateway to three replicas");
+    pods = kube
+        .wait_for_gateway_pods(3)
+        .await
+        .expect("gateway should scale to three ready replicas");
+    assert_exec_through_all_pods(&kube, &pods, &sandbox.name, "scale-up")
+        .await
+        .expect("exec should work through every gateway pod after scale-up");
+    exec_through_configured_gateway(&sandbox.name, "ha-rebalance-client-scale-up")
+        .await
+        .expect("exec should work through the configured client gateway endpoint after scale-up");
+    let mut scale_up_sandbox = create_sandbox_through_configured_gateway("scale-up")
+        .await
+        .expect(
+            "sandbox create and readiness watch should succeed through the configured gateway endpoint after scale-up",
+        );
+    scale_up_sandbox.cleanup().await; // only created to prove create works in this phase
+
+    kube.scale_gateway(2)
+        .await
+        .expect("scale gateway back to two replicas");
+    pods = kube
+        .wait_for_gateway_pods(2)
+        .await
+        .expect("gateway should scale back to two ready replicas");
+    assert_exec_through_all_pods(&kube, &pods, &sandbox.name, "scale-down")
+        .await
+        .expect("exec should work through every gateway pod after scale-down");
+    exec_through_configured_gateway(&sandbox.name, "ha-rebalance-client-scale-down")
+        .await
+        .expect("exec should work through the configured client gateway endpoint after scale-down");
+    let mut scale_down_sandbox = create_sandbox_through_configured_gateway("scale-down")
+        .await
+        .expect(
+            "sandbox create and readiness watch should succeed through the configured gateway endpoint after scale-down",
+        );
+    scale_down_sandbox.cleanup().await; // only created to prove create works in this phase
+
+    for (idx, pod) in pods.clone().into_iter().enumerate() { // iterate a snapshot: `pods` is reassigned inside the loop
+        kube.delete_gateway_pod(&pod)
+            .await
+            .unwrap_or_else(|err| panic!("delete gateway pod {pod}: {err}"));
+        pods = kube
+            .wait_for_gateway_pods(2)
+            .await
+            .unwrap_or_else(|err| panic!("gateway pods should recover after deleting {pod}: {err}"));
+        assert_exec_through_all_pods(&kube, &pods, &sandbox.name, &format!("delete-{pod}"))
+            .await
+            .unwrap_or_else(|err| panic!("exec should work after deleting {pod}: {err}"));
+        exec_through_configured_gateway(
+            &sandbox.name,
+            &format!("ha-rebalance-client-delete-{pod}"),
+        )
+        .await
+        .unwrap_or_else(|err| {
+            panic!(
+                "exec should work through the configured client gateway endpoint after deleting {pod}: {err}"
+            )
+        });
+        let mut delete_sandbox =
+            create_sandbox_through_configured_gateway(&format!("delete-{idx}"))
+                .await
+                .unwrap_or_else(|err| {
+                    panic!(
+                        "sandbox create and readiness watch should succeed through the configured gateway endpoint after deleting {pod}: {err}"
+                    )
+                });
+        delete_sandbox.cleanup().await;
+    }
+
+    sandbox.cleanup().await;
+}
+
+#[tokio::test]
+async fn sandbox_file_sync_survives_gateway_pod_rolls() { // upload and download must complete intact while the gateway pods are rolled
+    let _test_lock = KUBE_HA_TEST_LOCK.lock().await; // held for the whole test; both HA tests take this lock
+    let kube = KubeTarget::from_env();
+
+    kube.scale_gateway(2)
+        .await
+        .expect("gateway should run with two HA replicas for sync outage testing");
+    kube.wait_for_gateway_pods(2)
+        .await
+        .expect("gateway should have two ready replicas before sync outage testing");
+
+    let mut sandbox =
+        SandboxGuard::create_keep(&["sh", "-c", "echo Ready && sleep infinity"], "Ready")
+            .await
+            .expect("sandbox create --keep for HA sync testing");
+
+    let tmpdir = tempfile::tempdir().expect("create HA sync tmpdir");
+    let upload_dir = tmpdir.path().join("ha-sync-upload");
+    fs::create_dir_all(&upload_dir).expect("create HA sync upload dir");
+    fs::write(upload_dir.join("marker.txt"), "ha-sync-marker")
+        .expect("write HA sync marker");
+
+    let payload = upload_dir.join("payload.bin");
+    write_deterministic_payload(&payload, HA_SYNC_PAYLOAD_BYTES);
+    let expected_hash = sha256_file(&payload); // reference digest checked after upload and after download
+
+    let upload = upload_command(&sandbox.name, &upload_dir, "/sandbox/ha-sync");
+    run_cli_during_gateway_pod_roll(&kube, upload, "upload")
+        .await
+        .expect("upload should survive rolling gateway pod outages");
+
+    let remote_payload = "/sandbox/ha-sync/ha-sync-upload/payload.bin"; // expected location: <dest>/<uploaded dir name>/payload.bin
+    let remote_hash_cmd = format!("sha256sum {remote_payload} | awk '{{print $1}}'");
+    let remote_hash = sandbox
+        .exec(&["sh", "-c", &remote_hash_cmd])
+        .await
+        .expect("uploaded payload should be readable in sandbox");
+    assert!(
+        strip_ansi(&remote_hash).contains(&expected_hash),
+        "uploaded payload SHA-256 mismatch; expected {expected_hash}, got:\n{remote_hash}"
+    );
+
+    let download_dir = tmpdir.path().join("ha-sync-download");
+    fs::create_dir_all(&download_dir).expect("create HA sync download dir");
+    let download = download_command(
+        &sandbox.name,
+        "/sandbox/ha-sync/ha-sync-upload",
+        &download_dir,
+    );
+    run_cli_during_gateway_pod_roll(&kube, download, "download")
+        .await
+        .expect("download should survive rolling gateway pod outages");
+
+    let actual_hash = sha256_file(&download_dir.join("payload.bin")); // files are expected directly inside download_dir
+    assert_eq!(
+        expected_hash, actual_hash,
+        "downloaded payload SHA-256 mismatch after gateway pod rolls"
+    );
+    let marker = fs::read_to_string(download_dir.join("marker.txt"))
+        .expect("read downloaded HA sync marker");
+    assert_eq!(marker, "ha-sync-marker", "downloaded marker mismatch");
+
+    sandbox.cleanup().await;
+}
diff --git a/e2e/rust/tests/readyz_health.rs b/e2e/rust/tests/readyz_health.rs
deleted file mode 100644
index 8f093dabe..000000000
--- a/e2e/rust/tests/readyz_health.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-#![cfg(feature = "e2e-kubernetes")]
-
-use bytes::Bytes;
-use http_body_util::{BodyExt, Empty};
-use hyper::Request;
-use hyper_util::rt::TokioIo;
-use serde_json::Value;
-use std::time::{Duration, Instant};
-use tokio::net::TcpStream;
-
-fn health_port_from_env() -> u16 {
-    let raw = std::env::var("OPENSHELL_E2E_HEALTH_PORT").unwrap_or_else(|_| {
-        panic!(
-            "OPENSHELL_E2E_HEALTH_PORT is not set. The Kubernetes e2e wrapper \
-             (e2e/with-kube-gateway.sh) must export this variable so the \
-             /readyz test can reach the gateway health listener."
-        )
-    });
-    raw.parse::<u16>().unwrap_or_else(|err| {
-        panic!("OPENSHELL_E2E_HEALTH_PORT=\"{raw}\" is not a valid u16 port: {err}")
-    })
-}
-
-async fn http_get_json(port: u16, path: &str) -> Result<(u16, Value), String> {
-    let stream = TcpStream::connect(("127.0.0.1", port))
-        .await
-        .map_err(|err| format!("connect health endpoint :{port}: {err}"))?;
-    let (mut sender, conn) = hyper::client::conn::http1::Builder::new()
-        .handshake(TokioIo::new(stream))
-        .await
-        .map_err(|err| format!("handshake health HTTP/1 client :{port}: {err}"))?;
-    tokio::spawn(async move {
-        let _ = conn.await;
-    });
-
-    let req = Request::builder()
-        .method("GET")
-        .uri(format!("http://127.0.0.1:{port}{path}"))
-        .body(Empty::<Bytes>::new())
-        .map_err(|err| format!("build health request {path}: {err}"))?;
-    let resp = sender
-        .send_request(req)
-        .await
-        .map_err(|err| format!("send health request {path} to :{port}: {err}"))?;
-    let status_code = resp.status().as_u16();
-    let bytes = resp
-        .into_body()
-        .collect()
-        .await
-        .map_err(|err| format!("read health response body {path}: {err}"))?
-        .to_bytes();
-    let json = serde_json::from_slice::<Value>(&bytes)
-        .map_err(|err| format!("health endpoint {path} did not return valid JSON: {err}"))?;
-
-    Ok((status_code, json))
-}
-
-#[tokio::test]
-async fn readyz_reports_healthy_database_check() {
-    let port = health_port_from_env();
-
-    let deadline = Instant::now() + Duration::from_secs(20);
-    let timeout_detail = loop {
-        let observation = match http_get_json(port, "/readyz").await {
-            Ok((status, payload)) => {
-                let ready = status == 200
-                    && payload["status"] == "healthy"
-                    && payload["checks"]["database"]["status"] == "healthy";
-                if ready {
-                    assert!(
-                        payload["checks"]["database"]["latency_ms"].is_number(),
-                        "readyz payload should include checks.database.latency_ms: {payload}"
-                    );
-                    assert!(
-                        payload["checks"]["database"]["error"].is_null(),
-                        "readyz payload should not include checks.database.error when healthy: {payload}"
-                    );
-                    return;
-                }
-                format!("unexpected /readyz response status={status} payload={payload}")
-            }
-            Err(err) => err,
-        };
-
-        if Instant::now() >= deadline {
-            break observation;
-        }
-
-        tokio::time::sleep(Duration::from_secs(1)).await;
-    };
-    panic!("timed out waiting for /readyz healthy response after 20s: {timeout_detail}");
-}
diff --git a/e2e/with-kube-gateway.sh b/e2e/with-kube-gateway.sh
index bea1c01d3..6acaf255a 100755
--- a/e2e/with-kube-gateway.sh
+++ b/e2e/with-kube-gateway.sh
@@ -69,8 +69,6 @@ NAMESPACE="openshell"
 RELEASE_NAME="openshell"
 PORTFORWARD_PID=""
 PORTFORWARD_LOG="${WORKDIR}/portforward.log"
-PORTFORWARD_HEALTH_PID=""
-PORTFORWARD_HEALTH_LOG="${WORKDIR}/portforward-health.log"
 HELM_INSTALLED=0
 EXTERNAL_PG_FIXTURE_DEPLOYED=0
 EXTERNAL_PG_FIXTURE_SECRET=""
@@ -79,6 +77,12 @@ EXTERNAL_PG_FIXTURE_SERVICE="openshell-e2e-postgres"
 EXTERNAL_PG_FIXTURE_USER="openshell"
 EXTERNAL_PG_FIXTURE_PASSWORD="openshell-e2e-postgres"
 EXTERNAL_PG_FIXTURE_DATABASE="openshell"
+ENVOY_RELEASE_NAME="${OPENSHELL_E2E_ENVOY_RELEASE_NAME:-envoy-gateway}"
+ENVOY_NAMESPACE="${OPENSHELL_E2E_ENVOY_NAMESPACE:-envoy-gateway-system}"
+ENVOY_CHART_VERSION="${OPENSHELL_E2E_ENVOY_VERSION:-v1.7.2}"
+ENVOY_GATEWAY_MANIFEST="${ROOT}/deploy/kube/manifests/envoy-gateway-openshell.yaml"
+ENVOY_HELM_INSTALLED=0
+ENVOY_GATEWAY_CONFIG_APPLIED=0
 
 # Isolate CLI/SDK gateway metadata from the developer's real config.
 export XDG_CONFIG_HOME="${WORKDIR}/config"
@@ -114,6 +118,118 @@ deploy_postgres_fixture() {
     --from-literal=uri="${pg_uri}"
 }
 
+use_envoy_gateway() { # succeeds only if OPENSHELL_E2E_KUBE_USE_ENVOY is 1/true/TRUE/yes/YES
+  case "${OPENSHELL_E2E_KUBE_USE_ENVOY:-0}" in
+    1 | true | TRUE | yes | YES) return 0 ;;
+    *) return 1 ;; # unset falls back to 0, i.e. disabled
+  esac
+}
+
+# Installs the Envoy Gateway chart, then applies the OpenShell gateway manifest.
+install_envoy_gateway() {
+  echo "Installing Envoy Gateway (${ENVOY_CHART_VERSION})..."
+  # Flag each step for cleanup() *before* running it: a step that fails or times
+  # out midway can still leave resources behind, and cleanup() skips unflagged ones.
+  ENVOY_HELM_INSTALLED=1
+  helmctl upgrade --install "${ENVOY_RELEASE_NAME}" \
+    oci://docker.io/envoyproxy/gateway-helm \
+    --version "${ENVOY_CHART_VERSION}" \
+    --namespace "${ENVOY_NAMESPACE}" --create-namespace \
+    --wait --timeout 5m
+
+  kctl get namespace "${NAMESPACE}" >/dev/null 2>&1 || kctl create namespace "${NAMESPACE}"
+  ENVOY_GATEWAY_CONFIG_APPLIED=1
+  kctl apply -f "${ENVOY_GATEWAY_MANIFEST}"
+}
+
+# Waits for the Service labelled as owned by the ${RELEASE_NAME} Gateway in
+# ${NAMESPACE}, and for the pods carrying the same labels to be Ready.
+# Retries up to 60 times, 2s apart. On success prints
+# "<namespace>/<service>" on stdout; on failure dumps diagnostics to stderr
+# and returns 1.
+wait_for_envoy_service() {
+  local selector="gateway.envoyproxy.io/owning-gateway-name=${RELEASE_NAME}"
+  selector="${selector},gateway.envoyproxy.io/owning-gateway-namespace=${NAMESPACE}"
+  local svc_ref=""
+
+  for _ in $(seq 1 60); do
+    svc_ref="$(kctl get svc -A -l "${selector}" \
+      -o jsonpath='{range .items[0]}{.metadata.namespace}{"/"}{.metadata.name}{end}' \
+      2>/dev/null || true)"
+    if [ -n "${svc_ref}" ] \
+      && kctl -n "${svc_ref%%/*}" wait --for=condition=Ready pod \
+        -l "${selector}" --timeout=5s >/dev/null 2>&1; then
+      printf '%s\n' "${svc_ref}"
+      return 0
+    fi
+    sleep 2
+  done
+
+  echo "ERROR: Envoy proxy Service for Gateway ${RELEASE_NAME} was not ready." >&2
+  kctl -n "${NAMESPACE}" get gateway,grpcroute -o wide >&2 || true
+  kctl get svc -A -l "${selector}" \
+    -o wide >&2 || true
+  kctl get pods -A -l "${selector}" \
+    -o wide >&2 || true
+  return 1
+}
+
+# Forwards a free local port to the gateway (directly, or via the Envoy proxy
+# Service when enabled) and polls until it answers. Sets LOCAL_PORT and PORTFORWARD_PID.
+start_gateway_portforward() {
+  local elapsed=0 pf_timeout=30 target_port=8080
+  local target_namespace="${NAMESPACE}" target_service="${RELEASE_NAME}"
+  local target_service_ref=""
+
+  LOCAL_PORT="$(e2e_pick_port)"
+  if use_envoy_gateway; then
+    # Explicit check: with errexit suppressed, an empty ref would target "svc/".
+    target_service_ref="$(wait_for_envoy_service)" || return 1
+    target_namespace="${target_service_ref%%/*}"
+    target_service="${target_service_ref#*/}"
+    target_port=80
+    echo "Starting kubectl port-forward -n ${target_namespace} svc/${target_service} ${LOCAL_PORT}:${target_port} (Envoy Gateway)..."
+  else
+    echo "Starting kubectl port-forward svc/${target_service} ${LOCAL_PORT}:${target_port}..."
+  fi
+
+  kctl -n "${target_namespace}" port-forward "svc/${target_service}" \
+    "${LOCAL_PORT}:${target_port}" >"${PORTFORWARD_LOG}" 2>&1 &
+  PORTFORWARD_PID=$!
+
+  while [ "${elapsed}" -lt "${pf_timeout}" ]; do
+    if ! kill -0 "${PORTFORWARD_PID}" 2>/dev/null; then
+      echo "ERROR: kubectl port-forward exited before becoming reachable" >&2
+      cat "${PORTFORWARD_LOG}" >&2 || true
+      return 1
+    fi
+    if curl -s -o /dev/null --connect-timeout 1 "http://127.0.0.1:${LOCAL_PORT}"; then
+      return 0
+    fi
+    sleep 1
+    elapsed=$((elapsed + 1))
+  done
+
+  echo "ERROR: port-forward did not accept TCP within ${pf_timeout}s" >&2
+  cat "${PORTFORWARD_LOG}" >&2 || true
+  return 1
+}
+
+# Stops the gateway port-forward, if any: TERM, up to ~5s grace, then KILL.
+stop_gateway_portforward() {
+  local pid="${PORTFORWARD_PID}" tries=0
+  [ -n "${pid}" ] || return 0
+
+  kill "${pid}" >/dev/null 2>&1 || true
+  while [ "${tries}" -lt 10 ] && kill -0 "${pid}" >/dev/null 2>&1; do
+    sleep 0.5
+    tries=$((tries + 1))
+  done
+  kill -KILL "${pid}" >/dev/null 2>&1 || true
+  wait "${pid}" >/dev/null 2>&1 || true
+  PORTFORWARD_PID=""
+}
+
 cleanup_postgres_fixture() {
   local secret_name="$1"
 
@@ -132,15 +248,7 @@ cleanup_postgres_fixture() {
 cleanup() {
   local exit_code=$?
 
-  if [ -n "${PORTFORWARD_PID}" ]; then
-    kill "${PORTFORWARD_PID}" >/dev/null 2>&1 || true
-    wait "${PORTFORWARD_PID}" >/dev/null 2>&1 || true
-  fi
-
-  if [ -n "${PORTFORWARD_HEALTH_PID}" ]; then
-    kill "${PORTFORWARD_HEALTH_PID}" >/dev/null 2>&1 || true
-    wait "${PORTFORWARD_HEALTH_PID}" >/dev/null 2>&1 || true
-  fi
+  stop_gateway_portforward
 
   if [ "${exit_code}" -ne 0 ] && [ -n "${KUBE_CONTEXT}" ] && [ -n "${NAMESPACE}" ]; then
     if command -v kubectl >/dev/null 2>&1 \
@@ -161,15 +269,6 @@ cleanup() {
       cat "${PORTFORWARD_LOG}" || true
       echo "=== end port-forward log ==="
     fi
-    if [ -f "${PORTFORWARD_HEALTH_LOG}" ]; then
-      echo "=== health port-forward log ==="
-      cat "${PORTFORWARD_HEALTH_LOG}" || true
-      echo "=== end health port-forward log ==="
-    fi
-  fi
-
-  if [ "${EXTERNAL_PG_FIXTURE_DEPLOYED}" = "1" ]; then
-    cleanup_postgres_fixture "${EXTERNAL_PG_FIXTURE_SECRET}"
   fi
 
   if [ "${HELM_INSTALLED}" = "1" ] && [ -n "${KUBE_CONTEXT}" ] && [ -n "${NAMESPACE}" ]; then
@@ -185,6 +284,33 @@ cleanup() {
     fi
   fi
 
+  if [ "${EXTERNAL_PG_FIXTURE_DEPLOYED}" = "1" ]; then
+    cleanup_postgres_fixture "${EXTERNAL_PG_FIXTURE_SECRET}"
+  fi
+
+  if [ "${ENVOY_GATEWAY_CONFIG_APPLIED}" = "1" ] && [ -n "${KUBE_CONTEXT}" ]; then
+    if command -v kubectl >/dev/null 2>&1; then
+      kctl -n "${NAMESPACE}" delete backendtrafficpolicy.gateway.envoyproxy.io \
+        openshell-grpc-timeouts --ignore-not-found --wait=false \
+        >/dev/null 2>&1 || true
+      kctl delete gatewayclass.gateway.networking.k8s.io eg \
+        --ignore-not-found --wait=false >/dev/null 2>&1 || true
+    fi
+    ENVOY_GATEWAY_CONFIG_APPLIED=0
+  fi
+
+  if [ "${ENVOY_HELM_INSTALLED}" = "1" ] && [ -n "${KUBE_CONTEXT}" ]; then
+    if command -v helm >/dev/null 2>&1; then
+      helmctl uninstall "${ENVOY_RELEASE_NAME}" --namespace "${ENVOY_NAMESPACE}" \
+        --wait --timeout 60s >/dev/null 2>&1 || true
+    fi
+    if command -v kubectl >/dev/null 2>&1; then
+      kctl delete namespace "${ENVOY_NAMESPACE}" --wait=true --timeout=60s \
+        --ignore-not-found >/dev/null 2>&1 || true
+    fi
+    ENVOY_HELM_INSTALLED=0
+  fi
+
   if [ "${CLUSTER_CREATED_BY_US}" = "1" ] && [ -n "${CLUSTER_NAME}" ]; then
     if command -v k3d >/dev/null 2>&1 && k3d cluster list "${CLUSTER_NAME}" \
         >/dev/null 2>&1; then
@@ -200,16 +326,7 @@ trap cleanup EXIT
 # --- DB-scenario helpers (used only when OPENSHELL_E2E_KUBE_DB_SCENARIOS=1) ---
 
 scenario_stop_portforward() {
-  if [ -n "${PORTFORWARD_PID}" ]; then
-    kill "${PORTFORWARD_PID}" >/dev/null 2>&1 || true
-    wait "${PORTFORWARD_PID}" >/dev/null 2>&1 || true
-    PORTFORWARD_PID=""
-  fi
-  if [ -n "${PORTFORWARD_HEALTH_PID}" ]; then
-    kill "${PORTFORWARD_HEALTH_PID}" >/dev/null 2>&1 || true
-    wait "${PORTFORWARD_HEALTH_PID}" >/dev/null 2>&1 || true
-    PORTFORWARD_HEALTH_PID=""
-  fi
+  stop_gateway_portforward  # TERM, bounded wait, KILL, reap; also resets PORTFORWARD_PID
 }
 
 scenario_cleanup_release() {
@@ -263,74 +380,14 @@ run_scenario() {
     --wait --timeout 5m
   HELM_INSTALLED=1
 
-  LOCAL_PORT="$(e2e_pick_port)"
-  echo "Starting kubectl port-forward svc/openshell ${LOCAL_PORT}:8080..."
-  kctl -n "${NAMESPACE}" port-forward "svc/openshell" \
-    "${LOCAL_PORT}:8080" >"${PORTFORWARD_LOG}" 2>&1 &
-  PORTFORWARD_PID=$!
-
-  local elapsed=0 pf_timeout=30
-  while [ "${elapsed}" -lt "${pf_timeout}" ]; do
-    if ! kill -0 "${PORTFORWARD_PID}" 2>/dev/null; then
-      echo "ERROR: kubectl port-forward exited before becoming reachable" >&2
-      cat "${PORTFORWARD_LOG}" >&2 || true
-      DB_FAILED=$((DB_FAILED + 1))
-      DB_SCENARIOS_SUMMARY+=("FAIL  ${scenario_label}: port-forward died")
-      scenario_stop_portforward
-      scenario_cleanup_release
-      return
-    fi
-    if curl -s -o /dev/null --connect-timeout 1 "http://127.0.0.1:${LOCAL_PORT}"; then
-      break
-    fi
-    sleep 1
-    elapsed=$((elapsed + 1))
-  done
-  if [ "${elapsed}" -ge "${pf_timeout}" ]; then
-    echo "ERROR: port-forward did not accept TCP within ${pf_timeout}s" >&2
-    cat "${PORTFORWARD_LOG}" >&2 || true
+  if ! start_gateway_portforward; then
     DB_FAILED=$((DB_FAILED + 1))
-    DB_SCENARIOS_SUMMARY+=("FAIL  ${scenario_label}: port-forward timeout")
+    DB_SCENARIOS_SUMMARY+=("FAIL  ${scenario_label}: port-forward failed")
     scenario_stop_portforward
     scenario_cleanup_release
     return
   fi
 
-  HEALTH_LOCAL_PORT="$(e2e_pick_port)"
-  echo "Starting kubectl port-forward sts/${RELEASE_NAME} ${HEALTH_LOCAL_PORT}:health..."
-  kctl -n "${NAMESPACE}" port-forward "sts/${RELEASE_NAME}" \
-    "${HEALTH_LOCAL_PORT}:health" >"${PORTFORWARD_HEALTH_LOG}" 2>&1 &
-  PORTFORWARD_HEALTH_PID=$!
-
-  elapsed=0
-  while [ "${elapsed}" -lt "${pf_timeout}" ]; do
-    if ! kill -0 "${PORTFORWARD_HEALTH_PID}" 2>/dev/null; then
-      echo "ERROR: kubectl health port-forward exited before becoming reachable" >&2
-      cat "${PORTFORWARD_HEALTH_LOG}" >&2 || true
-      DB_FAILED=$((DB_FAILED + 1))
-      DB_SCENARIOS_SUMMARY+=("FAIL  ${scenario_label}: health port-forward died")
-      scenario_stop_portforward
-      scenario_cleanup_release
-      return
-    fi
-    if curl -s -o /dev/null --connect-timeout 1 "http://127.0.0.1:${HEALTH_LOCAL_PORT}/healthz"; then
-      break
-    fi
-    sleep 1
-    elapsed=$((elapsed + 1))
-  done
-  if [ "${elapsed}" -ge "${pf_timeout}" ]; then
-    echo "ERROR: health port-forward did not accept TCP within ${pf_timeout}s" >&2
-    cat "${PORTFORWARD_HEALTH_LOG}" >&2 || true
-    DB_FAILED=$((DB_FAILED + 1))
-    DB_SCENARIOS_SUMMARY+=("FAIL  ${scenario_label}: health port-forward timeout")
-    scenario_stop_portforward
-    scenario_cleanup_release
-    return
-  fi
-
-  export OPENSHELL_E2E_HEALTH_PORT="${HEALTH_LOCAL_PORT}"
-
   GATEWAY_NAME="openshell-e2e-kube-${LOCAL_PORT}"
   GATEWAY_ENDPOINT="http://127.0.0.1:${LOCAL_PORT}"
   e2e_register_plaintext_gateway \
@@ -342,6 +399,9 @@ run_scenario() {
   export OPENSHELL_GATEWAY="${GATEWAY_NAME}"
   export OPENSHELL_E2E_DRIVER="kubernetes"
   export OPENSHELL_E2E_SANDBOX_NAMESPACE="${NAMESPACE}"
+  export OPENSHELL_E2E_KUBE_CONTEXT="${KUBE_CONTEXT}"
+  export OPENSHELL_E2E_KUBE_NAMESPACE="${NAMESPACE}"
+  export OPENSHELL_E2E_KUBE_RELEASE="${RELEASE_NAME}"
   export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-300}"
 
   echo "Running e2e command against ${GATEWAY_ENDPOINT}: ${E2E_CMD[*]}"
@@ -552,6 +612,10 @@ if [ -n "${OPENSHELL_E2E_KUBE_EXTRA_VALUES:-}" ]; then
     helm_values_args+=(--values "${values_file}")
   done
 fi
+if use_envoy_gateway; then  # values-gateway.yaml is added after any OPENSHELL_E2E_KUBE_EXTRA_VALUES files; Helm lets later --values win
+  helm_values_args+=(--values "${ROOT}/deploy/helm/openshell/ci/values-gateway.yaml")
+  install_envoy_gateway
+fi
 
 if [ "${OPENSHELL_E2E_KUBE_DB_SCENARIOS:-0}" = "1" ]; then
   # --- Multi-scenario mode: test all database backends ---
@@ -602,59 +666,7 @@ else
     --wait --timeout 5m
   HELM_INSTALLED=1
 
-  LOCAL_PORT="$(e2e_pick_port)"
-  echo "Starting kubectl port-forward svc/openshell ${LOCAL_PORT}:8080..."
-  kctl -n "${NAMESPACE}" port-forward "svc/openshell" \
-    "${LOCAL_PORT}:8080" >"${PORTFORWARD_LOG}" 2>&1 &
-  PORTFORWARD_PID=$!
-
-  elapsed=0
-  timeout=30
-  while [ "${elapsed}" -lt "${timeout}" ]; do
-    if ! kill -0 "${PORTFORWARD_PID}" 2>/dev/null; then
-      echo "ERROR: kubectl port-forward exited before becoming reachable" >&2
-      cat "${PORTFORWARD_LOG}" >&2 || true
-      exit 1
-    fi
-    if curl -s -o /dev/null --connect-timeout 1 "http://127.0.0.1:${LOCAL_PORT}"; then
-      break
-    fi
-    sleep 1
-    elapsed=$((elapsed + 1))
-  done
-  if [ "${elapsed}" -ge "${timeout}" ]; then
-    echo "ERROR: port-forward did not accept TCP within ${timeout}s" >&2
-    cat "${PORTFORWARD_LOG}" >&2 || true
-    exit 1
-  fi
-
-  HEALTH_LOCAL_PORT="$(e2e_pick_port)"
-  echo "Starting kubectl port-forward sts/${RELEASE_NAME} ${HEALTH_LOCAL_PORT}:health..."
-  kctl -n "${NAMESPACE}" port-forward "sts/${RELEASE_NAME}" \
-    "${HEALTH_LOCAL_PORT}:health" >"${PORTFORWARD_HEALTH_LOG}" 2>&1 &
-  PORTFORWARD_HEALTH_PID=$!
-
-  elapsed=0
-  timeout=30
-  while [ "${elapsed}" -lt "${timeout}" ]; do
-    if ! kill -0 "${PORTFORWARD_HEALTH_PID}" 2>/dev/null; then
-      echo "ERROR: kubectl health port-forward exited before becoming reachable" >&2
-      cat "${PORTFORWARD_HEALTH_LOG}" >&2 || true
-      exit 1
-    fi
-    if curl -s -o /dev/null --connect-timeout 1 "http://127.0.0.1:${HEALTH_LOCAL_PORT}/healthz"; then
-      break
-    fi
-    sleep 1
-    elapsed=$((elapsed + 1))
-  done
-  if [ "${elapsed}" -ge "${timeout}" ]; then
-    echo "ERROR: health port-forward did not accept TCP within ${timeout}s" >&2
-    cat "${PORTFORWARD_HEALTH_LOG}" >&2 || true
-    exit 1
-  fi
-
-  export OPENSHELL_E2E_HEALTH_PORT="${HEALTH_LOCAL_PORT}"
+  start_gateway_portforward
 
   GATEWAY_NAME="openshell-e2e-kube-${LOCAL_PORT}"
   GATEWAY_ENDPOINT="http://127.0.0.1:${LOCAL_PORT}"
@@ -667,6 +679,9 @@ else
   export OPENSHELL_GATEWAY="${GATEWAY_NAME}"
   export OPENSHELL_E2E_DRIVER="kubernetes"
   export OPENSHELL_E2E_SANDBOX_NAMESPACE="${NAMESPACE}"
+  export OPENSHELL_E2E_KUBE_CONTEXT="${KUBE_CONTEXT}"
+  export OPENSHELL_E2E_KUBE_NAMESPACE="${NAMESPACE}"
+  export OPENSHELL_E2E_KUBE_RELEASE="${RELEASE_NAME}"
   export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-300}"
 
   echo "Running e2e command against ${GATEWAY_ENDPOINT}: $*"
diff --git a/proto/openshell.proto b/proto/openshell.proto
index d701956d3..38e9cf84d 100644
--- a/proto/openshell.proto
+++ b/proto/openshell.proto
@@ -181,6 +181,15 @@ service OpenShell {
   // no new TLS handshake, no reverse HTTP CONNECT.
   rpc RelayStream(stream RelayFrame) returns (stream RelayFrame);
 
+  // Internal gateway-to-gateway relay forwarding.
+  //
+  // A gateway replica that receives a user request for a sandbox whose
+  // supervisor session is owned by a different replica opens this stream to the
+  // owner. The first frame carries PeerRelayInit; subsequent frames carry raw
+  // bytes in either direction. This RPC is authenticated as a gateway peer, not
+  // as a user or sandbox supervisor.
+  rpc PeerRelay(stream PeerRelayFrame) returns (stream PeerRelayFrame);
+
   // Watch a sandbox and stream updates.
   //
   // This stream can include:
@@ -1390,6 +1399,9 @@ message SupervisorHello {
   string sandbox_id = 1;
   // Supervisor instance ID (e.g. boot id or process epoch).
   string instance_id = 2;
+  // Monotonic counter scoped to instance_id. Incremented for each reconnect so
+  // gateways can distinguish a fresh supervisor connection from stale cleanup.
+  uint64 connection_epoch = 3;
 }
 
 // Gateway accepts the supervisor session.
@@ -1459,6 +1471,25 @@ message RelayFrame {
   }
 }
 
+// First frame of a PeerRelay stream (gateway-to-gateway relay forwarding).
+message PeerRelayInit {
+  // Stable sandbox UUID whose supervisor relay should be opened.
+  string sandbox_id = 1;
+  // Relay target to ask the owning gateway to open on its local supervisor
+  // session. The channel_id is assigned by the forwarding gateway.
+  RelayOpen relay_open = 2;
+  // Replica id of the forwarding (non-owner) gateway that initiated the relay.
+  string requester_replica_id = 3;
+}
+
+// A single frame on the gateway-to-gateway peer relay RPC (PeerRelay).
+message PeerRelayFrame {
+  oneof payload {
+    PeerRelayInit init = 1;  // Carried by the first frame of the stream.
+    bytes data = 2;          // Raw relay bytes; subsequent frames, either direction.
+  }
+}
+
 // Supervisor reports the result of a relay open request.
 message RelayOpenResult {
   // Channel identifier from the RelayOpen request.
diff --git a/tasks/test.toml b/tasks/test.toml
index c80225e4f..ada8c7249 100644
--- a/tasks/test.toml
+++ b/tasks/test.toml
@@ -94,6 +94,11 @@ description = "Run Kubernetes e2e with all database backend scenarios (SQLite an
 env = { OPENSHELL_E2E_KUBE_DB_SCENARIOS = "1" }
 run = "e2e/rust/e2e-kubernetes.sh"
 
+["e2e:kubernetes:ha-rebalancing"]
+description = "Run the full Kubernetes e2e suite through Envoy against two gateway replicas and external PostgreSQL"
+env = { OPENSHELL_E2E_KUBE_EXTERNAL_POSTGRES_SECRET = "openshell-ha-pg", OPENSHELL_E2E_KUBE_EXTRA_VALUES = "deploy/helm/openshell/ci/values-high-availability.yaml", OPENSHELL_E2E_KUBE_USE_ENVOY = "1" }
+run = "e2e/rust/e2e-kubernetes.sh"  # same runner script as the DB-scenarios task; the env table above selects the setup
+
 ["e2e:vm"]
 description = "Start openshell-gateway with the VM compute driver and run the cluster-agnostic smoke e2e"
 run = "e2e/rust/e2e-vm.sh"