diff --git a/docs/reference/troubleshooting.mdx b/docs/reference/troubleshooting.mdx
index 6d23f1f476..8d57d3f8cd 100644
--- a/docs/reference/troubleshooting.mdx
+++ b/docs/reference/troubleshooting.mdx
@@ -1254,6 +1254,11 @@ Fix the NVIDIA Container Toolkit or CDI configuration reported in the diagnostic
 If you do not need GPU access inside the sandbox, rerun with `--no-sandbox-gpu`.
 Set `NEMOCLAW_DOCKER_GPU_PATCH=0` only when you need to bypass this compatibility path during troubleshooting.
 
+If onboarding reports `OpenShell supervisor did not reconnect to the GPU-enabled container.` even though the diagnostic bundle shows the patched container is running and healthy, the supervisor-reconnect wait is treating a transient Error phase (reported while the OpenShell host re-registers the new container) as fatal.
+The reconnect wait debounces consecutive Error-phase polls before fast-failing; by default it requires five consecutive Error-phase polls, or about 10 seconds in total.
+Increase the debounce window with `NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE` if your host needs more time to re-register the patched container, for example slow WSL2 + Docker Desktop setups.
+Set it to a higher integer such as `15` (about 30 seconds) and rerun onboarding; values below `1` are clamped to `1`, which fails on the first Error-phase poll.
+
 ### `pip install` fails with a system-packages error
 
 Recent Ubuntu releases (including DGX Spark's Ubuntu 24.04) mark the system Python install as externally managed, so `pip install` without a virtual environment fails.
diff --git a/skills/nemoclaw-user-reference/references/troubleshooting.md b/skills/nemoclaw-user-reference/references/troubleshooting.md
index bcee680bfe..7ee345ab8f 100644
--- a/skills/nemoclaw-user-reference/references/troubleshooting.md
+++ b/skills/nemoclaw-user-reference/references/troubleshooting.md
@@ -1244,6 +1244,11 @@ Fix the NVIDIA Container Toolkit or CDI configuration reported in the diagnostic
 If you do not need GPU access inside the sandbox, rerun with `--no-sandbox-gpu`.
 Set `NEMOCLAW_DOCKER_GPU_PATCH=0` only when you need to bypass this compatibility path during troubleshooting.
 
+If onboarding reports `OpenShell supervisor did not reconnect to the GPU-enabled container.` even though the diagnostic bundle shows the patched container is running and healthy, the supervisor-reconnect wait is treating a transient Error phase (reported while the OpenShell host re-registers the new container) as fatal.
+The reconnect wait debounces consecutive Error-phase polls before fast-failing; by default it requires five consecutive Error-phase polls, or about 10 seconds in total.
+Increase the debounce window with `NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE` if your host needs more time to re-register the patched container, for example slow WSL2 + Docker Desktop setups.
+Set it to a higher integer such as `15` (about 30 seconds) and rerun onboarding; values below `1` are clamped to `1`, which fails on the first Error-phase poll.
+
 ### `pip install` fails with a system-packages error
 
 Recent Ubuntu releases (including DGX Spark's Ubuntu 24.04) mark the system Python install as externally managed, so `pip install` without a virtual environment fails.
diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts
index cf560e7e4f..88223f119f 100644
--- a/src/lib/onboard/docker-gpu-patch.test.ts
+++ b/src/lib/onboard/docker-gpu-patch.test.ts
@@ -837,7 +837,10 @@ describe("docker-gpu-patch Error-phase diagnostics (#4316)", () => {
   it("short-circuits the supervisor-reconnect wait when the sandbox enters Error phase", () => {
     // Without the short-circuit, a patched container that crashes on startup
     // leaves users waiting the full 900s+ supervisor-reconnect timeout before
-    // any Error-phase diagnostics run (#4316).
+    // any Error-phase diagnostics run. With the debounce now in place, this
+    // test asserts the K=1 (no-debounce) behavior explicitly so the original
+    // fast-fail intent is preserved when the operator opts out of the
+    // debounce.
     const runOpenshell = vi.fn(() => ({ status: 1, stderr: "sandbox not ready" }));
     const listOutputs = [
       "alpha   Provisioning   1s ago",
@@ -853,10 +856,11 @@ describe("docker-gpu-patch Error-phase diagnostics (#4316)", () => {
       runOpenshell,
       runCaptureOpenshell,
       sleep,
+      errorPhaseDebouncePolls: 1,
     });
 
     expect(ok).toBe(false);
-    // Without short-circuit we'd loop ~300 iterations. With it, the second
+    // Without short-circuit we'd loop ~300 iterations. With K=1 the second
     // iteration's list output shows Error and the wait bails out.
     expect(runOpenshell).toHaveBeenCalledTimes(2);
     expect(sleep).toHaveBeenCalledTimes(1);
diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts
index 5e9dd16a13..d46705ebc5 100644
--- a/src/lib/onboard/docker-gpu-patch.ts
+++ b/src/lib/onboard/docker-gpu-patch.ts
@@ -14,7 +14,22 @@ import {
   dockerRunDetached,
   dockerStop,
 } from "../adapters/docker";
-import { envInt } from "./env";
+import {
+  type DockerGpuSupervisorReconnectDeps,
+  DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE_ENV,
+  DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV,
+  getDockerGpuSupervisorReconnectErrorDebouncePolls,
+  getDockerGpuSupervisorReconnectTimeoutSecs,
+  waitForOpenShellSupervisorReconnect,
+} from "./docker-gpu-supervisor-reconnect";
+export {
+  DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE_ENV,
+  DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV,
+  getDockerGpuSupervisorReconnectErrorDebouncePolls,
+  getDockerGpuSupervisorReconnectTimeoutSecs,
+  waitForOpenShellSupervisorReconnect,
+};
+export type { DockerGpuSupervisorReconnectDeps };
 
 export const OPENSHELL_MANAGED_BY_LABEL = "openshell.ai/managed-by";
 export const OPENSHELL_MANAGED_BY_VALUE = "openshell";
@@ -23,9 +38,6 @@ const OPENSHELL_SANDBOX_COMMAND_ENV = "OPENSHELL_SANDBOX_COMMAND";
 
 const DOCKER_GPU_PATCH_TIMEOUT_MS = 30_000;
 const DOCKER_GPU_PATCH_WAIT_SECS = 180;
-const DOCKER_GPU_SUPERVISOR_RECONNECT_MIN_SECS = 900;
-export const DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV =
-  "NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT";
 export const DOCKER_GPU_PATCH_NETWORK_ENV = "NEMOCLAW_DOCKER_GPU_PATCH_NETWORK";
 const MAX_DOCKER_CONTAINER_NAME_LENGTH = 253;
 const GPU_ENV_KEYS = new Set([
@@ -70,6 +82,11 @@ export type DockerGpuPatchDeps = {
   readDir?: (dirPath: string) => string[] | null;
   /** Injectable file reader for unit testing CDI spec content checks. */
   readFile?: (filePath: string) => string | null;
+  /**
+   * Forwarded to the supervisor-reconnect wait. See
+   * `DockerGpuSupervisorReconnectDeps.errorPhaseDebouncePolls`.
+   */
+  errorPhaseDebouncePolls?: number;
 };
 
 export type DockerGpuPatchModeKind = "gpus" | "nvidia-runtime" | "cdi";
@@ -833,72 +850,6 @@ function waitForNewContainerId(
   return null;
 }
 
-function sandboxListShowsErrorPhase(
-  sandboxName: string,
-  runCaptureOpenshell: NonNullable<DockerGpuPatchDeps["runCaptureOpenshell"]>,
-): boolean {
-  try {
-    const list = runCaptureOpenshell(["sandbox", "list"], {
-      ignoreError: true,
-      suppressOutput: true,
-      timeout: DOCKER_GPU_PATCH_TIMEOUT_MS,
-    });
-    return SANDBOX_FAILURE_PHASE_TOKENS.has(
-      parseSandboxPhaseFromListOutput(list, sandboxName) ?? "",
-    );
-  } catch {
-    return false;
-  }
-}
-
-function waitForOpenShellSandboxExec(
-  sandboxName: string,
-  timeoutSecs: number,
-  deps: DockerGpuPatchDeps,
-): boolean {
-  if (!deps.runOpenshell) return true;
-  const d = depsWithDefaults(deps);
-  const deadline = Date.now() + Math.max(1, timeoutSecs) * 1000;
-  while (Date.now() <= deadline) {
-    const result = deps.runOpenshell(
-      ["sandbox", "exec", "-n", sandboxName, "--", "true"],
-      { ignoreError: true, suppressOutput: true, timeout: DOCKER_GPU_PATCH_TIMEOUT_MS },
-    );
-    if (isZeroStatus(result)) return true;
-    // Short-circuit the supervisor-reconnect wait when the sandbox enters a
-    // terminal failure phase. Without this, a patched container that exits
-    // on startup leaves the user staring at the supervisor-reconnect
-    // timeout (default 900s) before any Error-phase diagnostics run (#4316).
-    if (
-      deps.runCaptureOpenshell &&
-      sandboxListShowsErrorPhase(sandboxName, deps.runCaptureOpenshell)
-    ) {
-      return false;
-    }
-    d.sleep(2);
-  }
-  return false;
-}
-
-export const waitForOpenShellSupervisorReconnect = waitForOpenShellSandboxExec;
-
-export function getDockerGpuSupervisorReconnectTimeoutSecs(
-  sandboxReadyTimeoutSecs: number,
-  env: Record<string, string | undefined> = process.env,
-): number {
-  const readyTimeoutSecs = Number.isFinite(sandboxReadyTimeoutSecs)
-    ? Math.max(1, Math.round(sandboxReadyTimeoutSecs))
-    : 1;
-  const fallback = Math.max(
-    readyTimeoutSecs,
-    DOCKER_GPU_SUPERVISOR_RECONNECT_MIN_SECS,
-  );
-  return Math.max(
-    1,
-    envInt(DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV, fallback, env),
-  );
-}
-
 function decoratePatchError<T extends Error>(
   error: T,
   context: DockerGpuPatchFailureContext,
@@ -1017,7 +968,7 @@ export function recreateOpenShellDockerSandboxWithGpu(
     });
 
     if (options.waitForSupervisor !== false) {
-      const execReady = waitForOpenShellSandboxExec(
+      const execReady = waitForOpenShellSupervisorReconnect(
         options.sandboxName,
         options.timeoutSecs ?? DOCKER_GPU_PATCH_WAIT_SECS,
         deps,
diff --git a/src/lib/onboard/docker-gpu-supervisor-reconnect.test.ts b/src/lib/onboard/docker-gpu-supervisor-reconnect.test.ts
new file mode 100644
index 0000000000..62976067f0
--- /dev/null
+++ b/src/lib/onboard/docker-gpu-supervisor-reconnect.test.ts
@@ -0,0 +1,159 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, expect, it, vi } from "vitest";
+
+import {
+  getDockerGpuSupervisorReconnectErrorDebouncePolls,
+  waitForOpenShellSupervisorReconnect,
+} from "../../../dist/lib/onboard/docker-gpu-supervisor-reconnect";
+
+// The Docker GPU patch supervisor-reconnect wait must absorb a transient
+// Error phase reported while OpenShell's sandbox-list cache catches up to
+// the newly-recreated GPU container. The old-container teardown briefly
+// marks the row Error before the host re-registers the new container.
+// Without debouncing, the fast-fail short-circuits within ~12s on a healthy
+// GPU sandbox whose container is running and whose supervisor has already
+// logged `LIFECYCLE:INSTALL OpenShell Sandbox Supervisor success`.
+describe("docker-gpu-supervisor-reconnect Error-phase debounce", () => {
+  it("absorbs a transient Error phase shorter than the debounce window", () => {
+    const execOutputs = [
+      { status: 1, stderr: "sandbox not ready" },
+      { status: 1, stderr: "sandbox not ready" },
+      { status: 1, stderr: "sandbox not ready" },
+      { status: 0, stdout: "" },
+    ];
+    let execIdx = 0;
+    const runOpenshell = vi.fn(
+      () => execOutputs[Math.min(execIdx++, execOutputs.length - 1)],
+    );
+    const listOutputs = [
+      "alpha   Error         1s ago",
+      "alpha   Error         3s ago",
+      "alpha   Provisioning  5s ago",
+      "alpha   Ready         7s ago",
+    ];
+    let listIdx = 0;
+    const runCaptureOpenshell = vi.fn(
+      () => listOutputs[Math.min(listIdx++, listOutputs.length - 1)],
+    );
+    const sleep = vi.fn();
+
+    const ok = waitForOpenShellSupervisorReconnect("alpha", 600, {
+      runOpenshell,
+      runCaptureOpenshell,
+      sleep,
+      errorPhaseDebouncePolls: 5,
+    });
+
+    expect(ok).toBe(true);
+    expect(runOpenshell).toHaveBeenCalledTimes(4);
+  });
+
+  it("still fast-fails when Error phase persists for the full debounce window", () => {
+    const runOpenshell = vi.fn(() => ({ status: 1, stderr: "sandbox not ready" }));
+    const runCaptureOpenshell = vi.fn(() => "alpha   Error   1s ago");
+    const sleep = vi.fn();
+
+    const ok = waitForOpenShellSupervisorReconnect("alpha", 600, {
+      runOpenshell,
+      runCaptureOpenshell,
+      sleep,
+      errorPhaseDebouncePolls: 3,
+    });
+
+    expect(ok).toBe(false);
+    // Three consecutive Error polls trigger the short-circuit on poll 3.
+    // Sleeps happen only between polls 1->2 and 2->3, so two sleeps total.
+    expect(runOpenshell).toHaveBeenCalledTimes(3);
+    expect(sleep).toHaveBeenCalledTimes(2);
+  });
+
+  it("resets the consecutive-Error counter when the phase recovers", () => {
+    // Error, Error, Provisioning (counter resets), Error, Error, Error
+    // -> bails out on the 3rd post-recovery Error, not earlier.
+    const runOpenshell = vi.fn(() => ({ status: 1, stderr: "sandbox not ready" }));
+    const listOutputs = [
+      "alpha   Error         1s ago",
+      "alpha   Error         3s ago",
+      "alpha   Provisioning  5s ago",
+      "alpha   Error         7s ago",
+      "alpha   Error         9s ago",
+      "alpha   Error         11s ago",
+    ];
+    let listIdx = 0;
+    const runCaptureOpenshell = vi.fn(
+      () => listOutputs[Math.min(listIdx++, listOutputs.length - 1)],
+    );
+    const sleep = vi.fn();
+
+    const ok = waitForOpenShellSupervisorReconnect("alpha", 600, {
+      runOpenshell,
+      runCaptureOpenshell,
+      sleep,
+      errorPhaseDebouncePolls: 3,
+    });
+
+    expect(ok).toBe(false);
+    expect(runOpenshell).toHaveBeenCalledTimes(6);
+  });
+
+  it("defaults the debounce to 5 polls and honors the env override", () => {
+    expect(getDockerGpuSupervisorReconnectErrorDebouncePolls({})).toBe(5);
+    expect(
+      getDockerGpuSupervisorReconnectErrorDebouncePolls({
+        NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE: "2",
+      }),
+    ).toBe(2);
+    // Non-positive values are clamped to a minimum of 1.
+    expect(
+      getDockerGpuSupervisorReconnectErrorDebouncePolls({
+        NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE: "0",
+      }),
+    ).toBe(1);
+  });
+
+  it("clamps an injected debounce override to the same minimum as the env path", () => {
+    // 0 / negative / fractional overrides must not bypass the ≥1 contract that
+    // the env-backed helper enforces.
+    const runOpenshell = vi.fn(() => ({ status: 1, stderr: "sandbox not ready" }));
+    const runCaptureOpenshell = vi.fn(() => "alpha   Error   1s ago");
+    const sleep = vi.fn();
+
+    const ok = waitForOpenShellSupervisorReconnect("alpha", 600, {
+      runOpenshell,
+      runCaptureOpenshell,
+      sleep,
+      errorPhaseDebouncePolls: 0,
+    });
+
+    expect(ok).toBe(false);
+    // Clamped to K=1: first Error poll short-circuits with no preceding sleep.
+    expect(runOpenshell).toHaveBeenCalledTimes(1);
+    expect(sleep).not.toHaveBeenCalled();
+  });
+
+  it("falls back to the env-backed default when an injected override is non-finite", () => {
+    // NaN / +Infinity / -Infinity overrides must not silently neutralise the
+    // fast-fail loop. `count >= NaN` is always false and no finite count can
+    // reach `Infinity`, so without the fallback the wait would burn the full
+    // timeout window.
+    for (const bogus of [Number.NaN, Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY]) {
+      const runOpenshell = vi.fn(() => ({ status: 1, stderr: "sandbox not ready" }));
+      const runCaptureOpenshell = vi.fn(() => "alpha   Error   1s ago");
+      const sleep = vi.fn();
+
+      const ok = waitForOpenShellSupervisorReconnect("alpha", 600, {
+        runOpenshell,
+        runCaptureOpenshell,
+        sleep,
+        errorPhaseDebouncePolls: bogus,
+      });
+
+      expect(ok).toBe(false);
+      // Default K=5 from the env-backed helper: 5 polls + 4 sleeps before fast-fail.
+      expect(runOpenshell).toHaveBeenCalledTimes(5);
+      expect(sleep).toHaveBeenCalledTimes(4);
+    }
+  });
+});
diff --git a/src/lib/onboard/docker-gpu-supervisor-reconnect.ts b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts
new file mode 100644
index 0000000000..c8906e9501
--- /dev/null
+++ b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts
@@ -0,0 +1,158 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Supervisor-reconnect wait for the Docker GPU patch path.
+ *
+ * Source-of-truth boundary
+ * ------------------------
+ * The transient Error phase this module debounces is observed in the
+ * `openshell sandbox list` cache while the OpenShell host re-registers the
+ * newly-recreated GPU container after `docker stop` + `docker run`. The
+ * preferred fix lives at the OpenShell gateway: `sandbox list` should not
+ * report a terminal phase for a sandbox whose Docker container is being
+ * recreated by the GPU patch path. Until that upstream change ships,
+ * NemoClaw tolerates the transient Error at this layer via a
+ * consecutive-poll debounce.
+ *
+ * Removal condition
+ * -----------------
+ * Delete this debounce once OpenShell guarantees `sandbox list` skips the
+ * brief Error transition during a known recreate. A real-Docker GPU E2E
+ * reproduction (e.g. `e2e-branch-validation:gpu`,
+ * `gpu-repo-local-ollama-openclaw`) showing a transient teardown-Error that
+ * recovers to Ready is the runtime evidence required.
+ */
+
+import { envInt } from "./env";
+
+const DOCKER_GPU_PATCH_TIMEOUT_MS = 30_000;
+const DOCKER_GPU_SUPERVISOR_RECONNECT_MIN_SECS = 900;
+// Default consecutive Error-phase polls required before fast-fail. With a
+// 2-second poll interval this is ~10s of sustained Error, which absorbs the
+// transient Error reported during container recreation while still bailing
+// fast on a patched container that crashed on startup.
+const DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_PHASE_DEFAULT_DEBOUNCE_POLLS = 5;
+
+export const DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV =
+  "NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT";
+export const DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE_ENV =
+  "NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE";
+
+const TERMINAL_SANDBOX_FAILURE_PHASES = new Set(["Error", "Failed", "CrashLoopBackOff"]);
+
+type DockerRunResult = {
+  status?: number | null;
+  stdout?: string | Buffer | null;
+  stderr?: string | Buffer | null;
+};
+
+type RunOpenshellFn = (
+  args: string[],
+  opts?: Record<string, unknown>,
+) => DockerRunResult;
+type RunCaptureOpenshellFn = (
+  args: string[],
+  opts?: Record<string, unknown>,
+) => string;
+
+export type DockerGpuSupervisorReconnectDeps = {
+  runOpenshell?: RunOpenshellFn;
+  runCaptureOpenshell?: RunCaptureOpenshellFn;
+  sleep?: (seconds: number) => void;
+  errorPhaseDebouncePolls?: number;
+};
+
+function defaultSleep(seconds: number): void {
+  Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, Math.max(0, seconds) * 1000);
+}
+
+function isZeroStatus(result: DockerRunResult | null | undefined): boolean {
+  return Number(result?.status ?? 0) === 0;
+}
+
+const ANSI_RE = /\x1b\[[0-9;]*m/g;
+
+function parseSandboxListFailurePhase(output: string, sandboxName: string): string | null {
+  if (typeof output !== "string" || !output.includes(sandboxName)) return null;
+  for (const line of output.replace(ANSI_RE, "").split(/\r?\n/)) {
+    const [name, ...rest] = line.trim().split(/\s+/);
+    if (name !== sandboxName) continue;
+    // Scan only the columns after the name: a sandbox named "Error" must not read as failed.
+    return rest.find((col) => TERMINAL_SANDBOX_FAILURE_PHASES.has(col)) ?? null;
+  }
+  return null;
+}
+
+// True when the `sandbox list` output shows a terminal failure phase for the sandbox;
+// a list probe that throws is treated as "no failure seen".
+function sandboxListShowsErrorPhase(
+  sandboxName: string,
+  runCaptureOpenshell: RunCaptureOpenshellFn,
+): boolean {
+  const opts = { ignoreError: true, suppressOutput: true, timeout: DOCKER_GPU_PATCH_TIMEOUT_MS };
+  try {
+    const listing = runCaptureOpenshell(["sandbox", "list"], opts);
+    const failurePhase = parseSandboxListFailurePhase(listing, sandboxName);
+    return failurePhase !== null;
+  } catch {
+    return false;
+  }
+}
+
+// Polls `openshell sandbox exec -n <name> -- true` every 2s. Returns true on a zero status (or
+// when no exec runner is injected), false on deadline or a sustained failure phase in the list.
+export function waitForOpenShellSupervisorReconnect(
+  sandboxName: string,
+  timeoutSecs: number,
+  deps: DockerGpuSupervisorReconnectDeps,
+): boolean {
+  if (!deps.runOpenshell) return true;
+  const pause = deps.sleep ?? defaultSleep;
+  const stopAt = Date.now() + Math.max(1, timeoutSecs) * 1000;
+  // Missing or non-finite overrides use the env-backed default; finite ones floor at 1.
+  const override = deps.errorPhaseDebouncePolls;
+  const threshold =
+    override == null || !Number.isFinite(override)
+      ? getDockerGpuSupervisorReconnectErrorDebouncePolls()
+      : Math.max(1, Math.trunc(override));
+  let errorStreak = 0;
+  while (Date.now() <= stopAt) {
+    const probe = deps.runOpenshell(
+      ["sandbox", "exec", "-n", sandboxName, "--", "true"],
+      { ignoreError: true, suppressOutput: true, timeout: DOCKER_GPU_PATCH_TIMEOUT_MS },
+    );
+    if (isZeroStatus(probe)) return true;
+    const listFn = deps.runCaptureOpenshell;
+    const sawFailure = listFn ? sandboxListShowsErrorPhase(sandboxName, listFn) : false;
+    // A poll without a failure phase restarts the streak, so only sustained errors fail fast.
+    errorStreak = sawFailure ? errorStreak + 1 : 0;
+    if (errorStreak >= threshold) return false;
+    pause(2);
+  }
+  return false;
+}
+
+export function getDockerGpuSupervisorReconnectTimeoutSecs(
+  sandboxReadyTimeoutSecs: number,
+  env: Record<string, string | undefined> = process.env,
+): number {
+  // The env override defaults to the ready timeout, never below the reconnect minimum.
+  const finite = Number.isFinite(sandboxReadyTimeoutSecs);
+  const readySecs = finite ? Math.max(1, Math.round(sandboxReadyTimeoutSecs)) : 1;
+  const fallback = Math.max(readySecs, DOCKER_GPU_SUPERVISOR_RECONNECT_MIN_SECS);
+  return Math.max(1, envInt(DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV, fallback, env));
+}
+
+export function getDockerGpuSupervisorReconnectErrorDebouncePolls(
+  env: Record<string, string | undefined> = process.env,
+): number {
+  // Env override (or the built-in default of consecutive Error-phase polls),
+  // floored at one poll so the threshold is never below a single observation.
+  const configuredPolls = envInt(
+    DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE_ENV,
+    DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_PHASE_DEFAULT_DEBOUNCE_POLLS,
+    env,
+  );
+  return configuredPolls < 1 ? 1 : configuredPolls;
+}