From 0c25fc185804792d84678285fd44420d9e16f881 Mon Sep 17 00:00:00 2001
From: Tony Luo <xialuo@nvidia.com>
Date: Thu, 4 Jun 2026 17:18:57 +0800
Subject: [PATCH 1/2] fix(onboard): pre-approve gateway scope upgrades after
 onboard and recover
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After a fresh onboard, the first `openclaw agent` run emitted "scope upgrade
pending approval" and silently fell back to embedded mode, and the same
pending requestId persisted across every subsequent run — the approval never
auto-resolved.

Root cause was a divergence in the host one-shot auto-pair approval pass: it
ran `openclaw devices approve` with a 1s timeout and stripped only
OPENCLAW_GATEWAY_URL from the child env. Leaving PORT and TOKEN set re-pinned
the approve to the gateway, hitting the #4462 self-defeat where a
gateway-pinned scope-upgrade approve requests the very scopes it is trying to
grant and fails — so the request was never cleared.

- Match the in-sandbox watcher: approve timeout 1s -> 10s, strip
  OPENCLAW_GATEWAY_URL + PORT + TOKEN. MAX_APPROVALS 8 -> 1 (single realistic
  pending upgrade) and outer cap 12s -> 15s so a slow 10s approve is never
  SIGKILLed mid-loop (2 + 10x1 = 12s < 15s).
- Wire the now-correct pass into onboard finalization (after process recovery,
  before verification) and the `nemoclaw recover` / probe-only path (gateway-up
  branches only, not the failure exit), so `nemoclaw recover` deterministically
  clears a pending scope upgrade.

The CLI scope-upgrade request is only created by the user's first
`openclaw agent` run, so that first run can still fall back once; the
deterministic remedy is `nemoclaw recover`.

Closes #4504

Signed-off-by: Tony Luo <xialuo@nvidia.com>
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../sandbox/connect-autopair-budget.ts        |  22 ++
 src/lib/actions/sandbox/connect.ts            |  39 ++-
 src/lib/onboard.ts                            |   7 +-
 src/lib/onboard/finalization-deps.ts          |  23 ++
 .../machine/handlers/finalization.test.ts     |  52 ++++
 .../onboard/machine/handlers/finalization.ts  |  12 +
 .../auto-pair-approval.test.ts                | 243 +++++++++++++++++-
 test/sandbox-connect-inference/helpers.ts     |  11 +
 8 files changed, 382 insertions(+), 27 deletions(-)
 create mode 100644 src/lib/actions/sandbox/connect-autopair-budget.ts
 create mode 100644 src/lib/onboard/finalization-deps.ts

diff --git a/src/lib/actions/sandbox/connect-autopair-budget.ts b/src/lib/actions/sandbox/connect-autopair-budget.ts
new file mode 100644
index 0000000000..0f4af25364
--- /dev/null
+++ b/src/lib/actions/sandbox/connect-autopair-budget.ts
@@ -0,0 +1,22 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Budget constants for the connect-time auto-pair scope-approval pass
+// (runConnectAutoPairApprovalPass in ./connect). Kept in a dependency-free leaf
+// module so tests can import and assert the invariant on the real values
+// without pulling in connect.ts's heavy transitive requires (#4504).
+
+export const CONNECT_AUTO_PAIR_MAX_APPROVALS = 1;
+// `openclaw devices list` budget (seconds), interpolated into the in-sandbox
+// script so the invariant below is asserted on real values, not source text.
+export const CONNECT_AUTO_PAIR_LIST_TIMEOUT_S = 2;
+// `openclaw devices approve` budget (seconds); matches the in-sandbox watcher's
+// RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh).
+export const CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S = 10;
+// Outer spawnSync cap (ms). Must exceed the internal worst case
+// (CONNECT_AUTO_PAIR_LIST_TIMEOUT_S + CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S ×
+// CONNECT_AUTO_PAIR_MAX_APPROVALS) PLUS shell/python startup, since the outer
+// timer starts at `sh` spawn before the proxy env is sourced and python3
+// launches; the ~3s slack means a legitimate slow approve is never SIGKILLed
+// mid-loop, which would strand the allowlisted request.
+export const CONNECT_AUTO_PAIR_TIMEOUT_MS = 15_000;
diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts
index 5bfc97a3f2..4c2d3443cb 100644
--- a/src/lib/actions/sandbox/connect.ts
+++ b/src/lib/actions/sandbox/connect.ts
@@ -46,6 +46,12 @@ import {
   getActiveSandboxSessions,
 } from "../../state/sandbox-session";
 import { runSetupDnsProxy } from "../dns";
+import {
+  CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S,
+  CONNECT_AUTO_PAIR_LIST_TIMEOUT_S,
+  CONNECT_AUTO_PAIR_MAX_APPROVALS,
+  CONNECT_AUTO_PAIR_TIMEOUT_MS,
+} from "./connect-autopair-budget";
 import { preflightVllmModelEnvOrExit } from "./connect-vllm-preflight";
 import {
   isDockerRuntimeDown,
@@ -206,6 +212,10 @@ function runSandboxConnectProbe(sandboxName: string): void {
   }
   if (processCheck.wasRunning) {
     ensureSandboxInferenceRoute(sandboxName, { quiet: true });
+    // Defense-in-depth scope-upgrade approval on the probe-only / `recover`
+    // path (#4504): the gateway is up, so deterministically clear any pending
+    // allowlisted CLI/webchat scope upgrade. Best-effort; never throws.
+    runConnectAutoPairApprovalPass(sandboxName);
     if (processCheck.forwardRecovered) {
       console.log(
         `  Probe complete: ${agentName} gateway is running in '${sandboxName}'; restored dashboard port forward.`,
@@ -217,6 +227,8 @@ function runSandboxConnectProbe(sandboxName: string): void {
   }
   if (processCheck.recovered) {
     ensureSandboxInferenceRoute(sandboxName, { quiet: true });
+    // Same defense-in-depth approval after a recovery (#4504); best-effort.
+    runConnectAutoPairApprovalPass(sandboxName);
     console.log(`  Probe complete: recovered ${agentName} gateway in '${sandboxName}'.`);
     return;
   }
@@ -773,11 +785,17 @@ function ensureSandboxInferenceRouteOrExit(
 //
 // Failure modes (timeout, sandbox-exec errors, missing openclaw, gateway
 // unreachable) are swallowed: the connect flow must not be blocked by a
-// best-effort approval. Internal timeouts (2s list + 1s x MAX_APPROVALS
-// attempts) fit within the outer spawnSync cap, so a partial-completion
-// mid-loop kill cannot strand allowlisted requests within a normal batch.
-const CONNECT_AUTO_PAIR_MAX_APPROVALS = 8;
-const CONNECT_AUTO_PAIR_TIMEOUT_MS = 12_000;
+// best-effort approval. The approve timeout matches the in-sandbox watcher's
+// RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh) so a scope-upgrade approve via the
+// local pairing fallback has the same budget. MAX_APPROVALS is 1 — the
+// realistic case is a single pending CLI/webchat scope upgrade. The outer
+// spawnSync cap must exceed the internal worst case (2s list + 10s × 1 = 12s)
+// PLUS shell/python startup, since the outer timer starts at `sh` spawn before
+// the proxy env is sourced and python3 launches; 15s leaves ~3s slack so a
+// legitimate slow 10s approve is never SIGKILLed mid-loop, which would strand
+// the allowlisted request (ref #4504). Budget constants live in the
+// dependency-free ./connect-autopair-budget leaf so tests can assert the
+// invariant on the real values without importing this heavy module (#4504).
 const CONNECT_AUTO_PAIR_POLICY_PATH = path.join(
   ROOT,
   "scripts",
@@ -796,7 +814,7 @@ function readConnectAutoPairPolicyModule(): string | null {
   }
 }
 
-function runConnectAutoPairApprovalPass(sandboxName: string): void {
+export function runConnectAutoPairApprovalPass(sandboxName: string): void {
   const approvalPolicyModule = readConnectAutoPairPolicyModule();
   if (!approvalPolicyModule) {
     return;
@@ -831,7 +849,7 @@ MAX_APPROVALS = ${CONNECT_AUTO_PAIR_MAX_APPROVALS}
 try:
     proc = subprocess.run(
         [OPENCLAW, 'devices', 'list', '--json'],
-        capture_output=True, text=True, timeout=2,
+        capture_output=True, text=True, timeout=${CONNECT_AUTO_PAIR_LIST_TIMEOUT_S},
     )
 except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
     sys.exit(0)
@@ -866,7 +884,7 @@ for device in pending:
     try:
         approve_proc = subprocess.run(
             [OPENCLAW, 'devices', 'approve', request_id, '--json'],
-            capture_output=True, text=True, timeout=1, env=approve_env,
+            capture_output=True, text=True, timeout=${CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S}, env=approve_env,
         )
         if approve_proc.returncode == 0:
             approved_count += 1
@@ -877,8 +895,9 @@ exit 0
 `;
   try {
     // Best-effort: discard stdout/stderr. Outer cap is sized to cover the
-    // internal budget (2s list + 1s × MAX_APPROVALS plus shell/python
-    // startup slack) so a wedged sandbox can never block the connect flow.
+    // internal budget (CONNECT_AUTO_PAIR_LIST_TIMEOUT_S list +
+    // CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S × CONNECT_AUTO_PAIR_MAX_APPROVALS plus
+    // shell/python startup slack) so a wedged sandbox can never block connect.
     spawnSync(
       getOpenshellBinary(),
       ["sandbox", "exec", "--name", sandboxName, "--", "sh", "-c", script],
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index b31d0f12f2..04deb5fa00 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -503,6 +503,7 @@ import {
   type MessagingChannelConfig,
   readMessagingChannelConfigFromEnv,
 } from "./messaging-channel-config";
+import { finalizationHandlerDeps } from "./onboard/finalization-deps";
 import { streamGatewayStart } from "./onboard/gateway";
 import {
   mergeRequiredHermesToolGatewayPolicyPresets,
@@ -6588,11 +6589,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
         toSessionUpdates: (updates) => toSessionUpdates(updates as Parameters<typeof toSessionUpdates>[0]),
         removeLegacyCredentialsFile,
         cleanupStaleHostFiles,
-        checkAndRecoverSandboxProcesses: (name, options) => {
-          const processRecovery: typeof import("./actions/sandbox/process-recovery") =
-            require("./actions/sandbox/process-recovery");
-          processRecovery.checkAndRecoverSandboxProcesses(name, options);
-        },
+        ...finalizationHandlerDeps,
         getChatUiUrl: () => process.env.CHAT_UI_URL || `http://127.0.0.1:${DASHBOARD_PORT}`,
         buildVerifyChain: (chatUiUrl) =>
           // biome-ignore format: keep src/lib/onboard.ts net-neutral for growth guardrail.
diff --git a/src/lib/onboard/finalization-deps.ts b/src/lib/onboard/finalization-deps.ts
new file mode 100644
index 0000000000..b9e63a105a
--- /dev/null
+++ b/src/lib/onboard/finalization-deps.ts
@@ -0,0 +1,23 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Lazy-require runtime dependencies for the onboarding finalization handler.
+// Kept in a focused module under src/lib/onboard/ so the top-level onboard
+// entrypoint stays lean (codebase-growth-guardrails). The lazy `require` calls
+// avoid an import cycle: connect.ts and process-recovery.ts both pull in
+// onboard helpers, so they must not be statically imported here.
+export const finalizationHandlerDeps = {
+  checkAndRecoverSandboxProcesses(name: string, options: { quiet: boolean }): void {
+    const processRecovery: typeof import("../actions/sandbox/process-recovery") =
+      require("../actions/sandbox/process-recovery");
+    processRecovery.checkAndRecoverSandboxProcesses(name, options);
+  },
+  // Best-effort device-approval sweep that clears pending allowlisted
+  // CLI/webchat scope upgrades so onboard hands off without a stuck pairing
+  // request (#4504). Never throws.
+  autoPairScopeApproval(name: string): void {
+    const connect: typeof import("../actions/sandbox/connect") =
+      require("../actions/sandbox/connect");
+    connect.runConnectAutoPairApprovalPass(name);
+  },
+};
diff --git a/src/lib/onboard/machine/handlers/finalization.test.ts b/src/lib/onboard/machine/handlers/finalization.test.ts
index b6cfec8202..438ae3f3b9 100644
--- a/src/lib/onboard/machine/handlers/finalization.test.ts
+++ b/src/lib/onboard/machine/handlers/finalization.test.ts
@@ -18,6 +18,7 @@ function createDeps(overrides: Partial<FinalizationStateOptions<Agent, VerifyCha
     removeLegacy: vi.fn(),
     cleanupHost: vi.fn(),
     recoverProcesses: vi.fn(),
+    autoPairScopeApproval: vi.fn(),
     getChatUiUrl: vi.fn(() => "http://127.0.0.1:18789"),
     buildChain: vi.fn(() => ({ port: 18789 })),
     verify: vi.fn(async () => ({ ok: true })),
@@ -37,6 +38,7 @@ function createDeps(overrides: Partial<FinalizationStateOptions<Agent, VerifyCha
       removeLegacyCredentialsFile: calls.removeLegacy,
       cleanupStaleHostFiles: calls.cleanupHost,
       checkAndRecoverSandboxProcesses: calls.recoverProcesses,
+      autoPairScopeApproval: calls.autoPairScopeApproval,
       getChatUiUrl: calls.getChatUiUrl,
       buildVerifyChain: calls.buildChain,
       verifyDeployment: calls.verify,
@@ -177,4 +179,54 @@ describe("handleFinalizationState", () => {
       callsOn.verify.mock.invocationCallOrder[0],
     );
   });
+
+  // Scenario A (#4504): the auto-pair scope-approval sweep runs against the
+  // freshly-recovered gateway — strictly after process recovery (which can
+  // restart the gateway, #3573) and strictly before deployment verification
+  // (so the gateway state is settled before we probe it).
+  it("runs the auto-pair scope-approval sweep after process recovery and before verify (#4504)", async () => {
+    const { deps, calls } = createDeps();
+
+    await handleFinalizationState(baseOptions(deps));
+
+    expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce();
+    expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant");
+    // Ordering: recover → autoPairScopeApproval → verify.
+    expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeGreaterThan(
+      calls.recoverProcesses.mock.invocationCallOrder[0],
+    );
+    expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeLessThan(
+      calls.verify.mock.invocationCallOrder[0],
+    );
+  });
+
+  // Scenario B (#4504): the sweep is agent-agnostic — the stuck CLI/webchat
+  // scope upgrade can occur regardless of which agent the sandbox runs.
+  it("runs the scope-approval sweep regardless of agent type (#4504)", async () => {
+    const { deps, calls } = createDeps();
+    const agent = { name: "hermes" };
+
+    await handleFinalizationState({ ...baseOptions(deps), agent });
+
+    expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant");
+  });
+
+  // Scenario C (#4504): the dep is documented as best-effort / never-throws and
+  // the handler wraps no try/catch around it. Per the contract we assert the
+  // implemented behavior: the sweep is invoked and, because it returns cleanly,
+  // finalization proceeds to completion (recordSessionComplete still runs). A
+  // dep that threw would abort finalization here — the regression this guards.
+  it("treats the scope-approval sweep as best-effort and still completes the session (#4504)", async () => {
+    const { deps, calls } = createDeps();
+
+    const result = await handleFinalizationState(baseOptions(deps));
+
+    expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce();
+    // The non-throwing sweep does not abort finalization: it proceeds through
+    // verification and the dashboard print to a completed result. (#4472 moved
+    // session completion to the imported completeOnboardMachine, so completion
+    // is asserted via the downstream dashboard + diagnostics rather than a dep.)
+    expect(calls.dashboard).toHaveBeenCalledOnce();
+    expect(result.verificationDiagnostics).toEqual(["  ✓ verified"]);
+  });
 });
diff --git a/src/lib/onboard/machine/handlers/finalization.ts b/src/lib/onboard/machine/handlers/finalization.ts
index a89d0920c9..d0ccf0750f 100644
--- a/src/lib/onboard/machine/handlers/finalization.ts
+++ b/src/lib/onboard/machine/handlers/finalization.ts
@@ -28,6 +28,14 @@ export interface FinalizationStateOptions<Agent, VerifyChain, VerificationResult
     removeLegacyCredentialsFile(): void;
     cleanupStaleHostFiles(): void;
     checkAndRecoverSandboxProcesses(sandboxName: string, options: { quiet: boolean }): void;
+    /**
+     * Best-effort device-approval sweep that clears pending allowlisted
+     * CLI/webchat scope upgrades before handoff. Never throws; swallows its own
+     * failures (timeout, sandbox-exec errors). Run after process recovery
+     * because that can restart the gateway (#3573), so the sweep targets the
+     * freshly-recovered gateway (ref #4504 / #4263).
+     */
+    autoPairScopeApproval(sandboxName: string): void;
     getChatUiUrl(): string;
     buildVerifyChain(chatUiUrl: string): VerifyChain;
     verifyDeployment(sandboxName: string, chain: VerifyChain): Promise<VerificationResult>;
@@ -94,6 +102,10 @@ export async function handleFinalizationState<Agent, VerifyChain, VerificationRe
   deps.cleanupStaleHostFiles();
   // Policy application can restart the sandbox; recover OpenClaw before verification (#3573).
   deps.checkAndRecoverSandboxProcesses(sandboxName, { quiet: true });
+  // Clear any pending allowlisted scope upgrade against the freshly-recovered
+  // gateway before verification, so onboard hands off without a stuck pairing
+  // request (#4504 / #4263). Best-effort; never blocks.
+  deps.autoPairScopeApproval(sandboxName);
 
   // Probe Brave Search egress through the L7 proxy now that the final
   // policy and provider state are live — earlier probes would race the
diff --git a/test/sandbox-connect-inference/auto-pair-approval.test.ts b/test/sandbox-connect-inference/auto-pair-approval.test.ts
index 3e54cd04cd..7fd5aa5a2a 100644
--- a/test/sandbox-connect-inference/auto-pair-approval.test.ts
+++ b/test/sandbox-connect-inference/auto-pair-approval.test.ts
@@ -5,9 +5,25 @@ import fs from "node:fs";
 import path from "node:path";
 import { describe, expect, it } from "vitest";
 
+import {
+  CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S,
+  CONNECT_AUTO_PAIR_LIST_TIMEOUT_S,
+  CONNECT_AUTO_PAIR_MAX_APPROVALS,
+  CONNECT_AUTO_PAIR_TIMEOUT_MS,
+} from "../../src/lib/actions/sandbox/connect-autopair-budget";
 import { testTimeoutOptions } from "../helpers/timeouts";
 import { extractApprovalPassScript, runApprovalPassScript, runConnect, setupFixture } from "./helpers";
 
+function findApprovalExec(sandboxExecCalls: string[][]): string[] | undefined {
+  return sandboxExecCalls.find(
+    (call) =>
+      call.includes("--") &&
+      call.some((segment) => segment.includes("openclaw")) &&
+      call.some((segment) => segment.includes("devices")) &&
+      call.some((segment) => segment.includes("approve")),
+  );
+}
+
 describe("sandbox connect auto-pair approval pass (#4263)", () => {
   it(
     "runs a bounded openclaw devices approval pass before opening SSH",
@@ -73,13 +89,13 @@ describe("sandbox connect auto-pair approval pass (#4263)", () => {
       const result = runConnect(tmpDir, sandboxName);
       expect(result.status).toBe(0);
       const script = extractApprovalPassScript(stateFile, sandboxName);
+      // Disallowed/malformed/unknown requests are skipped by the policy before
+      // an approve is even attempted (they `continue` before the attempt
+      // counter increments), so they do not consume the MAX_APPROVALS=1 budget
+      // (#4504). They are ordered first here to prove the rejection path runs;
+      // the single allowed request (`ok-cli`) is then approved and exhausts the
+      // one-attempt budget, so the trailing duplicate `ok-cli` is never reached.
       const run = runApprovalPassScript(script, [
-        {
-          requestId: "ok-cli",
-          clientId: "openclaw-cli",
-          clientMode: "cli",
-          scopes: ["operator.read", "operator.write"],
-        },
         {
           requestId: "admin-cli",
           clientId: "openclaw-cli",
@@ -99,22 +115,24 @@ describe("sandbox connect auto-pair approval pass (#4263)", () => {
           scopes: ["operator.read"],
         },
         {
-          requestId: "dedupe-cli",
+          requestId: "ok-cli",
           clientId: "openclaw-cli",
           clientMode: "cli",
-          requestedScopes: ["operator.read"],
+          scopes: ["operator.read", "operator.write"],
         },
         {
-          requestId: "dedupe-cli",
+          requestId: "ok-cli",
           clientId: "openclaw-cli",
           clientMode: "cli",
-          requestedScopes: ["operator.read"],
+          scopes: ["operator.read", "operator.write"],
         },
       ]);
 
       expect(run.result.status).toBe(0);
-      expect(run.approvals).toEqual(["ok-cli", "dedupe-cli"]);
-      expect(run.approvalEnv).toEqual(["unset:unset:unset", "unset:unset:unset"]);
+      // Only the first allowed request is approved — MAX_APPROVALS is 1 (#4504),
+      // the realistic single pending CLI/webchat scope upgrade.
+      expect(run.approvals).toEqual(["ok-cli"]);
+      expect(run.approvalEnv).toEqual(["unset:unset:unset"]);
     },
   );
 
@@ -214,3 +232,204 @@ describe("sandbox connect auto-pair approval pass (#4263)", () => {
     },
   );
 });
+
+// The #4504 fix also wires the approval pass into the `nemoclaw recover` /
+// `connect --probe-only` path (defect A) — the gateway-up branches only, never
+// the gateway-down failure exit — and re-uses the shared policy's
+// gateway_approval_env so a scope-upgrade approve drops the full gateway env
+// triplet (#4462) on the watcher's 10s budget while staying within the outer
+// spawnSync cap (defect B). The interactive-connect cases above cover the
+// allowlist and best-effort semantics; these add the probe-path wiring,
+// gateway-down negative, and the budget invariant on the real constants.
+describe("sandbox connect scope-upgrade approval on recover/probe (#4504)", () => {
+  it(
+    "runs the approval pass on the --probe-only (recover) path",
+    testTimeoutOptions(20_000),
+    () => {
+      // The probe takes the wasRunning branch (the fake openshell health probe
+      // reports RUNNING), so the recover path must run the sweep — and must NOT
+      // open an SSH session.
+      const { tmpDir, stateFile, sandboxName } = setupFixture(
+        {
+          name: "probe-approval-sandbox",
+          model: "claude-sonnet-4-20250514",
+          provider: "anthropic-prod",
+          gpuEnabled: false,
+          policies: [],
+        },
+        "anthropic-prod",
+        "claude-sonnet-4-20250514",
+      );
+
+      const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]);
+      expect(result.status).toBe(0);
+
+      const state = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
+      const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]);
+      expect(approvalExec).toBeDefined();
+      expect(approvalExec).toContain("sandbox");
+      expect(approvalExec).toContain("exec");
+      expect(approvalExec).toContain("--name");
+      expect(approvalExec).toContain(sandboxName);
+      // probe-only never opens an SSH connect session.
+      expect(state.sandboxConnectCalls).toEqual([]);
+    },
+  );
+
+  it(
+    "does not fail the recover path when the probe approval pass errors",
+    testTimeoutOptions(20_000),
+    () => {
+      // Best-effort: even when the in-sandbox approval exec exits non-zero, the
+      // probe-only flow must still succeed.
+      const { tmpDir, stateFile, sandboxName } = setupFixture(
+        {
+          name: "probe-approval-tolerant",
+          model: "claude-sonnet-4-20250514",
+          provider: "anthropic-prod",
+          gpuEnabled: false,
+          policies: [],
+        },
+        "anthropic-prod",
+        "claude-sonnet-4-20250514",
+      );
+
+      const result = runConnect(
+        tmpDir,
+        sandboxName,
+        { NEMOCLAW_TEST_FAIL_APPROVAL_PASS: "1" },
+        ["--probe-only"],
+      );
+      expect(result.status).toBe(0);
+
+      const state = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
+      const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]);
+      expect(approvalExec).toBeDefined();
+    },
+  );
+
+  it(
+    "does not run the approval pass when the probe fails (gateway down, recovery fails)",
+    testTimeoutOptions(20_000),
+    () => {
+      // The sweep is wired only into the wasRunning and recovered success
+      // branches — never the not-running failure exit, where the gateway is
+      // down. Force the health probe to report STOPPED and let recovery fail so
+      // the probe lands on the failure branch; the approval pass must NOT run.
+      const { tmpDir, stateFile, sandboxName } = setupFixture(
+        {
+          name: "probe-gateway-down",
+          model: "claude-sonnet-4-20250514",
+          provider: "anthropic-prod",
+          gpuEnabled: false,
+          policies: [],
+        },
+        "anthropic-prod",
+        "claude-sonnet-4-20250514",
+      );
+
+      const result = runConnect(
+        tmpDir,
+        sandboxName,
+        { NEMOCLAW_TEST_GATEWAY_DOWN: "1" },
+        ["--probe-only"],
+      );
+      expect(result.status).toBe(1);
+
+      const state = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
+      const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]);
+      expect(approvalExec).toBeUndefined();
+      // And it never opens an SSH session on the failure path.
+      expect(state.sandboxConnectCalls).toEqual([]);
+    },
+  );
+
+  it(
+    "approve child strips the full gateway env triplet on the probe path (#4462)",
+    testTimeoutOptions(20_000),
+    () => {
+      // The probe-path approve must drop OPENCLAW_GATEWAY_URL/_PORT/_TOKEN via
+      // the shared policy's gateway_approval_env so the local pairing fallback
+      // cannot re-pin to the gateway and hit the #4462 self-defeat. Render the
+      // probe-path script, then actually run it and assert the approve child
+      // saw none of the triplet.
+      const { tmpDir, stateFile, sandboxName } = setupFixture(
+        {
+          name: "probe-env-strip-sandbox",
+          model: "claude-sonnet-4-20250514",
+          provider: "anthropic-prod",
+          gpuEnabled: false,
+          policies: [],
+        },
+        "anthropic-prod",
+        "claude-sonnet-4-20250514",
+      );
+
+      const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]);
+      expect(result.status).toBe(0);
+
+      const script = extractApprovalPassScript(stateFile, sandboxName);
+      expect(script).toContain("approve_env = gateway_approval_env(os.environ)");
+      expect(script).toContain("env=approve_env");
+
+      const run = runApprovalPassScript(script, [
+        {
+          requestId: "probe-cli",
+          clientId: "openclaw-cli",
+          clientMode: "cli",
+          scopes: ["operator.read", "operator.write"],
+        },
+      ]);
+      expect(run.result.status).toBe(0);
+      expect(run.approvals).toEqual(["probe-cli"]);
+      // The approve child saw none of the gateway env triplet (#4462).
+      expect(run.approvalEnv).toEqual(["unset:unset:unset"]);
+    },
+  );
+
+  it(
+    "approve timeout matches the watcher (10s), list keeps 2s, and stays within the outer cap",
+    testTimeoutOptions(20_000),
+    () => {
+      const { tmpDir, stateFile, sandboxName } = setupFixture(
+        {
+          name: "approve-budget-sandbox",
+          model: "claude-sonnet-4-20250514",
+          provider: "anthropic-prod",
+          gpuEnabled: false,
+          policies: [],
+        },
+        "anthropic-prod",
+        "claude-sonnet-4-20250514",
+      );
+
+      const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]);
+      expect(result.status).toBe(0);
+
+      const script = extractApprovalPassScript(stateFile, sandboxName);
+      // The rendered script interpolates the exported budget constants, tying
+      // runtime behaviour to the values the invariant below asserts on (no
+      // source-text scraping — numbers come from the imported constants).
+      expect(script).toContain("[OPENCLAW, 'devices', 'list', '--json']");
+      expect(script).toContain(`timeout=${CONNECT_AUTO_PAIR_LIST_TIMEOUT_S},`);
+      expect(script).toContain(`timeout=${CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S},`);
+      expect(script).toContain(`MAX_APPROVALS = ${CONNECT_AUTO_PAIR_MAX_APPROVALS}`);
+
+      // Approve budget matches the in-sandbox watcher RUN_TIMEOUT_SECS = 10;
+      // list budget is 2s.
+      expect(CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S).toBe(10);
+      expect(CONNECT_AUTO_PAIR_LIST_TIMEOUT_S).toBe(2);
+
+      // Budget invariant: the inner worst case (list + approve × MAX_APPROVALS)
+      // must stay STRICTLY below the outer spawnSync cap. The outer timer starts
+      // when `sh` is spawned — before shell startup, sourcing the proxy env, the
+      // python3 launch, and `devices list` even begin — so the cap must leave
+      // slack above the inner budget, or a legitimate slow 10s approve is killed
+      // mid-loop and the allowlisted request is stranded (#4504).
+      const innerBudgetSeconds =
+        CONNECT_AUTO_PAIR_LIST_TIMEOUT_S +
+        CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S * CONNECT_AUTO_PAIR_MAX_APPROVALS;
+      expect(innerBudgetSeconds).toBeLessThan(CONNECT_AUTO_PAIR_TIMEOUT_MS / 1000);
+    },
+  );
+});
diff --git a/test/sandbox-connect-inference/helpers.ts b/test/sandbox-connect-inference/helpers.ts
index 2e4c4a5dff..6140419769 100644
--- a/test/sandbox-connect-inference/helpers.ts
+++ b/test/sandbox-connect-inference/helpers.ts
@@ -160,6 +160,17 @@ if (args[0] === "sandbox" && args[1] === "exec") {
       process.stderr.write("simulated sandbox exec failure\\n");
       process.exit(7);
     }
+    // Test hook (#4504): force the in-sandbox gateway health probe to report
+    // STOPPED so the probe path takes the not-running branch and (when recovery
+    // also fails) the probe-failure exit — where the approval sweep must NOT run.
+    if (
+      process.env.NEMOCLAW_TEST_GATEWAY_DOWN === "1" &&
+      command.includes("/health") &&
+      command.includes("HTTP_CODE")
+    ) {
+      process.stdout.write("__NEMOCLAW_SANDBOX_EXEC_STARTED__\\nSTOPPED\\n");
+      process.exit(0);
+    }
     process.stdout.write("__NEMOCLAW_SANDBOX_EXEC_STARTED__\\nRUNNING\\n");
     process.exit(0);
   }

From 6ab4c05f95b9926f868b878bab4f724dc254a2a2 Mon Sep 17 00:00:00 2001
From: Tony Luo <xialuo@nvidia.com>
Date: Mon, 8 Jun 2026 10:44:24 +0800
Subject: [PATCH 2/2] docs(connect): clarify approve env strips full gateway
 triplet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The auto-pair approval-pass comment still described the old behavior
(removing OPENCLAW_GATEWAY_URL only). Update it to reflect the shared
gateway_approval_env() policy helper, which strips URL + PORT + TOKEN —
the full triplet that avoids the #4462 self-defeat. Comment-only.

Signed-off-by: Tony Luo <xialuo@nvidia.com>
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/lib/actions/sandbox/connect.ts | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts
index 4c2d3443cb..1c0ac0f7b7 100644
--- a/src/lib/actions/sandbox/connect.ts
+++ b/src/lib/actions/sandbox/connect.ts
@@ -771,8 +771,10 @@ function ensureSandboxInferenceRouteOrExit(
 // when the user runs `nemoclaw <sandbox> connect`. The script sources
 // `/tmp/nemoclaw-proxy-env.sh` (written by `nemoclaw-start.sh`) so the
 // in-sandbox `openclaw devices list` invocation targets the running gateway
-// with its token. Approvals then use OpenClaw's local fallback by removing
-// OPENCLAW_GATEWAY_URL only from the child env, and apply the same allowlist
+// with its token. Approvals then use OpenClaw's local fallback via the shared
+// `gateway_approval_env()` policy helper, which strips OPENCLAW_GATEWAY_URL,
+// OPENCLAW_GATEWAY_PORT, and OPENCLAW_GATEWAY_TOKEN from the child env (the full
+// triplet needed to avoid the #4462 self-defeat), and apply the same allowlist
 // as the startup watcher — `openclaw-control-ui` clients plus `webchat`/`cli`
 // modes. Unknown clients are ignored, not approved.
 //