From 0c25fc185804792d84678285fd44420d9e16f881 Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Thu, 4 Jun 2026 17:18:57 +0800 Subject: [PATCH 1/2] fix(onboard): pre-approve gateway scope upgrades after onboard and recover MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a fresh onboard, the first `openclaw agent` run emitted "scope upgrade pending approval" and silently fell back to embedded mode, and the same pending requestId persisted across every subsequent run — the approval never auto-resolved. Root cause was a divergence in the host one-shot auto-pair approval pass: it ran `openclaw devices approve` with a 1s timeout and stripped only OPENCLAW_GATEWAY_URL from the child env. Leaving PORT and TOKEN set re-pinned the approve to the gateway, hitting the #4462 self-defeat where a gateway-pinned scope-upgrade approve requests the very scopes it is trying to grant and fails — so the request was never cleared. - Match the in-sandbox watcher: approve timeout 1s -> 10s, strip OPENCLAW_GATEWAY_URL + PORT + TOKEN. MAX_APPROVALS 8 -> 1 (single realistic pending upgrade) and outer cap 12s -> 15s so a slow 10s approve is never SIGKILLed mid-loop (2 + 10x1 = 12s < 15s). - Wire the now-correct pass into onboard finalization (after process recovery, before verification) and the `nemoclaw recover` / probe-only path (gateway-up branches only, not the failure exit), so `nemoclaw recover` deterministically clears a pending scope upgrade. The CLI scope-upgrade request is only created by the user's first `openclaw agent` run, so that first run can still fall back once; the deterministic remedy is `nemoclaw recover`. Closes #4504 Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- .../sandbox/connect-autopair-budget.ts | 22 ++ src/lib/actions/sandbox/connect.ts | 39 ++- src/lib/onboard.ts | 7 +- src/lib/onboard/finalization-deps.ts | 23 ++ .../machine/handlers/finalization.test.ts | 52 ++++ .../onboard/machine/handlers/finalization.ts | 12 + .../auto-pair-approval.test.ts | 243 +++++++++++++++++- test/sandbox-connect-inference/helpers.ts | 11 + 8 files changed, 382 insertions(+), 27 deletions(-) create mode 100644 src/lib/actions/sandbox/connect-autopair-budget.ts create mode 100644 src/lib/onboard/finalization-deps.ts diff --git a/src/lib/actions/sandbox/connect-autopair-budget.ts b/src/lib/actions/sandbox/connect-autopair-budget.ts new file mode 100644 index 0000000000..0f4af25364 --- /dev/null +++ b/src/lib/actions/sandbox/connect-autopair-budget.ts @@ -0,0 +1,22 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Budget constants for the connect-time auto-pair scope-approval pass +// (runConnectAutoPairApprovalPass in ./connect). Kept in a dependency-free leaf +// module so tests can import and assert the invariant on the real values +// without pulling in connect.ts's heavy transitive requires (#4504). + +export const CONNECT_AUTO_PAIR_MAX_APPROVALS = 1; +// `openclaw devices list` budget (seconds), interpolated into the in-sandbox +// script so the invariant below is asserted on real values, not source text. +export const CONNECT_AUTO_PAIR_LIST_TIMEOUT_S = 2; +// `openclaw devices approve` budget (seconds); matches the in-sandbox watcher's +// RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh). +export const CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S = 10; +// Outer spawnSync cap (ms). Must exceed the internal worst case +// (CONNECT_AUTO_PAIR_LIST_TIMEOUT_S + CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S × +// CONNECT_AUTO_PAIR_MAX_APPROVALS) PLUS shell/python startup, since the outer +// timer starts at `sh` spawn before the proxy env is sourced and python3 +// launches; the ~3s slack means a legitimate slow approve is never SIGKILLed +// mid-loop, which would strand the allowlisted request. +export const CONNECT_AUTO_PAIR_TIMEOUT_MS = 15_000; diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index 5bfc97a3f2..4c2d3443cb 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -46,6 +46,12 @@ import { getActiveSandboxSessions, } from "../../state/sandbox-session"; import { runSetupDnsProxy } from "../dns"; +import { + CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S, + CONNECT_AUTO_PAIR_LIST_TIMEOUT_S, + CONNECT_AUTO_PAIR_MAX_APPROVALS, + CONNECT_AUTO_PAIR_TIMEOUT_MS, +} from "./connect-autopair-budget"; import { preflightVllmModelEnvOrExit } from "./connect-vllm-preflight"; import { isDockerRuntimeDown, @@ -206,6 +212,10 @@ function runSandboxConnectProbe(sandboxName: string): void { } if (processCheck.wasRunning) { ensureSandboxInferenceRoute(sandboxName, { quiet: true }); + // Defense-in-depth scope-upgrade approval on the probe-only / `recover` + // path (#4504): the gateway is up, so deterministically clear any pending + // allowlisted CLI/webchat scope upgrade. Best-effort; never throws. + runConnectAutoPairApprovalPass(sandboxName); if (processCheck.forwardRecovered) { console.log( ` Probe complete: ${agentName} gateway is running in '${sandboxName}'; restored dashboard port forward.`, @@ -217,6 +227,8 @@ function runSandboxConnectProbe(sandboxName: string): void { } if (processCheck.recovered) { ensureSandboxInferenceRoute(sandboxName, { quiet: true }); + // Same defense-in-depth approval after a recovery (#4504); best-effort. + runConnectAutoPairApprovalPass(sandboxName); console.log(` Probe complete: recovered ${agentName} gateway in '${sandboxName}'.`); return; } @@ -773,11 +785,17 @@ function ensureSandboxInferenceRouteOrExit( // // Failure modes (timeout, sandbox-exec errors, missing openclaw, gateway // unreachable) are swallowed: the connect flow must not be blocked by a -// best-effort approval. Internal timeouts (2s list + 1s x MAX_APPROVALS -// attempts) fit within the outer spawnSync cap, so a partial-completion -// mid-loop kill cannot strand allowlisted requests within a normal batch. -const CONNECT_AUTO_PAIR_MAX_APPROVALS = 8; -const CONNECT_AUTO_PAIR_TIMEOUT_MS = 12_000; +// best-effort approval. The approve timeout matches the in-sandbox watcher's +// RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh) so a scope-upgrade approve via the +// local pairing fallback has the same budget. MAX_APPROVALS is 1 — the +// realistic case is a single pending CLI/webchat scope upgrade. The outer +// spawnSync cap must exceed the internal worst case (2s list + 10s × 1 = 12s) +// PLUS shell/python startup, since the outer timer starts at `sh` spawn before +// the proxy env is sourced and python3 launches; 15s leaves ~3s slack so a +// legitimate slow 10s approve is never SIGKILLed mid-loop, which would strand +// the allowlisted request (ref #4504). Budget constants live in the +// dependency-free ./connect-autopair-budget leaf so tests can assert the +// invariant on the real values without importing this heavy module (#4504). const CONNECT_AUTO_PAIR_POLICY_PATH = path.join( ROOT, "scripts", @@ -796,7 +814,7 @@ function readConnectAutoPairPolicyModule(): string | null { } } -function runConnectAutoPairApprovalPass(sandboxName: string): void { +export function runConnectAutoPairApprovalPass(sandboxName: string): void { const approvalPolicyModule = readConnectAutoPairPolicyModule(); if (!approvalPolicyModule) { return; @@ -831,7 +849,7 @@ MAX_APPROVALS = ${CONNECT_AUTO_PAIR_MAX_APPROVALS} try: proc = subprocess.run( [OPENCLAW, 'devices', 'list', '--json'], - capture_output=True, text=True, timeout=2, + capture_output=True, text=True, timeout=${CONNECT_AUTO_PAIR_LIST_TIMEOUT_S}, ) except (subprocess.TimeoutExpired, FileNotFoundError, OSError): sys.exit(0) @@ -866,7 +884,7 @@ for device in pending: try: approve_proc = subprocess.run( [OPENCLAW, 'devices', 'approve', request_id, '--json'], - capture_output=True, text=True, timeout=1, env=approve_env, + capture_output=True, text=True, timeout=${CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S}, env=approve_env, ) if approve_proc.returncode == 0: approved_count += 1 @@ -877,8 +895,9 @@ exit 0 `; try { // Best-effort: discard stdout/stderr. Outer cap is sized to cover the - // internal budget (2s list + 1s × MAX_APPROVALS plus shell/python - // startup slack) so a wedged sandbox can never block the connect flow. + // internal budget (CONNECT_AUTO_PAIR_LIST_TIMEOUT_S list + + // CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S × CONNECT_AUTO_PAIR_MAX_APPROVALS plus + // shell/python startup slack) so a wedged sandbox can never block connect. spawnSync( getOpenshellBinary(), ["sandbox", "exec", "--name", sandboxName, "--", "sh", "-c", script], diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index b31d0f12f2..04deb5fa00 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -503,6 +503,7 @@ import { type MessagingChannelConfig, readMessagingChannelConfigFromEnv, } from "./messaging-channel-config"; +import { finalizationHandlerDeps } from "./onboard/finalization-deps"; import { streamGatewayStart } from "./onboard/gateway"; import { mergeRequiredHermesToolGatewayPolicyPresets, @@ -6588,11 +6589,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { toSessionUpdates: (updates) => toSessionUpdates(updates as Parameters[0]), removeLegacyCredentialsFile, cleanupStaleHostFiles, - checkAndRecoverSandboxProcesses: (name, options) => { - const processRecovery: typeof import("./actions/sandbox/process-recovery") = - require("./actions/sandbox/process-recovery"); - processRecovery.checkAndRecoverSandboxProcesses(name, options); - }, + ...finalizationHandlerDeps, getChatUiUrl: () => process.env.CHAT_UI_URL || `http://127.0.0.1:${DASHBOARD_PORT}`, buildVerifyChain: (chatUiUrl) => // biome-ignore format: keep src/lib/onboard.ts net-neutral for growth guardrail. diff --git a/src/lib/onboard/finalization-deps.ts b/src/lib/onboard/finalization-deps.ts new file mode 100644 index 0000000000..b9e63a105a --- /dev/null +++ b/src/lib/onboard/finalization-deps.ts @@ -0,0 +1,23 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Lazy-require runtime dependencies for the onboarding finalization handler. +// Kept in a focused module under src/lib/onboard/ so the top-level onboard +// entrypoint stays lean (codebase-growth-guardrails). The lazy `require` calls +// avoid an import cycle: connect.ts and process-recovery.ts both pull in +// onboard helpers, so they must not be statically imported here. +export const finalizationHandlerDeps = { + checkAndRecoverSandboxProcesses(name: string, options: { quiet: boolean }): void { + const processRecovery: typeof import("../actions/sandbox/process-recovery") = + require("../actions/sandbox/process-recovery"); + processRecovery.checkAndRecoverSandboxProcesses(name, options); + }, + // Best-effort device-approval sweep that clears pending allowlisted + // CLI/webchat scope upgrades so onboard hands off without a stuck pairing + // request (#4504). Never throws. + autoPairScopeApproval(name: string): void { + const connect: typeof import("../actions/sandbox/connect") = + require("../actions/sandbox/connect"); + connect.runConnectAutoPairApprovalPass(name); + }, +}; diff --git a/src/lib/onboard/machine/handlers/finalization.test.ts b/src/lib/onboard/machine/handlers/finalization.test.ts index b6cfec8202..438ae3f3b9 100644 --- a/src/lib/onboard/machine/handlers/finalization.test.ts +++ b/src/lib/onboard/machine/handlers/finalization.test.ts @@ -18,6 +18,7 @@ function createDeps(overrides: Partial "http://127.0.0.1:18789"), buildChain: vi.fn(() => ({ port: 18789 })), verify: vi.fn(async () => ({ ok: true })), @@ -37,6 +38,7 @@ function createDeps(overrides: Partial { callsOn.verify.mock.invocationCallOrder[0], ); }); + + // Scenario A (#4504): the auto-pair scope-approval sweep runs against the + // freshly-recovered gateway — strictly after process recovery (which can + // restart the gateway, #3573) and strictly before deployment verification + // (so the gateway state is settled before we probe it). + it("runs the auto-pair scope-approval sweep after process recovery and before verify (#4504)", async () => { + const { deps, calls } = createDeps(); + + await handleFinalizationState(baseOptions(deps)); + + expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce(); + expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant"); + // Ordering: recover → autoPairScopeApproval → verify. + expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeGreaterThan( + calls.recoverProcesses.mock.invocationCallOrder[0], + ); + expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeLessThan( + calls.verify.mock.invocationCallOrder[0], + ); + }); + + // Scenario B (#4504): the sweep is agent-agnostic — the stuck CLI/webchat + // scope upgrade can occur regardless of which agent the sandbox runs. + it("runs the scope-approval sweep regardless of agent type (#4504)", async () => { + const { deps, calls } = createDeps(); + const agent = { name: "hermes" }; + + await handleFinalizationState({ ...baseOptions(deps), agent }); + + expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant"); + }); + + // Scenario C (#4504): the dep is documented as best-effort / never-throws and + // the handler wraps no try/catch around it. Per the contract we assert the + // implemented behavior: the sweep is invoked and, because it returns cleanly, + // finalization proceeds to completion (recordSessionComplete still runs). A + // dep that threw would abort finalization here — the regression this guards. + it("treats the scope-approval sweep as best-effort and still completes the session (#4504)", async () => { + const { deps, calls } = createDeps(); + + const result = await handleFinalizationState(baseOptions(deps)); + + expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce(); + // The non-throwing sweep does not abort finalization: it proceeds through + // verification and the dashboard print to a completed result. (#4472 moved + // session completion to the imported completeOnboardMachine, so completion + // is asserted via the downstream dashboard + diagnostics rather than a dep.) + expect(calls.dashboard).toHaveBeenCalledOnce(); + expect(result.verificationDiagnostics).toEqual([" ✓ verified"]); + }); }); diff --git a/src/lib/onboard/machine/handlers/finalization.ts b/src/lib/onboard/machine/handlers/finalization.ts index a89d0920c9..d0ccf0750f 100644 --- a/src/lib/onboard/machine/handlers/finalization.ts +++ b/src/lib/onboard/machine/handlers/finalization.ts @@ -28,6 +28,14 @@ export interface FinalizationStateOptions; @@ -94,6 +102,10 @@ export async function handleFinalizationState + call.includes("--") && + call.some((segment) => segment.includes("openclaw")) && + call.some((segment) => segment.includes("devices")) && + call.some((segment) => segment.includes("approve")), + ); +} + describe("sandbox connect auto-pair approval pass (#4263)", () => { it( "runs a bounded openclaw devices approval pass before opening SSH", @@ -73,13 +89,13 @@ describe("sandbox connect auto-pair approval pass (#4263)", () => { const result = runConnect(tmpDir, sandboxName); expect(result.status).toBe(0); const script = extractApprovalPassScript(stateFile, sandboxName); + // Disallowed/malformed/unknown requests are skipped by the policy before + // an approve is even attempted (they `continue` before the attempt + // counter increments), so they do not consume the MAX_APPROVALS=1 budget + // (#4504). They are ordered first here to prove the rejection path runs; + // the single allowed request (`ok-cli`) is then approved and exhausts the + // one-attempt budget, so the trailing duplicate `ok-cli` is never reached. const run = runApprovalPassScript(script, [ - { - requestId: "ok-cli", - clientId: "openclaw-cli", - clientMode: "cli", - scopes: ["operator.read", "operator.write"], - }, { requestId: "admin-cli", clientId: "openclaw-cli", @@ -99,22 +115,24 @@ describe("sandbox connect auto-pair approval pass (#4263)", () => { scopes: ["operator.read"], }, { - requestId: "dedupe-cli", + requestId: "ok-cli", clientId: "openclaw-cli", clientMode: "cli", - requestedScopes: ["operator.read"], + scopes: ["operator.read", "operator.write"], }, { - requestId: "dedupe-cli", + requestId: "ok-cli", clientId: "openclaw-cli", clientMode: "cli", - requestedScopes: ["operator.read"], + scopes: ["operator.read", "operator.write"], }, ]); expect(run.result.status).toBe(0); - expect(run.approvals).toEqual(["ok-cli", "dedupe-cli"]); - expect(run.approvalEnv).toEqual(["unset:unset:unset", "unset:unset:unset"]); + // Only the first allowed request is approved — MAX_APPROVALS is 1 (#4504), + // the realistic single pending CLI/webchat scope upgrade. + expect(run.approvals).toEqual(["ok-cli"]); + expect(run.approvalEnv).toEqual(["unset:unset:unset"]); }, ); @@ -214,3 +232,204 @@ describe("sandbox connect auto-pair approval pass (#4263)", () => { }, ); }); + +// The #4504 fix also wires the approval pass into the `nemoclaw recover` / +// `connect --probe-only` path (defect A) — the gateway-up branches only, never +// the gateway-down failure exit — and re-uses the shared policy's +// gateway_approval_env so a scope-upgrade approve drops the full gateway env +// triplet (#4462) on the watcher's 10s budget while staying within the outer +// spawnSync cap (defect B). The interactive-connect cases above cover the +// allowlist and best-effort semantics; these add the probe-path wiring, +// gateway-down negative, and the budget invariant on the real constants. +describe("sandbox connect scope-upgrade approval on recover/probe (#4504)", () => { + it( + "runs the approval pass on the --probe-only (recover) path", + testTimeoutOptions(20_000), + () => { + // The probe takes the wasRunning branch (the fake openshell health probe + // reports RUNNING), so the recover path must run the sweep — and must NOT + // open an SSH session. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "probe-approval-sandbox", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeDefined(); + expect(approvalExec).toContain("sandbox"); + expect(approvalExec).toContain("exec"); + expect(approvalExec).toContain("--name"); + expect(approvalExec).toContain(sandboxName); + // probe-only never opens an SSH connect session. + expect(state.sandboxConnectCalls).toEqual([]); + }, + ); + + it( + "does not fail the recover path when the probe approval pass errors", + testTimeoutOptions(20_000), + () => { + // Best-effort: even when the in-sandbox approval exec exits non-zero, the + // probe-only flow must still succeed. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "probe-approval-tolerant", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect( + tmpDir, + sandboxName, + { NEMOCLAW_TEST_FAIL_APPROVAL_PASS: "1" }, + ["--probe-only"], + ); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeDefined(); + }, + ); + + it( + "does not run the approval pass when the probe fails (gateway down, recovery fails)", + testTimeoutOptions(20_000), + () => { + // The sweep is wired only into the wasRunning and recovered success + // branches — never the not-running failure exit, where the gateway is + // down. Force the health probe to report STOPPED and let recovery fail so + // the probe lands on the failure branch; the approval pass must NOT run. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "probe-gateway-down", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect( + tmpDir, + sandboxName, + { NEMOCLAW_TEST_GATEWAY_DOWN: "1" }, + ["--probe-only"], + ); + expect(result.status).toBe(1); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeUndefined(); + // And it never opens an SSH session on the failure path. + expect(state.sandboxConnectCalls).toEqual([]); + }, + ); + + it( + "approve child strips the full gateway env triplet on the probe path (#4462)", + testTimeoutOptions(20_000), + () => { + // The probe-path approve must drop OPENCLAW_GATEWAY_URL/_PORT/_TOKEN via + // the shared policy's gateway_approval_env so the local pairing fallback + // cannot re-pin to the gateway and hit the #4462 self-defeat. Render the + // probe-path script, then actually run it and assert the approve child + // saw none of the triplet. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "probe-env-strip-sandbox", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); + expect(result.status).toBe(0); + + const script = extractApprovalPassScript(stateFile, sandboxName); + expect(script).toContain("approve_env = gateway_approval_env(os.environ)"); + expect(script).toContain("env=approve_env"); + + const run = runApprovalPassScript(script, [ + { + requestId: "probe-cli", + clientId: "openclaw-cli", + clientMode: "cli", + scopes: ["operator.read", "operator.write"], + }, + ]); + expect(run.result.status).toBe(0); + expect(run.approvals).toEqual(["probe-cli"]); + // The approve child saw none of the gateway env triplet (#4462). + expect(run.approvalEnv).toEqual(["unset:unset:unset"]); + }, + ); + + it( + "approve timeout matches the watcher (10s), list keeps 2s, and stays within the outer cap", + testTimeoutOptions(20_000), + () => { + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "approve-budget-sandbox", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); + expect(result.status).toBe(0); + + const script = extractApprovalPassScript(stateFile, sandboxName); + // The rendered script interpolates the exported budget constants, tying + // runtime behaviour to the values the invariant below asserts on (no + // source-text scraping — numbers come from the imported constants). + expect(script).toContain("[OPENCLAW, 'devices', 'list', '--json']"); + expect(script).toContain(`timeout=${CONNECT_AUTO_PAIR_LIST_TIMEOUT_S},`); + expect(script).toContain(`timeout=${CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S},`); + expect(script).toContain(`MAX_APPROVALS = ${CONNECT_AUTO_PAIR_MAX_APPROVALS}`); + + // Approve budget matches the in-sandbox watcher RUN_TIMEOUT_SECS = 10; + // list budget is 2s. + expect(CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S).toBe(10); + expect(CONNECT_AUTO_PAIR_LIST_TIMEOUT_S).toBe(2); + + // Budget invariant: the inner worst case (list + approve × MAX_APPROVALS) + // must stay STRICTLY below the outer spawnSync cap. The outer timer starts + // when `sh` is spawned — before shell startup, sourcing the proxy env, the + // python3 launch, and `devices list` even begin — so the cap must leave + // slack above the inner budget, or a legitimate slow 10s approve is killed + // mid-loop and the allowlisted request is stranded (#4504). + const innerBudgetSeconds = + CONNECT_AUTO_PAIR_LIST_TIMEOUT_S + + CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S * CONNECT_AUTO_PAIR_MAX_APPROVALS; + expect(innerBudgetSeconds).toBeLessThan(CONNECT_AUTO_PAIR_TIMEOUT_MS / 1000); + }, + ); +}); diff --git a/test/sandbox-connect-inference/helpers.ts b/test/sandbox-connect-inference/helpers.ts index 2e4c4a5dff..6140419769 100644 --- a/test/sandbox-connect-inference/helpers.ts +++ b/test/sandbox-connect-inference/helpers.ts @@ -160,6 +160,17 @@ if (args[0] === "sandbox" && args[1] === "exec") { process.stderr.write("simulated sandbox exec failure\\n"); process.exit(7); } + // Test hook (#4504): force the in-sandbox gateway health probe to report + // STOPPED so the probe path takes the not-running branch and (when recovery + // also fails) the probe-failure exit — where the approval sweep must NOT run. + if ( + process.env.NEMOCLAW_TEST_GATEWAY_DOWN === "1" && + command.includes("/health") && + command.includes("HTTP_CODE") + ) { + process.stdout.write("__NEMOCLAW_SANDBOX_EXEC_STARTED__\\nSTOPPED\\n"); + process.exit(0); + } process.stdout.write("__NEMOCLAW_SANDBOX_EXEC_STARTED__\\nRUNNING\\n"); process.exit(0); } From 6ab4c05f95b9926f868b878bab4f724dc254a2a2 Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Mon, 8 Jun 2026 10:44:24 +0800 Subject: [PATCH 2/2] docs(connect): clarify approve env strips full gateway triplet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The auto-pair approval-pass comment still described the old behavior (removing OPENCLAW_GATEWAY_URL only). Update it to reflect the shared gateway_approval_env() policy helper, which strips URL + PORT + TOKEN — the full triplet that avoids the #4462 self-defeat. Comment-only. Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- src/lib/actions/sandbox/connect.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index 4c2d3443cb..1c0ac0f7b7 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -771,8 +771,10 @@ function ensureSandboxInferenceRouteOrExit( // when the user runs `nemoclaw connect`. The script sources // `/tmp/nemoclaw-proxy-env.sh` (written by `nemoclaw-start.sh`) so the // in-sandbox `openclaw devices list` invocation targets the running gateway -// with its token. Approvals then use OpenClaw's local fallback by removing -// OPENCLAW_GATEWAY_URL only from the child env, and apply the same allowlist +// with its token. Approvals then use OpenClaw's local fallback via the shared +// `gateway_approval_env()` policy helper, which strips OPENCLAW_GATEWAY_URL, +// OPENCLAW_GATEWAY_PORT, and OPENCLAW_GATEWAY_TOKEN from the child env (the full +// triplet needed to avoid the #4462 self-defeat), and apply the same allowlist // as the startup watcher — `openclaw-control-ui` clients plus `webchat`/`cli` // modes. Unknown clients are ignored, not approved. //