Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 32 additions & 8 deletions src/lib/actions/sandbox/auto-pair-approval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,26 @@ import { ROOT } from "../../state/paths";

// Bound the in-sandbox work: 2s list + 1s × MAX_APPROVALS attempts plus
// shell/python startup slack fits inside the outer spawnSync cap, so a wedged
// sandbox can never block the caller.
// sandbox can never block the caller. These are the defaults for the doctor
// recovery surface (#4616), which batch-clears any backlog of pending upgrades.
export const AUTO_PAIR_MAX_APPROVALS = 8;
export const AUTO_PAIR_APPROVAL_TIMEOUT_MS = 12_000;
// Default per-call budgets (seconds) for the in-sandbox openclaw subcommands.
const AUTO_PAIR_LIST_TIMEOUT_S = 2;
const AUTO_PAIR_APPROVE_TIMEOUT_S = 1;

// Per-surface budget overrides. The connect/probe/finalization surfaces (#4504)
// supply a tighter budget — a single realistic pending CLI/webchat scope
// upgrade (maxApprovals = 1) on the watcher's 10s approve budget with a 15s
// outer cap — via ./connect-autopair-budget. The doctor surface (#4616) uses
// the defaults above to drain a backlog. Callers that omit a field inherit the
// default, so the historical doctor payload stays byte-stable.
export type AutoPairApprovalBudget = {
maxApprovals?: number;
listTimeoutS?: number;
approveTimeoutS?: number;
timeoutMs?: number;
};

const AUTO_PAIR_POLICY_PATH = path.join(
ROOT,
Expand Down Expand Up @@ -102,11 +119,14 @@ export function readAutoPairApprovalPolicyModule(): string | null {
*/
export function buildAutoPairApprovalScript(
approvalPolicyModuleB64: string,
options: { emitSummary?: boolean } = {},
options: { emitSummary?: boolean; budget?: AutoPairApprovalBudget } = {},
): string {
const summaryLine = options.emitSummary
? "print(f'__NEMOCLAW_AUTO_PAIR_APPROVED__={approved_count}')\n"
: "";
const maxApprovals = options.budget?.maxApprovals ?? AUTO_PAIR_MAX_APPROVALS;
const listTimeoutS = options.budget?.listTimeoutS ?? AUTO_PAIR_LIST_TIMEOUT_S;
const approveTimeoutS = options.budget?.approveTimeoutS ?? AUTO_PAIR_APPROVE_TIMEOUT_S;
return `
PROXY_ENV=/tmp/nemoclaw-proxy-env.sh
[ -r "$PROXY_ENV" ] && . "$PROXY_ENV"
Expand All @@ -131,12 +151,12 @@ except Exception:
sys.exit(0)

OPENCLAW = os.environ.get('OPENCLAW_BIN', 'openclaw')
MAX_APPROVALS = ${AUTO_PAIR_MAX_APPROVALS}
MAX_APPROVALS = ${maxApprovals}

try:
proc = subprocess.run(
[OPENCLAW, 'devices', 'list', '--json'],
capture_output=True, text=True, timeout=2,
capture_output=True, text=True, timeout=${listTimeoutS},
)
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
sys.exit(0)
Expand Down Expand Up @@ -171,7 +191,7 @@ for device in pending:
try:
approve_proc = subprocess.run(
[OPENCLAW, 'devices', 'approve', request_id, '--json'],
capture_output=True, text=True, timeout=1, env=approve_env,
capture_output=True, text=True, timeout=${approveTimeoutS}, env=approve_env,
)
if approve_proc.returncode == 0:
approved_count += 1
Expand All @@ -194,15 +214,19 @@ exit 0
*/
export function runSandboxAutoPairApprovalPass(
sandboxName: string,
options: { capture?: boolean } = {},
options: { capture?: boolean; budget?: AutoPairApprovalBudget } = {},
): AutoPairApprovalResult {
const capture = options.capture === true;
const approvalPolicyModule = readAutoPairApprovalPolicyModule();
if (!approvalPolicyModule) {
return { attempted: false, reported: false, approved: 0 };
}
const approvalPolicyModuleB64 = Buffer.from(approvalPolicyModule, "utf-8").toString("base64");
const script = buildAutoPairApprovalScript(approvalPolicyModuleB64, { emitSummary: capture });
const script = buildAutoPairApprovalScript(approvalPolicyModuleB64, {
emitSummary: capture,
budget: options.budget,
});
const outerTimeoutMs = options.budget?.timeoutMs ?? AUTO_PAIR_APPROVAL_TIMEOUT_MS;
// Lazy require: `adapters/openshell/runtime` pulls in `runner`, whose
// load-time `require("./platform")` cannot be resolved by the Vitest TS
// loader. Importing it here keeps this module unit-testable in-process.
Expand All @@ -217,7 +241,7 @@ export function runSandboxAutoPairApprovalPass(
env: process.env,
stdio: capture ? ["ignore", "pipe", "pipe"] : ["ignore", "ignore", "ignore"],
encoding: "utf-8",
timeout: AUTO_PAIR_APPROVAL_TIMEOUT_MS,
timeout: outerTimeoutMs,
},
);
if (!capture) {
Expand Down
22 changes: 22 additions & 0 deletions src/lib/actions/sandbox/connect-autopair-budget.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

// Budget constants for the connect-time auto-pair scope-approval pass
// (runConnectAutoPairApprovalPass in ./connect). Kept in a dependency-free leaf
// module so tests can import and assert the invariant on the real values
// without pulling in connect.ts's heavy transitive requires (#4504).

export const CONNECT_AUTO_PAIR_MAX_APPROVALS = 1;
// `openclaw devices list` budget (seconds), interpolated into the in-sandbox
// script so the invariant below is asserted on real values, not source text.
export const CONNECT_AUTO_PAIR_LIST_TIMEOUT_S = 2;
// `openclaw devices approve` budget (seconds); matches the in-sandbox watcher's
// RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh).
export const CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S = 10;
// Outer spawnSync cap (ms). Must exceed the internal worst case
// (CONNECT_AUTO_PAIR_LIST_TIMEOUT_S + CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S ×
// CONNECT_AUTO_PAIR_MAX_APPROVALS) PLUS shell/python startup, since the outer
// timer starts at `sh` spawn before the proxy env is sourced and python3
// launches; the ~3s slack means a legitimate slow approve is never SIGKILLed
// mid-loop, which would strand the allowlisted request.
export const CONNECT_AUTO_PAIR_TIMEOUT_MS = 15_000;
39 changes: 37 additions & 2 deletions src/lib/actions/sandbox/connect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ import {
} from "../../state/sandbox-session";
import { runSetupDnsProxy } from "../dns";
import { runSandboxAutoPairApprovalPass } from "./auto-pair-approval";
import {
CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S,
CONNECT_AUTO_PAIR_LIST_TIMEOUT_S,
CONNECT_AUTO_PAIR_MAX_APPROVALS,
CONNECT_AUTO_PAIR_TIMEOUT_MS,
} from "./connect-autopair-budget";
import { preflightVllmModelEnvOrExit } from "./connect-vllm-preflight";
import {
isDockerRuntimeDown,
Expand Down Expand Up @@ -205,6 +211,10 @@ function runSandboxConnectProbe(sandboxName: string): void {
}
if (processCheck.wasRunning) {
ensureSandboxInferenceRoute(sandboxName, { quiet: true });
// Defense-in-depth scope-upgrade approval on the probe-only / `recover`
// path (#4504): the gateway is up, so deterministically clear any pending
// allowlisted CLI/webchat scope upgrade. Best-effort; never throws.
runConnectAutoPairApprovalPass(sandboxName);
if (processCheck.forwardRecovered) {
console.log(
` Probe complete: ${agentName} gateway is running in '${sandboxName}'; restored dashboard port forward.`,
Expand All @@ -216,6 +226,8 @@ function runSandboxConnectProbe(sandboxName: string): void {
}
if (processCheck.recovered) {
ensureSandboxInferenceRoute(sandboxName, { quiet: true });
// Same defense-in-depth approval after a recovery (#4504); best-effort.
runConnectAutoPairApprovalPass(sandboxName);
console.log(` Probe complete: recovered ${agentName} gateway in '${sandboxName}'.`);
return;
}
Expand Down Expand Up @@ -751,6 +763,29 @@ function ensureSandboxInferenceRouteOrExit(
return result.sandbox;
}

// Connect/probe/finalization budget for the shared auto-pair approval pass
// (#4504). The realistic case here is a single pending CLI/webchat scope
// upgrade, so MAX_APPROVALS is 1 and the approve timeout matches the in-sandbox
// watcher's RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh). The outer spawnSync cap
// (15s) exceeds the internal worst case (2s list + 10s × 1 = 12s) plus
// shell/python startup so a legitimate slow approve is never SIGKILLed mid-loop
// and the allowlisted request is never stranded. Constants live in the
// dependency-free ./connect-autopair-budget leaf so tests can assert the
// invariant on the real values without importing this heavy module. The doctor
// recovery surface (#4616) keeps the wider default budget in ./auto-pair-approval.
const CONNECT_AUTO_PAIR_BUDGET = {
maxApprovals: CONNECT_AUTO_PAIR_MAX_APPROVALS,
listTimeoutS: CONNECT_AUTO_PAIR_LIST_TIMEOUT_S,
approveTimeoutS: CONNECT_AUTO_PAIR_APPROVE_TIMEOUT_S,
timeoutMs: CONNECT_AUTO_PAIR_TIMEOUT_MS,
} as const;

// Thin wrapper so the connect/probe/finalization surfaces share one budget
// without each caller restating it. Best-effort; never throws (#4263/#4504).
export function runConnectAutoPairApprovalPass(sandboxName: string): void {
runSandboxAutoPairApprovalPass(sandboxName, { budget: CONNECT_AUTO_PAIR_BUDGET });
}

function maybeEnsureHermesToolGatewayBroker(sb: SandboxEntry | null): void {
if (
!sb ||
Expand Down Expand Up @@ -976,8 +1011,8 @@ export async function connectSandbox(
// piled up between startup and now (e.g., watcher crashed, watcher
// deadline exhausted, multi-sandbox gateway contention). The same pass
// is reachable without SSH via `doctor --fix` for dashboard-only users
// (#4616).
runSandboxAutoPairApprovalPass(sandboxName);
// (#4616). Uses the tight connect budget (#4504).
runConnectAutoPairApprovalPass(sandboxName);

// Print a one-shot hint before dropping the user into the sandbox
// shell so a fresh user knows the first thing to type. Without this,
Expand Down
7 changes: 2 additions & 5 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ import {
type MessagingChannelConfig,
readMessagingChannelConfigFromEnv,
} from "./messaging-channel-config";
import { finalizationHandlerDeps } from "./onboard/finalization-deps";
import { streamGatewayStart } from "./onboard/gateway";
import {
mergeRequiredHermesToolGatewayPolicyPresets,
Expand Down Expand Up @@ -6539,11 +6540,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
toSessionUpdates: (updates) => toSessionUpdates(updates as Parameters<typeof toSessionUpdates>[0]),
removeLegacyCredentialsFile,
cleanupStaleHostFiles,
checkAndRecoverSandboxProcesses: (name, options) => {
const processRecovery: typeof import("./actions/sandbox/process-recovery") =
require("./actions/sandbox/process-recovery");
processRecovery.checkAndRecoverSandboxProcesses(name, options);
},
...finalizationHandlerDeps,
getChatUiUrl: () => process.env.CHAT_UI_URL || `http://127.0.0.1:${DASHBOARD_PORT}`,
buildVerifyChain: (chatUiUrl) =>
// biome-ignore format: keep src/lib/onboard.ts net-neutral for growth guardrail.
Expand Down
23 changes: 23 additions & 0 deletions src/lib/onboard/finalization-deps.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

// Lazy-require runtime dependencies for the onboarding finalization handler.
// Kept in a focused module under src/lib/onboard/ so the top-level onboard
// entrypoint stays lean (codebase-growth-guardrails). The lazy `require` calls
// avoid an import cycle: connect.ts and process-recovery.ts both pull in
// onboard helpers, so they must not be statically imported here.
export const finalizationHandlerDeps = {
checkAndRecoverSandboxProcesses(name: string, options: { quiet: boolean }): void {
const processRecovery: typeof import("../actions/sandbox/process-recovery") =
require("../actions/sandbox/process-recovery");
processRecovery.checkAndRecoverSandboxProcesses(name, options);
},
Comment on lines +10 to +14

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Return type mismatch breaks the upstream contract.

The upstream checkAndRecoverSandboxProcesses function returns a structured object with { checked, wasRunning, recovered, forwardRecovered } fields (see src/lib/actions/sandbox/process-recovery.ts:421-545), but this wrapper changes the return type to void. This contract break prevents the finalization handler from:

  • Knowing whether the sandbox exists (checked: false)
  • Determining if recovery succeeded or failed
  • Inspecting whether the port forward was restored
  • Making decisions or logging based on recovery outcomes

In src/lib/actions/sandbox/connect.ts:203-241, the same function's return value is actively used to control probe behavior and user messaging. Even if the current finalization handler doesn't need this information, preserving the return type:

  • Maintains API consistency with the upstream function
  • Enables future code to inspect recovery outcomes without modifying the wrapper
  • Provides valuable debugging information for onboard failures
🔧 Proposed fix to preserve the upstream return type
-  checkAndRecoverSandboxProcesses(name: string, options: { quiet: boolean }): void {
+  checkAndRecoverSandboxProcesses(
+    name: string,
+    options: { quiet: boolean },
+  ): { checked: boolean; wasRunning: boolean | null; recovered: boolean; forwardRecovered: boolean } {
     const processRecovery: typeof import("../actions/sandbox/process-recovery") =
       require("../actions/sandbox/process-recovery");
-    processRecovery.checkAndRecoverSandboxProcesses(name, options);
+    return processRecovery.checkAndRecoverSandboxProcesses(name, options);
   },
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
checkAndRecoverSandboxProcesses(name: string, options: { quiet: boolean }): void {
const processRecovery: typeof import("../actions/sandbox/process-recovery") =
require("../actions/sandbox/process-recovery");
processRecovery.checkAndRecoverSandboxProcesses(name, options);
},
checkAndRecoverSandboxProcesses(
name: string,
options: { quiet: boolean },
): { checked: boolean; wasRunning: boolean | null; recovered: boolean; forwardRecovered: boolean } {
const processRecovery: typeof import("../actions/sandbox/process-recovery") =
require("../actions/sandbox/process-recovery");
return processRecovery.checkAndRecoverSandboxProcesses(name, options);
},
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/lib/onboard/finalization-deps.ts` around lines 10 - 14, The wrapper
checkAndRecoverSandboxProcesses in src/lib/onboard/finalization-deps.ts
currently declares a void return and discards the result from
require("../actions/sandbox/process-recovery"); change the wrapper to preserve
and return the upstream function's structured result ({ checked, wasRunning,
recovered, forwardRecovered }) by updating the wrapper signature to return that
same type and returning the value from
processRecovery.checkAndRecoverSandboxProcesses(name, options) so callers get
the original recovery object and API contract remains consistent with
src/lib/actions/sandbox/process-recovery.ts and usages such as
src/lib/actions/sandbox/connect.ts.

// Best-effort device-approval sweep that clears pending allowlisted
// CLI/webchat scope upgrades so onboard hands off without a stuck pairing
// request (#4504). Never throws.
autoPairScopeApproval(name: string): void {
const connect: typeof import("../actions/sandbox/connect") =
require("../actions/sandbox/connect");
connect.runConnectAutoPairApprovalPass(name);
},
};
52 changes: 52 additions & 0 deletions src/lib/onboard/machine/handlers/finalization.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ function createDeps(overrides: Partial<FinalizationStateOptions<Agent, VerifyCha
removeLegacy: vi.fn(),
cleanupHost: vi.fn(),
recoverProcesses: vi.fn(),
autoPairScopeApproval: vi.fn(),
getChatUiUrl: vi.fn(() => "http://127.0.0.1:18789"),
buildChain: vi.fn(() => ({ port: 18789 })),
verify: vi.fn(async () => ({ ok: true })),
Expand All @@ -37,6 +38,7 @@ function createDeps(overrides: Partial<FinalizationStateOptions<Agent, VerifyCha
removeLegacyCredentialsFile: calls.removeLegacy,
cleanupStaleHostFiles: calls.cleanupHost,
checkAndRecoverSandboxProcesses: calls.recoverProcesses,
autoPairScopeApproval: calls.autoPairScopeApproval,
getChatUiUrl: calls.getChatUiUrl,
buildVerifyChain: calls.buildChain,
verifyDeployment: calls.verify,
Expand Down Expand Up @@ -177,4 +179,54 @@ describe("handleFinalizationState", () => {
callsOn.verify.mock.invocationCallOrder[0],
);
});

// Scenario A (#4504): the auto-pair scope-approval sweep runs against the
// freshly-recovered gateway — strictly after process recovery (which can
// restart the gateway, #3573) and strictly before deployment verification
// (so the gateway state is settled before we probe it).
it("runs the auto-pair scope-approval sweep after process recovery and before verify (#4504)", async () => {
const { deps, calls } = createDeps();

await handleFinalizationState(baseOptions(deps));

expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce();
expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant");
// Ordering: recover → autoPairScopeApproval → verify.
expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeGreaterThan(
calls.recoverProcesses.mock.invocationCallOrder[0],
);
expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeLessThan(
calls.verify.mock.invocationCallOrder[0],
);
});

// Scenario B (#4504): the sweep is agent-agnostic — the stuck CLI/webchat
// scope upgrade can occur regardless of which agent the sandbox runs.
it("runs the scope-approval sweep regardless of agent type (#4504)", async () => {
const { deps, calls } = createDeps();
const agent = { name: "hermes" };

await handleFinalizationState({ ...baseOptions(deps), agent });

expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant");
});

// Scenario C (#4504): the dep is documented as best-effort / never-throws and
// the handler wraps no try/catch around it. Per the contract we assert the
// implemented behavior: the sweep is invoked and, because it returns cleanly,
// finalization proceeds to completion (recordSessionComplete still runs). A
// dep that threw would abort finalization here — the regression this guards.
it("treats the scope-approval sweep as best-effort and still completes the session (#4504)", async () => {
const { deps, calls } = createDeps();

const result = await handleFinalizationState(baseOptions(deps));

expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce();
// The non-throwing sweep does not abort finalization: it proceeds through
// verification and the dashboard print to a completed result. (#4472 moved
// session completion to the imported completeOnboardMachine, so completion
// is asserted via the downstream dashboard + diagnostics rather than a dep.)
expect(calls.dashboard).toHaveBeenCalledOnce();
expect(result.verificationDiagnostics).toEqual([" ✓ verified"]);
});
});
12 changes: 12 additions & 0 deletions src/lib/onboard/machine/handlers/finalization.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ export interface FinalizationStateOptions<Agent, VerifyChain, VerificationResult
removeLegacyCredentialsFile(): void;
cleanupStaleHostFiles(): void;
checkAndRecoverSandboxProcesses(sandboxName: string, options: { quiet: boolean }): void;
/**
* Best-effort device-approval sweep that clears pending allowlisted
* CLI/webchat scope upgrades before handoff. Never throws; swallows its own
* failures (timeout, sandbox-exec errors). Run after process recovery
* because that can restart the gateway (#3573), so the sweep targets the
* freshly-recovered gateway (ref #4504 / #4263).
*/
autoPairScopeApproval(sandboxName: string): void;
getChatUiUrl(): string;
buildVerifyChain(chatUiUrl: string): VerifyChain;
verifyDeployment(sandboxName: string, chain: VerifyChain): Promise<VerificationResult>;
Expand Down Expand Up @@ -94,6 +102,10 @@ export async function handleFinalizationState<Agent, VerifyChain, VerificationRe
deps.cleanupStaleHostFiles();
// Policy application can restart the sandbox; recover OpenClaw before verification (#3573).
deps.checkAndRecoverSandboxProcesses(sandboxName, { quiet: true });
// Clear any pending allowlisted scope upgrade against the freshly-recovered
// gateway before verification, so onboard hands off without a stuck pairing
// request (#4504 / #4263). Best-effort; never blocks.
deps.autoPairScopeApproval(sandboxName);

// Probe Brave Search egress through the L7 proxy now that the final
// policy and provider state are live — earlier probes would race the
Expand Down
Loading
Loading