From bba77bda2b5c0f3fdd69f13cf4c24a8de23e83a7 Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Tue, 2 Jun 2026 14:48:49 +0800 Subject: [PATCH 1/6] fix(cli): warn on cross-sandbox messaging credential conflicts in channels add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `nemoclaw channels add ` accepted the same messaging bot credential (e.g. a Telegram bot token) on a second sandbox with no warning, so two sandboxes silently competed for the single allowed getUpdates/gateway consumer and one looped on HTTP 409. The onboard path already guarded against this; the channels-add path did not. Mirror the onboard conflict check inside addSandboxChannel: after token acquisition but before the gateway/registry mutation, hash the acquired credentials and call findChannelConflicts against the other sandboxes in the registry (with a tri-state gateway probe to backfill legacy entries). On a matching or unverifiable token, interactive runs warn and prompt `Continue anyway? [y/N]` (default no); non-interactive runs abort with `exit(1)` and actionable guidance; a new `--force` flag is the documented override. The check runs before any gateway/registry mutation, so declining leaves nothing half-wired. QR-paired (tokenless) adds and `--dry-run` are unaffected, and only non-secret credential hashes — never raw tokens — are compared or surfaced. Fixes #4305 Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- src/commands/sandbox/channels/mutate.test.ts | 61 ++ .../sandbox/policy-channel-conflict.test.ts | 557 ++++++++++++++++++ src/lib/actions/sandbox/policy-channel.ts | 132 ++++- src/lib/sandbox/channels-command-support.ts | 7 +- 4 files changed, 739 insertions(+), 18 deletions(-) create mode 100644 src/lib/actions/sandbox/policy-channel-conflict.test.ts diff --git a/src/commands/sandbox/channels/mutate.test.ts b/src/commands/sandbox/channels/mutate.test.ts index f07cb00151..9857aa4a69 100644 --- a/src/commands/sandbox/channels/mutate.test.ts +++ b/src/commands/sandbox/channels/mutate.test.ts @@ -30,6 +30,40 @@ describe("channels mutation oclif commands", () => { expect(mocks.addSandboxChannel).toHaveBeenCalledWith("alpha", { channel: "telegram", dryRun: true, + force: false, + }); + }); + + // Scenario 12 (#4305): --force is threaded through to addSandboxChannel. + it("threads --force through add to typed action options", async () => { + await ChannelsAddCommand.run(["alpha", "telegram", "--force"], rootDir); + + expect(mocks.addSandboxChannel).toHaveBeenCalledWith("alpha", { + channel: "telegram", + dryRun: false, + force: true, + }); + }); + + // Scenario 12 (#4305): omitting --force yields force:false (no implicit override). + it("defaults force to false when --force is omitted on add", async () => { + await ChannelsAddCommand.run(["alpha", "telegram"], rootDir); + + expect(mocks.addSandboxChannel).toHaveBeenCalledWith("alpha", { + channel: "telegram", + dryRun: false, + force: false, + }); + }); + + // Scenario 12 (#4305): --force combines with --dry-run independently. + it("threads both --force and --dry-run on add", async () => { + await ChannelsAddCommand.run(["alpha", "telegram", "--force", "--dry-run"], rootDir); + + expect(mocks.addSandboxChannel).toHaveBeenCalledWith("alpha", { + channel: "telegram", + dryRun: true, + force: true, }); }); @@ -41,14 +75,41 @@ describe("channels mutation oclif commands", () => { expect(mocks.removeSandboxChannel).toHaveBeenCalledWith("alpha", { channel: "telegram", dryRun: false, + force: false, }); expect(mocks.startSandboxChannel).toHaveBeenCalledWith("alpha", { channel: "telegram", dryRun: true, + force: false, + }); + expect(mocks.stopSandboxChannel).toHaveBeenCalledWith("alpha", { + channel: "slack", + dryRun: false, + force: false, + }); + }); + + // Scenario 12 (#4305): the shared flag exposes a (no-op) --force on + // remove/start/stop too; confirm it is threaded so the no-op is faithful. + it("threads --force through remove/start/stop (shared flag, no-op downstream)", async () => { + await ChannelsRemoveCommand.run(["alpha", "telegram", "--force"], rootDir); + await ChannelsStartCommand.run(["alpha", "telegram", "--force"], rootDir); + await ChannelsStopCommand.run(["alpha", "slack", "--force"], rootDir); + + expect(mocks.removeSandboxChannel).toHaveBeenCalledWith("alpha", { + channel: "telegram", + dryRun: false, + force: true, + }); + expect(mocks.startSandboxChannel).toHaveBeenCalledWith("alpha", { + channel: "telegram", + dryRun: false, + force: true, }); expect(mocks.stopSandboxChannel).toHaveBeenCalledWith("alpha", { channel: "slack", dryRun: false, + force: true, }); }); diff --git a/src/lib/actions/sandbox/policy-channel-conflict.test.ts b/src/lib/actions/sandbox/policy-channel-conflict.test.ts new file mode 100644 index 0000000000..6580342e06 --- /dev/null +++ b/src/lib/actions/sandbox/policy-channel-conflict.test.ts @@ -0,0 +1,557 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Cross-sandbox messaging-credential conflict detection on `channels add` +// (issue #4305). These tests drive the public `addSandboxChannel` action and +// assert only on the mocked module boundaries — never on the private helper +// names — so they survive a refactor of the internal conflict-check plumbing. +// +// Why dist + vi.spyOn (the rebuild-shields-finally.test.ts pattern): the source +// policy-channel.ts loads several deps via runtime CommonJS `require()` +// (../../onboard, ../../onboard/providers, ./rebuild, ../../runner, ...). In +// this repo's vitest setup, `vi.mock` only intercepts ESM `import`, not plain +// `require()`, and those modules do extensionless sibling requires the TS +// transform cannot resolve. So we require the COMPILED module + its real +// compiled dependency modules from dist/ (one shared require cache) and +// `vi.spyOn` the dependency exports. Run `npm run build:cli` first. +// +// isNonInteractive is destructured at module load (`const { isNonInteractive } +// = require("../../onboard")`), so it cannot be spied after load; it reads +// process.env.NEMOCLAW_NON_INTERACTIVE === "1" at call time, which we drive +// directly. The real messaging-conflict, sandbox/channels, and credential-hash +// modules run unmocked so the genuine hash + conflict logic is exercised. + +import { createRequire } from "node:module"; + +import { afterEach, beforeEach, describe, expect, it, type MockInstance, vi } from "vitest"; + +const requireDist = createRequire(import.meta.url); +const D = (p: string) => requireDist(`../../../../dist/lib/${p}`); + +type SandboxEntry = import("../../state/registry").SandboxEntry; + +// Real compiled dependency modules (shared require cache with the SUT). +const store = D("credentials/store.js"); +const registry = D("state/registry.js"); +const providers = D("onboard/providers.js"); +const runtime = D("adapters/openshell/runtime.js"); +const gatewayRuntime = D("gateway-runtime-action.js"); +const defs = D("agent/defs.js"); +const rebuild = D("actions/sandbox/rebuild.js"); +const onboardSession = D("state/onboard-session.js"); +const slackValidation = D("actions/sandbox/slack-channel-validation.js"); +const policy = D("policy/index.js"); +const { hashCredential } = D("security/credential-hash.js") as { + hashCredential: (v: string) => string | null; +}; +const { addSandboxChannel } = D("actions/sandbox/policy-channel.js") as { + addSandboxChannel: ( + name: string, + options?: { channel?: string; dryRun?: boolean; force?: boolean }, + ) => Promise; +}; + +const TELEGRAM_TOKEN = "123456:AAH-secret-bot-token-value"; +const TELEGRAM_HASH = hashCredential(TELEGRAM_TOKEN) as string; + +let spies: MockInstance[]; +let logSpy: MockInstance; +let errSpy: MockInstance; +let exitMock: MockInstance; +let promptMock: MockInstance; +let getCredentialMock: MockInstance; +let updateSandboxMock: MockInstance; +let upsertMock: MockInstance; +let runOpenshellMock: MockInstance; +let applyPresetMock: MockInstance; +let getSandboxMock: MockInstance; +let listSandboxesMock: MockInstance; + +function arrangeRegistry(opts: { current: SandboxEntry; others?: SandboxEntry[] }): void { + const all = [opts.current, ...(opts.others ?? [])]; + listSandboxesMock.mockReturnValue({ sandboxes: all, defaultSandbox: opts.current.name }); + getSandboxMock.mockImplementation((name: string) => all.find((s) => s.name === name) ?? null); +} + +function loggedText(): string { + const lines: string[] = []; + for (const call of (logSpy.mock.calls as unknown[][]) ?? []) lines.push(call.map(String).join(" ")); + for (const call of (errSpy.mock.calls as unknown[][]) ?? []) lines.push(call.map(String).join(" ")); + return lines.join("\n"); +} + +// True iff the conflict-resolution "Continue anyway?" prompt was shown. +// (The unrelated "Rebuild now?" prompt fires after a successful add and must +// not be conflated with the conflict prompt.) +function conflictPromptShown(): boolean { + return (promptMock.mock.calls as unknown[][]).some((call) => + String(call[0]).includes("Continue anyway?"), + ); +} + +beforeEach(() => { + spies = []; + delete process.env.NEMOCLAW_NON_INTERACTIVE; + + logSpy = vi.spyOn(console, "log").mockImplementation(() => undefined); + errSpy = vi.spyOn(console, "error").mockImplementation(() => undefined); + exitMock = vi.spyOn(process, "exit").mockImplementation(((code?: number) => { + throw new Error(`process.exit(${code})`); + }) as never); + + // Registry seam. + getSandboxMock = vi.spyOn(registry, "getSandbox").mockReturnValue(null); + listSandboxesMock = vi + .spyOn(registry, "listSandboxes") + .mockReturnValue({ sandboxes: [], defaultSandbox: null }); + updateSandboxMock = vi.spyOn(registry, "updateSandbox").mockReturnValue(true); + + // onboard/providers seam (gateway probe + register). + vi.spyOn(providers, "providerExistsInGateway").mockReturnValue(false); + upsertMock = vi.spyOn(providers, "upsertMessagingProviders").mockImplementation(() => undefined); + + // openshell runtime + gateway recovery. + runOpenshellMock = vi + .spyOn(runtime, "runOpenshell") + .mockReturnValue({ status: 0, stdout: "", stderr: "" }); + vi.spyOn(gatewayRuntime, "recoverNamedGatewayRuntime").mockResolvedValue({ recovered: true }); + + // Credentials store: staged token (no real prompt) + controllable prompt. + getCredentialMock = vi.spyOn(store, "getCredential").mockReturnValue(null); + promptMock = vi.spyOn(store, "prompt").mockResolvedValue(""); + vi.spyOn(store, "saveCredential").mockImplementation(() => undefined); + + // Agent gate: support every channel. + vi.spyOn(defs, "loadAgent").mockReturnValue({ + name: "openclaw", + messagingPlatforms: ["telegram", "discord", "slack", "wechat", "whatsapp"], + }); + + // Policy seam. addSandboxChannel gates on loadPreset()/parsePresetPolicyKeys() + // up front (the channel must ship a preset with network_policies); stub both + // so the gate passes without reading preset YAML off disk. listPresets [] so + // no preset is treated as "built-in" for any channel + // (applyChannelPresetIfAvailable then short-circuits to success). + vi.spyOn(policy, "loadPreset").mockReturnValue("network_policies:\n stub: {}\n"); + vi.spyOn(policy, "parsePresetPolicyKeys").mockReturnValue(["stub"]); + vi.spyOn(policy, "listPresets").mockReturnValue([]); + applyPresetMock = vi.spyOn(policy, "applyPreset").mockReturnValue(true); + vi.spyOn(policy, "getAppliedPresets").mockReturnValue([]); + + // Downstream rebuild is not under test. + vi.spyOn(rebuild, "rebuildSandbox").mockResolvedValue(undefined); + + // Slack add runs a credential validation (auth.test-style) before the + // conflict check; that is its own concern and would otherwise probe Slack. + // Stub it to pass so scenario 11 can reach the conflict logic. + vi.spyOn(slackValidation, "validateSlackChannelCredentials").mockReturnValue({ ok: true }); + + // onboard-session for the wechat host-qr branch. + vi.spyOn(onboardSession, "loadSession").mockReturnValue(null); + vi.spyOn(onboardSession, "updateSession").mockImplementation(() => undefined); +}); + +afterEach(() => { + vi.restoreAllMocks(); + for (const s of spies) s.mockRestore(); + delete process.env.NEMOCLAW_NON_INTERACTIVE; + delete process.env.WECHAT_ACCOUNT_ID; +}); + +describe("addSandboxChannel cross-sandbox conflict check (#4305)", () => { + // Scenario 1 + it("interactive matching-token conflict: warns, user continues, add proceeds", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + promptMock.mockResolvedValue("y"); + + await addSandboxChannel("alpha", { channel: "telegram" }); + + const text = loggedText(); + expect(text).toContain("bob"); + expect(text).toContain("same telegram credential"); + expect(upsertMock).toHaveBeenCalledTimes(1); + expect(updateSandboxMock).toHaveBeenCalledWith("alpha", expect.any(Object)); + }); + + // Scenario 2 + it("interactive matching-token conflict: user aborts, nothing is mutated", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + promptMock.mockResolvedValue("n"); + + await addSandboxChannel("alpha", { channel: "telegram" }); + + expect(loggedText()).toContain("same telegram credential"); + expect(upsertMock).not.toHaveBeenCalled(); + expect(updateSandboxMock).not.toHaveBeenCalledWith("alpha", expect.any(Object)); + expect(applyPresetMock).not.toHaveBeenCalled(); + }); + + it("interactive matching-token conflict: empty answer (default N) aborts", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + promptMock.mockResolvedValue(""); // bare Enter -> default No + + await addSandboxChannel("alpha", { channel: "telegram" }); + + expect(upsertMock).not.toHaveBeenCalled(); + expect(updateSandboxMock).not.toHaveBeenCalledWith("alpha", expect.any(Object)); + }); + + // Scenario 3 + it("non-interactive matching-token conflict: aborts with exit(1) and guidance", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + process.env.NEMOCLAW_NON_INTERACTIVE = "1"; + + await expect(addSandboxChannel("alpha", { channel: "telegram" })).rejects.toThrow( + "process.exit(1)", + ); + + expect(exitMock).toHaveBeenCalledWith(1); + const text = loggedText(); + expect(text).toContain("same telegram credential"); + expect(text).toContain("Aborting"); + expect(text).toContain("--force"); + expect(text).toContain("channels remove"); + expect(upsertMock).not.toHaveBeenCalled(); + expect(updateSandboxMock).not.toHaveBeenCalledWith("alpha", expect.any(Object)); + expect(promptMock).not.toHaveBeenCalled(); + }); + + // Scenario 4 + it("--force bypasses the conflict even in non-interactive mode", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + process.env.NEMOCLAW_NON_INTERACTIVE = "1"; + + await addSandboxChannel("alpha", { channel: "telegram", force: true }); + + const text = loggedText(); + expect(text).toContain("same telegram credential"); // warning still shown + expect(text).toContain("--force"); // proceed line + expect(exitMock).not.toHaveBeenCalled(); + expect(upsertMock).toHaveBeenCalledTimes(1); + expect(updateSandboxMock).toHaveBeenCalledWith("alpha", expect.any(Object)); + expect(promptMock).not.toHaveBeenCalled(); + }); + + // Scenario 5a + it("unknown-token wording when the other sandbox has the channel but no hash", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [{ name: "bob", messagingChannels: ["telegram"] }], // no providerCredentialHashes + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + promptMock.mockResolvedValue("y"); + + await addSandboxChannel("alpha", { channel: "telegram" }); + + const text = loggedText(); + expect(text).toContain("credential hash is unavailable"); + expect(text).not.toContain("same telegram credential"); + }); + + // Scenario 5b + it("different hash on the other sandbox is NOT a conflict (no warning, add proceeds)", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { + TELEGRAM_BOT_TOKEN: hashCredential("a-completely-different-token") as string, + }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + promptMock.mockResolvedValue("n"); // would abort IF prompted; proves no prompt happens + + await addSandboxChannel("alpha", { channel: "telegram" }); + + const text = loggedText(); + expect(text).not.toContain("credential hash is unavailable"); + expect(text).not.toContain("same telegram credential"); + expect(conflictPromptShown()).toBe(false); + expect(upsertMock).toHaveBeenCalledTimes(1); + expect(updateSandboxMock).toHaveBeenCalledWith("alpha", expect.any(Object)); + }); + + // Scenario 6 + it("idempotent same-sandbox re-add does not self-conflict", async () => { + arrangeRegistry({ + current: { + name: "alpha", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + promptMock.mockResolvedValue("n"); // would abort IF prompted + + await addSandboxChannel("alpha", { channel: "telegram" }); + + const text = loggedText(); + expect(text).not.toContain("same telegram credential"); + expect(text).not.toContain("credential hash is unavailable"); + expect(conflictPromptShown()).toBe(false); + expect(upsertMock).toHaveBeenCalledTimes(1); + expect(updateSandboxMock).toHaveBeenCalledWith("alpha", expect.any(Object)); + }); + + // Scenario 7 + it("--dry-run never runs the conflict check or touches credentials", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + + await addSandboxChannel("alpha", { channel: "telegram", dryRun: true }); + + const text = loggedText(); + expect(text).toContain("--dry-run: would enable channel 'telegram'"); + expect(text).not.toContain("same telegram credential"); + expect(text).not.toContain("credential hash is unavailable"); + expect(getCredentialMock).not.toHaveBeenCalled(); + expect(runOpenshellMock).not.toHaveBeenCalled(); + expect(upsertMock).not.toHaveBeenCalled(); + expect(promptMock).not.toHaveBeenCalled(); + expect(exitMock).not.toHaveBeenCalled(); + }); + + // Scenario 8: WeChat is host-qr (token-bearing) -> non-empty acquired -> IS conflict-checked. + it("host-qr wechat (token-bearing) IS conflict-checked", async () => { + const wechatToken = "wx-secret-token-abc"; + const wechatHash = hashCredential(wechatToken) as string; + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["wechat"], + providerCredentialHashes: { WECHAT_BOT_TOKEN: wechatHash }, + }, + ], + }); + // acquireHostQrChannel short-circuits on a cached token; supply account + // metadata so the wechat cached-token branch does not bail. + getCredentialMock.mockImplementation((key: string) => + key === "WECHAT_BOT_TOKEN" ? wechatToken : null, + ); + process.env.WECHAT_ACCOUNT_ID = "acct-1"; + process.env.NEMOCLAW_NON_INTERACTIVE = "1"; + + await expect(addSandboxChannel("alpha", { channel: "wechat" })).rejects.toThrow( + "process.exit(1)", + ); + expect(loggedText()).toContain("same wechat credential"); + expect(exitMock).toHaveBeenCalledWith(1); + }); + + // Scenario 8 (companion): genuinely in-sandbox-QR channel (whatsapp) has empty + // acquired and skips the credential conflict check entirely. + it("in-sandbox-qr whatsapp skips the credential conflict check", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["whatsapp"], + providerCredentialHashes: { WHATSAPP_TOKEN: "irrelevant" }, + }, + ], + }); + process.env.NEMOCLAW_NON_INTERACTIVE = "1"; + + await addSandboxChannel("alpha", { channel: "whatsapp" }); + + const text = loggedText(); + expect(text).not.toContain("Continue anyway?"); + expect(text).toContain("Enabled whatsapp channel"); + expect(exitMock).not.toHaveBeenCalled(); + expect(promptMock).not.toHaveBeenCalled(); + }); + + // Scenario 9 + it("probe + backfill failure is swallowed; a pre-recorded matching hash still warns", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + // Legacy entry with NO messagingChannels field — backfill probes the + // (alive) gateway, gets "absent" for every provider, then writes + // messagingChannels:[] for it. We make THAT write throw to genuinely + // exercise the try/catch around backfillMessagingChannels. + { name: "legacy" }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + // Gateway alive (status 0) + every provider absent, so backfill reaches the + // updateSandbox("legacy") write — which throws below. + runOpenshellMock.mockReturnValue({ status: 0, stdout: "", stderr: "" }); + updateSandboxMock.mockImplementation((name: string, _updates: Partial) => { + if (name === "legacy") throw new Error("backfill boom"); + return true; + }); + process.env.NEMOCLAW_NON_INTERACTIVE = "1"; + + // bob already has messagingChannels + a matching hash, so the conflict is + // still found -> non-interactive abort. Key guarantee: a throw inside + // backfillMessagingChannels is swallowed; the only exit is the conflict + // exit(1), not an unhandled exception. + await expect(addSandboxChannel("alpha", { channel: "telegram" })).rejects.toThrow( + "process.exit(1)", + ); + expect(exitMock).toHaveBeenCalledWith(1); + expect(loggedText()).toContain("same telegram credential"); + }); + + it("probe + backfill failure with no pre-recorded conflict lets the add proceed", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [], // no other sandbox -> no conflict resolvable + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + runOpenshellMock.mockReturnValue({ status: 1, stdout: "", stderr: "down" }); + + await addSandboxChannel("alpha", { channel: "telegram" }); + + expect(exitMock).not.toHaveBeenCalled(); + expect(upsertMock).toHaveBeenCalledTimes(1); + expect(updateSandboxMock).toHaveBeenCalledWith("alpha", expect.any(Object)); + }); + + // Scenario 10 + it("never prints the raw token value in any conflict output (proceed path)", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + promptMock.mockResolvedValue("y"); + + await addSandboxChannel("alpha", { channel: "telegram" }); + + const text = loggedText(); + expect(text).toContain("same telegram credential"); // sanity + expect(text).not.toContain(TELEGRAM_TOKEN); // no raw secret + expect(text).not.toContain(TELEGRAM_HASH); // hash not in conflict warning text + }); + + it("non-interactive abort path also keeps the raw token out of output", async () => { + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["telegram"], + providerCredentialHashes: { TELEGRAM_BOT_TOKEN: TELEGRAM_HASH }, + }, + ], + }); + getCredentialMock.mockReturnValue(TELEGRAM_TOKEN); + process.env.NEMOCLAW_NON_INTERACTIVE = "1"; + + await expect(addSandboxChannel("alpha", { channel: "telegram" })).rejects.toThrow( + "process.exit(1)", + ); + + expect(loggedText()).not.toContain(TELEGRAM_TOKEN); + }); + + // Scenario 11 + it("slack two-token channel: matching SLACK_BOT_TOKEN hash is detected", async () => { + const slackBot = "xoxb-1234-5678-abcdef"; + const slackApp = "xapp-1-A0000-12345-abcdef"; + const slackBotHash = hashCredential(slackBot) as string; + arrangeRegistry({ + current: { name: "alpha", messagingChannels: [] }, + others: [ + { + name: "bob", + messagingChannels: ["slack"], + providerCredentialHashes: { SLACK_BOT_TOKEN: slackBotHash }, // only bot token matches + }, + ], + }); + getCredentialMock.mockImplementation((key: string) => { + if (key === "SLACK_BOT_TOKEN") return slackBot; + if (key === "SLACK_APP_TOKEN") return slackApp; + return null; + }); + promptMock.mockResolvedValue("y"); + + await addSandboxChannel("alpha", { channel: "slack" }); + + const text = loggedText(); + expect(text).toContain("bob"); + expect(text).toContain("same slack credential"); // matching-token wording + expect(text).not.toContain(slackBot); + expect(text).not.toContain(slackApp); + expect(upsertMock).toHaveBeenCalledTimes(1); + }); +}); diff --git a/src/lib/actions/sandbox/policy-channel.ts b/src/lib/actions/sandbox/policy-channel.ts index f8b11d4b42..beee2b924f 100644 --- a/src/lib/actions/sandbox/policy-channel.ts +++ b/src/lib/actions/sandbox/policy-channel.ts @@ -5,50 +5,54 @@ import fs from "node:fs"; import path from "node:path"; -import { loadAgent, type AgentDefinition } from "../../agent/defs"; +import { type AgentDefinition, loadAgent } from "../../agent/defs"; import { CLI_DISPLAY_NAME, CLI_NAME } from "../../cli/branding"; -import { hashCredential } from "../../security/credential-hash"; -import { getCredential, prompt as askPrompt } from "../../credentials/store"; +import { prompt as askPrompt, getCredential } from "../../credentials/store"; import { recoverNamedGatewayRuntime } from "../../gateway-runtime-action"; +import { hashCredential } from "../../security/credential-hash"; + const { isNonInteractive } = require("../../onboard") as { isNonInteractive: () => boolean }; const onboardProviders = require("../../onboard/providers"); + import * as policies from "../../policy"; + // Lazy-required: keeps qrcode-terminal + the iLink HTTP client out of the // import graph for non-host-qr channels-add calls. const { HOST_QR_LOGIN_HANDLERS } = require("../../host-qr-handlers") as typeof import("../../host-qr-handlers"); const onboardSession = require("../../state/onboard-session") as typeof import("../../state/onboard-session"); +import { runOpenshell } from "../../adapters/openshell/runtime"; import { - parsePolicyAddOptions, type PolicyAddOptions, type PolicyRemoveOptions, + parsePolicyAddOptions, } from "../../domain/policy-channel"; -import * as registry from "../../state/registry"; -import { runOpenshell } from "../../adapters/openshell/runtime"; +import type { HostQrLoginResult } from "../../host-qr-handlers"; import { shellQuote } from "../../runner"; -import { - isDockerRuntimeDown, - printDockerRuntimeDownGuidance, -} from "./gateway-failure-classifier"; -import { executeSandboxCommand, executeSandboxExecCommand } from "./process-recovery"; -import { rebuildSandbox } from "./rebuild"; -import { validateSlackChannelCredentials } from "./slack-channel-validation"; -import { printTelegramDirectMessageAllowlistWarning } from "./telegram-channel-bridge-verification"; import { type ChannelDef, - KNOWN_CHANNELS, channelUsesInSandboxQrPairing, clearChannelTokens, getChannelDef, getChannelTokenKeys, + KNOWN_CHANNELS, knownChannelNames, persistChannelTokens, } from "../../sandbox/channels"; -import type { HostQrLoginResult } from "../../host-qr-handlers"; +import * as registry from "../../state/registry"; +import { + isDockerRuntimeDown, + printDockerRuntimeDownGuidance, +} from "./gateway-failure-classifier"; +import { executeSandboxCommand, executeSandboxExecCommand } from "./process-recovery"; +import { rebuildSandbox } from "./rebuild"; +import { validateSlackChannelCredentials } from "./slack-channel-validation"; +import { printTelegramDirectMessageAllowlistWarning } from "./telegram-channel-bridge-verification"; type ChannelMutationOptions = { channel?: string; dryRun?: boolean; + force?: boolean; }; const useColor = !process.env.NO_COLOR && !!process.stdout.isTTY; @@ -323,6 +327,98 @@ function bridgeProviderName(sandboxName: string, channelName: string, envKey: st return `${sandboxName}-${channelName}-bridge`; } +// Tri-state gateway probe for cross-sandbox messaging-conflict backfill, +// mirroring onboard.ts makeConflictProbe(). An upfront liveness check keeps a +// transient gateway failure ("error") from being mis-recorded as "no +// providers" ("absent"), which would permanently suppress backfill retries. +function makeChannelsConflictProbe() { + let gatewayAlive: boolean | null = null; + const isGatewayAlive = (): boolean => { + if (gatewayAlive === null) { + const result = runOpenshell(["sandbox", "list"], { + ignoreError: true, + stdio: ["ignore", "ignore", "ignore"], + }); + gatewayAlive = result.status === 0; + } + return gatewayAlive; + }; + return { + providerExists: (name: string): "present" | "absent" | "error" => { + if (!isGatewayAlive()) return "error"; + return onboardProviders.providerExistsInGateway(name, runOpenshell) ? "present" : "absent"; + }, + }; +} + +// Detect whether another sandbox already uses one of this channel's +// credentials. Mirrors the onboard.ts conflict check. Returns true if the +// caller should PROCEED with the add, false if it should abort. Never logs +// credential values — only the non-secret hashes computed inline are passed +// to findChannelConflicts. Probe/backfill failures are swallowed +// (non-fatal): backfillMessagingChannels already skips sandboxes whose probe +// errors, so a flaky gateway degrades to "no conflict found" rather than +// blocking the add. +async function checkChannelAddConflict( + sandboxName: string, + channelName: string, + acquired: Record, + force: boolean, +): Promise { + // QR-paired / tokenless adds have empty `acquired` and no host-side + // credential to hash. Skip — there is no credential to collide on, and + // findChannelConflicts with empty credentialHashes would only ever report + // "unknown-token" noise against every other sandbox holding the channel. + const credentialHashes: Record = {}; + for (const [envKey, token] of Object.entries(acquired)) { + const hash = hashCredential(token); + if (hash) credentialHashes[envKey] = hash; + } + if (Object.keys(credentialHashes).length === 0) return true; + + const { backfillMessagingChannels, findChannelConflicts } = + require("../../messaging-conflict") as typeof import("../../messaging-conflict"); + + try { + backfillMessagingChannels(registry, makeChannelsConflictProbe()); + } catch { + // Non-fatal: a backfill blow-up must not block adding a channel. + } + + let conflicts: ReturnType; + try { + conflicts = findChannelConflicts(sandboxName, [{ channel: channelName, credentialHashes }], registry); + } catch { + return true; + } + if (conflicts.length === 0) return true; + + for (const { channel, sandbox, reason } of conflicts) { + const detail = + reason === "matching-token" + ? `uses the same ${channel} credential` + : `already has ${channel} enabled, but its credential hash is unavailable`; + console.log( + ` ${YW}⚠${R} Sandbox '${sandbox}' ${detail}. Shared channel credentials only allow one sandbox to poll/connect — continuing may break both bridges (e.g. Telegram getUpdates 409).`, + ); + } + + if (force) { + console.log(` --force: proceeding despite the messaging channel conflict above.`); + return true; + } + if (isNonInteractive()) { + console.error( + ` Aborting: resolve the messaging channel conflict above, run \`${CLI_NAME} channels remove ${channelName}\` on the other sandbox, or re-run with --force.`, + ); + process.exit(1); + } + const answer = (await askPrompt(" Continue anyway? [y/N]: ")).trim().toLowerCase(); + if (answer === "y" || answer === "yes") return true; + console.log(" Aborting channel add."); + return false; +} + // Push channel tokens to the OpenShell gateway and add the channel to the // sandbox registry's messagingChannels list. Done eagerly at `channels // add` time (not deferred to rebuild) because the host-side credential @@ -799,6 +895,7 @@ export async function addSandboxChannel( options: ChannelMutationOptions = {}, ): Promise { const dryRun = Boolean(options.dryRun); + const force = Boolean(options.force); const rawChannelArg = options.channel; if (!rawChannelArg) { console.error(` Usage: ${CLI_NAME} channels add [--dry-run]`); @@ -901,6 +998,9 @@ export async function addSandboxChannel( } persistChannelTokens(acquired); + if (!(await checkChannelAddConflict(sandboxName, canonical, acquired, force))) { + return; // user aborted; nothing registered or widened + } // Push to the gateway and update the registry NOW so that answering // "rebuild later" (or running non-interactively) does not silently // discard the change. Pre-fix this was safe because saveCredential() diff --git a/src/lib/sandbox/channels-command-support.ts b/src/lib/sandbox/channels-command-support.ts index a8a5ce3fb0..d41c114382 100644 --- a/src/lib/sandbox/channels-command-support.ts +++ b/src/lib/sandbox/channels-command-support.ts @@ -3,11 +3,12 @@ import { Args } from "@oclif/core"; -import { dryRunFlag } from "../cli/common-flags"; +import { dryRunFlag, forceFlag } from "../cli/common-flags"; export type ChannelMutationOptions = { channel?: string; dryRun?: boolean; + force?: boolean; }; const sandboxNameArg = Args.string({ name: "sandbox", description: "Sandbox name", required: true }); @@ -15,11 +16,12 @@ const channelArg = Args.string({ name: "channel", description: "Messaging channe export function channelMutationOptions( channel: string | undefined, - flags: { "dry-run"?: boolean }, + flags: { "dry-run"?: boolean; force?: boolean }, ): ChannelMutationOptions { return { channel, dryRun: Boolean(flags["dry-run"]), + force: Boolean(flags.force), }; } @@ -30,4 +32,5 @@ export const channelMutationArgs = { export const channelMutationFlags = { "dry-run": dryRunFlag("Preview the change without applying it"), + force: forceFlag("Add the channel even if another sandbox already uses this credential"), }; From 43931af9de3178669b0f6a9d16fd9b33f24de10a Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Tue, 2 Jun 2026 15:03:43 +0800 Subject: [PATCH 2/6] fix(cli): scope channels-add --force flag to add only The `--force` override added for the cross-sandbox messaging conflict check (#4305) lived on the shared channelMutationFlags, so it surfaced as a no-op `--force` on `channels remove/start/stop` and failed the CLI/docs flag-parity check (the flag was undocumented on those commands). Move `--force` into an add-only flag set, document it under the `channels add` reference section, and update the command-layer test to assert the flag is exposed only on add. No behavior change to the conflict check itself. Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/reference/commands.mdx | 1 + src/commands/sandbox/channels/add.ts | 6 ++-- src/commands/sandbox/channels/mutate.test.ts | 31 ++++++-------------- src/lib/sandbox/channels-command-support.ts | 8 +++++ 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/docs/reference/commands.mdx b/docs/reference/commands.mdx index ddcb99d617..4831919115 100644 --- a/docs/reference/commands.mdx +++ b/docs/reference/commands.mdx @@ -744,6 +744,7 @@ $ nemoclaw my-assistant channels add telegram | Flag | Description | |------|-------------| | `--dry-run` | Validate the channel name and matching policy preset without prompting for credentials, contacting the gateway, or rebuilding | +| `--force` | Add the channel even when another sandbox already uses the same messaging credential, bypassing the cross-sandbox conflict warning (otherwise `channels add` warns and, when non-interactive, aborts) | Slack requires both `SLACK_BOT_TOKEN` (bot user OAuth) and `SLACK_APP_TOKEN` (app-level Socket Mode token); the command prompts for each in turn. Optional Slack allowlists come from `SLACK_ALLOWED_USERS` and `SLACK_ALLOWED_CHANNELS` at rebuild time. diff --git a/src/commands/sandbox/channels/add.ts b/src/commands/sandbox/channels/add.ts index 6b1c05292e..b0be6622c9 100644 --- a/src/commands/sandbox/channels/add.ts +++ b/src/commands/sandbox/channels/add.ts @@ -7,7 +7,7 @@ import { NemoClawCommand } from "../../../lib/cli/nemoclaw-oclif-command"; import { channelMutationOptions, channelMutationArgs, - channelMutationFlags, + channelAddFlags, } from "../../../lib/sandbox/channels-command-support"; export default class ChannelsAddCommand extends NemoClawCommand { @@ -15,10 +15,10 @@ export default class ChannelsAddCommand extends NemoClawCommand { static strict = true; static summary = "Save messaging channel credentials and rebuild"; static description = "Store credentials for a messaging channel and queue a sandbox rebuild."; - static usage = [" [--dry-run]"]; + static usage = [" [--dry-run] [--force]"]; static examples = ["<%= config.bin %> sandbox channels add alpha telegram"]; static args = channelMutationArgs; - static flags = channelMutationFlags; + static flags = channelAddFlags; public async run(): Promise { const { args, flags } = await this.parse(ChannelsAddCommand); diff --git a/src/commands/sandbox/channels/mutate.test.ts b/src/commands/sandbox/channels/mutate.test.ts index 9857aa4a69..b232512467 100644 --- a/src/commands/sandbox/channels/mutate.test.ts +++ b/src/commands/sandbox/channels/mutate.test.ts @@ -89,28 +89,15 @@ describe("channels mutation oclif commands", () => { }); }); - // Scenario 12 (#4305): the shared flag exposes a (no-op) --force on - // remove/start/stop too; confirm it is threaded so the no-op is faithful. - it("threads --force through remove/start/stop (shared flag, no-op downstream)", async () => { - await ChannelsRemoveCommand.run(["alpha", "telegram", "--force"], rootDir); - await ChannelsStartCommand.run(["alpha", "telegram", "--force"], rootDir); - await ChannelsStopCommand.run(["alpha", "slack", "--force"], rootDir); - - expect(mocks.removeSandboxChannel).toHaveBeenCalledWith("alpha", { - channel: "telegram", - dryRun: false, - force: true, - }); - expect(mocks.startSandboxChannel).toHaveBeenCalledWith("alpha", { - channel: "telegram", - dryRun: false, - force: true, - }); - expect(mocks.stopSandboxChannel).toHaveBeenCalledWith("alpha", { - channel: "slack", - dryRun: false, - force: true, - }); + // Scenario 12 (#4305): --force is add-only. Only `channels add` can create a + // cross-sandbox credential overlap, so only it exposes the override; surfacing + // a no-op --force on remove/start/stop would mislead users and break the + // CLI/docs flag-parity check. + it("exposes --force only on add, not on remove/start/stop", () => { + expect(ChannelsAddCommand.flags).toHaveProperty("force"); + expect(ChannelsRemoveCommand.flags).not.toHaveProperty("force"); + expect(ChannelsStartCommand.flags).not.toHaveProperty("force"); + expect(ChannelsStopCommand.flags).not.toHaveProperty("force"); }); it("requires a channel before dispatch", async () => { diff --git a/src/lib/sandbox/channels-command-support.ts b/src/lib/sandbox/channels-command-support.ts index d41c114382..03bc82eca7 100644 --- a/src/lib/sandbox/channels-command-support.ts +++ b/src/lib/sandbox/channels-command-support.ts @@ -32,5 +32,13 @@ export const channelMutationArgs = { export const channelMutationFlags = { "dry-run": dryRunFlag("Preview the change without applying it"), +}; + +// `--force` is add-only: only `channels add` can overlap a messaging +// credential another sandbox already uses, so only it exposes the override. +// Keeping it off remove/start/stop avoids a misleading no-op flag and keeps +// CLI/docs flag parity (the shared object would surface --force everywhere). +export const channelAddFlags = { + ...channelMutationFlags, force: forceFlag("Add the channel even if another sandbox already uses this credential"), }; From ba7f16d9ad4e7d1d4202d2a4073155ea30193409 Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Tue, 2 Jun 2026 15:22:42 +0800 Subject: [PATCH 3/6] test(cli): stub sandbox exec seam in channels-add conflict tests verifyChannelBridgeAfterRebuild (run after a successful interactive `channels add`) probes the sandbox via executeSandboxExecCommand, which calls getOpenshellBinary() -> process.exit(1) when the openshell binary is absent. On the CI unit-test runner openshell is not installed, so that exit fired (caught by the exec wrapper, but still recorded by the process.exit spy), failing the "add proceeds" assertion expect(exitMock).not.toHaveBeenCalled() in policy-channel-conflict.test.ts. Locally openshell is installed, so it only reproduced in CI. Stub executeSandboxExecCommand/executeSandboxCommand at the process-recovery boundary (matching channel-status.test.ts) so the downstream bridge verification never shells out. Test-only; no source/behavior change. Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- .../actions/sandbox/policy-channel-conflict.test.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/lib/actions/sandbox/policy-channel-conflict.test.ts b/src/lib/actions/sandbox/policy-channel-conflict.test.ts index 6580342e06..6858fe18a4 100644 --- a/src/lib/actions/sandbox/policy-channel-conflict.test.ts +++ b/src/lib/actions/sandbox/policy-channel-conflict.test.ts @@ -38,6 +38,7 @@ const runtime = D("adapters/openshell/runtime.js"); const gatewayRuntime = D("gateway-runtime-action.js"); const defs = D("agent/defs.js"); const rebuild = D("actions/sandbox/rebuild.js"); +const processRecovery = D("actions/sandbox/process-recovery.js"); const onboardSession = D("state/onboard-session.js"); const slackValidation = D("actions/sandbox/slack-channel-validation.js"); const policy = D("policy/index.js"); @@ -141,6 +142,15 @@ beforeEach(() => { // Downstream rebuild is not under test. vi.spyOn(rebuild, "rebuildSandbox").mockResolvedValue(undefined); + // After a successful interactive add, verifyChannelBridgeAfterRebuild probes + // the sandbox via executeSandboxExecCommand, which calls getOpenshellBinary() + // -> process.exit(1) when the openshell binary is absent (e.g. the CI + // unit-test runner; locally it is installed, so this only bites in CI). Stub + // the exec seam so the post-add verification never shells out and never trips + // the exit spy. The bridge verification is downstream and not under test here. + vi.spyOn(processRecovery, "executeSandboxExecCommand").mockReturnValue(null); + vi.spyOn(processRecovery, "executeSandboxCommand").mockReturnValue(null); + // Slack add runs a credential validation (auth.test-style) before the // conflict check; that is its own concern and would otherwise probe Slack. // Stub it to pass so scenario 11 can reach the conflict logic. From 953229d66b9112c0427a0d727da13d11270a71be Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Thu, 4 Jun 2026 10:00:54 +0800 Subject: [PATCH 4/6] docs(cli): sync nemohermes variant for channels-add --force flag The agent-variant parity check (preview job) requires commands-nemohermes.mdx to mirror commands.mdx. Regenerate via docs:sync-agent-variants to propagate the new --force flag row that was added to commands.mdx. Generated file; no manual content. Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/reference/commands-nemohermes.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference/commands-nemohermes.mdx b/docs/reference/commands-nemohermes.mdx index 1e637bab8d..d3559bca3e 100644 --- a/docs/reference/commands-nemohermes.mdx +++ b/docs/reference/commands-nemohermes.mdx @@ -747,6 +747,7 @@ nemohermes my-assistant channels add telegram | Flag | Description | |------|-------------| | `--dry-run` | Validate the channel name and matching policy preset without prompting for credentials, contacting the gateway, or rebuilding | +| `--force` | Add the channel even when another sandbox already uses the same messaging credential, bypassing the cross-sandbox conflict warning (otherwise `channels add` warns and, when non-interactive, aborts) | Slack requires both `SLACK_BOT_TOKEN` (bot user OAuth) and `SLACK_APP_TOKEN` (app-level Socket Mode token); the command prompts for each in turn. Optional Slack allowlists come from `SLACK_ALLOWED_USERS` and `SLACK_ALLOWED_CHANNELS` at rebuild time. From a0085a72d7e69f712cfa38208ff985476fc6ce39 Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Thu, 4 Jun 2026 16:54:59 +0800 Subject: [PATCH 5/6] ci: re-trigger run (flaky 5s timeout in unrelated repro-2666 CLI-subprocess test) The `checks` prek Test (CLI) hook runs vitest at the default 5s timeout; test/repro-2666-silent-list-status.test.ts spawns the real CLI per case and intermittently exceeds it under runner load. Unrelated to this PR's files; passed in prior runs. Empty commit to re-run CI (no rerun admin on the repo). Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- src/lib/actions/sandbox/connect.ts | 41 +-- src/lib/onboard.ts | 5 + .../machine/handlers/finalization.test.ts | 50 ++++ .../onboard/machine/handlers/finalization.ts | 12 + test/sandbox-connect-inference.test.ts | 235 ++++++++++++++++++ 5 files changed, 329 insertions(+), 14 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index 48629c0ed7..c840bad608 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -156,6 +156,7 @@ function runSandboxConnectProbe(sandboxName: string): void { } if (processCheck.wasRunning) { ensureSandboxInferenceRoute(sandboxName, { quiet: true }); + runConnectAutoPairApprovalPass(sandboxName); if (processCheck.forwardRecovered) { console.log( ` Probe complete: ${agentName} gateway is running in '${sandboxName}'; restored dashboard port forward.`, @@ -167,6 +168,7 @@ function runSandboxConnectProbe(sandboxName: string): void { } if (processCheck.recovered) { ensureSandboxInferenceRoute(sandboxName, { quiet: true }); + runConnectAutoPairApprovalPass(sandboxName); console.log(` Probe complete: recovered ${agentName} gateway in '${sandboxName}'.`); return; } @@ -663,9 +665,11 @@ function ensureSandboxInferenceRouteOrExit( // `/tmp/nemoclaw-proxy-env.sh` (written by `nemoclaw-start.sh`) so the // in-sandbox `openclaw devices list` invocation targets the running gateway // with its token. Approvals then use OpenClaw's local fallback by removing -// OPENCLAW_GATEWAY_URL only from the child env, and apply the same allowlist -// as the startup watcher — `openclaw-control-ui` clients plus `webchat`/`cli` -// modes. Unknown clients are ignored, not approved. +// OPENCLAW_GATEWAY_URL, OPENCLAW_GATEWAY_PORT, and OPENCLAW_GATEWAY_TOKEN from +// the child env (matching the in-sandbox watcher and interactive `openclaw()` +// wrapper), and apply the same allowlist as the startup watcher — +// `openclaw-control-ui` clients plus `webchat`/`cli` modes. Unknown clients +// are ignored, not approved. // // Workaround boundary (NemoClaw#4462): OpenClaw owns device-pairing approval // semantics. In OpenClaw 2026.5.x, a gateway-pinned `devices approve` for a @@ -676,13 +680,19 @@ function ensureSandboxInferenceRouteOrExit( // // Failure modes (timeout, sandbox-exec errors, missing openclaw, gateway // unreachable) are swallowed: the connect flow must not be blocked by a -// best-effort approval. Internal timeouts (2s list + 1s x MAX_APPROVALS -// attempts) fit within the outer spawnSync cap, so a partial-completion -// mid-loop kill cannot strand allowlisted requests within a normal batch. -const CONNECT_AUTO_PAIR_MAX_APPROVALS = 8; -const CONNECT_AUTO_PAIR_TIMEOUT_MS = 12_000; - -function runConnectAutoPairApprovalPass(sandboxName: string): void { +// best-effort approval. The approve timeout matches the in-sandbox watcher's +// RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh) so a scope-upgrade approve via the +// local pairing fallback has the same budget. MAX_APPROVALS is 1 — the +// realistic case is a single pending CLI/webchat scope upgrade. The outer +// spawnSync cap must exceed the internal worst case (2s list + 10s × 1 = 12s) +// PLUS shell/python startup, since the outer timer starts at `sh` spawn before +// the proxy env is sourced and python3 launches; 15s leaves ~3s slack so a +// legitimate slow 10s approve is never SIGKILLed mid-loop, which would strand +// the allowlisted request (ref #4504). +const CONNECT_AUTO_PAIR_MAX_APPROVALS = 1; +const CONNECT_AUTO_PAIR_TIMEOUT_MS = 15_000; + +export function runConnectAutoPairApprovalPass(sandboxName: string): void { const script = ` PROXY_ENV=/tmp/nemoclaw-proxy-env.sh [ -r "$PROXY_ENV" ] && . "$PROXY_ENV" @@ -735,11 +745,13 @@ for device in pending: seen_request_ids.add(request_id) approve_env = os.environ.copy() approve_env.pop('OPENCLAW_GATEWAY_URL', None) + approve_env.pop('OPENCLAW_GATEWAY_PORT', None) + approve_env.pop('OPENCLAW_GATEWAY_TOKEN', None) attempted_count += 1 try: approve_proc = subprocess.run( [OPENCLAW, 'devices', 'approve', request_id, '--json'], - capture_output=True, text=True, timeout=1, env=approve_env, + capture_output=True, text=True, timeout=10, env=approve_env, ) if approve_proc.returncode == 0: approved_count += 1 @@ -749,9 +761,10 @@ PYAPPROVE exit 0 `; try { - // Best-effort: discard stdout/stderr. Outer cap is sized to cover the - // internal budget (2s list + 1s × MAX_APPROVALS plus shell/python - // startup slack) so a wedged sandbox can never block the connect flow. + // Best-effort: discard stdout/stderr. Outer cap (15s) covers the internal + // budget (2s list + 10s × MAX_APPROVALS) plus shell/python startup slack so + // a legitimate slow approve is never killed mid-loop and a wedged sandbox + // can never block the connect flow. spawnSync( getOpenshellBinary(), ["sandbox", "exec", "--name", sandboxName, "--", "sh", "-c", script], diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index b287a2b556..67c1e2f425 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -7029,6 +7029,11 @@ async function onboard(opts: OnboardOptions = {}): Promise { require("./actions/sandbox/process-recovery"); processRecovery.checkAndRecoverSandboxProcesses(name, options); }, + autoPairScopeApproval: (name) => { + const connect: typeof import("./actions/sandbox/connect") = + require("./actions/sandbox/connect"); + connect.runConnectAutoPairApprovalPass(name); + }, getChatUiUrl: () => process.env.CHAT_UI_URL || `http://127.0.0.1:${DASHBOARD_PORT}`, buildVerifyChain: (chatUiUrl) => buildChain({ chatUiUrl, isWsl: isWsl(), wslHostAddress: getWslHostAddress() }), diff --git a/src/lib/onboard/machine/handlers/finalization.test.ts b/src/lib/onboard/machine/handlers/finalization.test.ts index aa6e6b2ad8..7fb77433c1 100644 --- a/src/lib/onboard/machine/handlers/finalization.test.ts +++ b/src/lib/onboard/machine/handlers/finalization.test.ts @@ -19,6 +19,7 @@ function createDeps(overrides: Partial "http://127.0.0.1:18789"), buildChain: vi.fn(() => ({ port: 18789 })), verify: vi.fn(async () => ({ ok: true })), @@ -39,6 +40,7 @@ function createDeps(overrides: Partial { callsOn.verify.mock.invocationCallOrder[0], ); }); + + // Scenario A (#4504): the auto-pair scope-approval sweep runs against the + // freshly-recovered gateway — strictly after process recovery (which can + // restart the gateway, #3573) and strictly before deployment verification + // (so the gateway state is settled before we probe it). + it("runs the auto-pair scope-approval sweep after process recovery and before verify (#4504)", async () => { + const { deps, calls } = createDeps(); + + await handleFinalizationState(baseOptions(deps)); + + expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce(); + expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant"); + // Ordering: recover → autoPairScopeApproval → verify. + expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeGreaterThan( + calls.recoverProcesses.mock.invocationCallOrder[0], + ); + expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeLessThan( + calls.verify.mock.invocationCallOrder[0], + ); + }); + + // Scenario B (#4504): the sweep is agent-agnostic — the stuck CLI/webchat + // scope upgrade can occur regardless of which agent the sandbox runs. + it("runs the scope-approval sweep regardless of agent type (#4504)", async () => { + const { deps, calls } = createDeps(); + const agent = { name: "hermes" }; + + await handleFinalizationState({ ...baseOptions(deps), agent }); + + expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant"); + }); + + // Scenario C (#4504): the dep is documented as best-effort / never-throws and + // the handler wraps no try/catch around it. Per the contract we assert the + // implemented behavior: the sweep is invoked and, because it returns cleanly, + // finalization proceeds to completion (recordSessionComplete still runs). A + // dep that threw would abort finalization here — the regression this guards. + it("treats the scope-approval sweep as best-effort and still completes the session (#4504)", async () => { + const { deps, calls } = createDeps(); + + const result = await handleFinalizationState(baseOptions(deps)); + + expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce(); + // The non-throwing sweep does not abort finalization. + expect(calls.complete).toHaveBeenCalledOnce(); + expect(calls.dashboard).toHaveBeenCalledOnce(); + expect(result.verificationDiagnostics).toEqual([" ✓ verified"]); + }); }); diff --git a/src/lib/onboard/machine/handlers/finalization.ts b/src/lib/onboard/machine/handlers/finalization.ts index 760cdf1561..df137bdf29 100644 --- a/src/lib/onboard/machine/handlers/finalization.ts +++ b/src/lib/onboard/machine/handlers/finalization.ts @@ -28,6 +28,14 @@ export interface FinalizationStateOptions; @@ -94,6 +102,10 @@ export async function handleFinalizationState { }, ); }); + +/** + * Tests for #4504 — the auto-pair scope-approval pass must also run on the + * recover / `connect --probe-only` path (defect A), and the shared pass must be + * able to actually complete a scope-upgrade approve (defect B): strip the full + * gateway env triplet and use the watcher's 10s approve budget while staying + * within the outer spawnSync cap. + * + * Helper: locate the approval-pass `sandbox exec` invocation (identified by the + * embedded `openclaw devices approve` call) and return its rendered script. + */ +function findApprovalExec(sandboxExecCalls: string[][]): string[] | undefined { + return sandboxExecCalls.find( + (call) => + call.includes("--") && + call.some((segment) => segment.includes("openclaw")) && + call.some((segment) => segment.includes("devices")) && + call.some((segment) => segment.includes("approve")), + ); +} + +describe("sandbox connect scope-upgrade approval on recover/probe (#4504)", () => { + it( + "runs the approval pass on the --probe-only (recover) path", + testTimeoutOptions(20_000), + () => { + // Scenario D: the probe takes the wasRunning branch (the fake openshell's + // health probe reports RUNNING), so the recover path must run the sweep — + // and must NOT open an SSH session. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "probe-approval-sandbox", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeDefined(); + // Targets the requested sandbox via `sandbox exec --name -- sh -c`. + expect(approvalExec).toContain("sandbox"); + expect(approvalExec).toContain("exec"); + expect(approvalExec).toContain("--name"); + expect(approvalExec).toContain(sandboxName); + // probe-only never opens an SSH connect session. + expect(state.sandboxConnectCalls).toEqual([]); + }, + ); + + it( + "does not fail the recover path when the probe approval pass errors", + testTimeoutOptions(20_000), + () => { + // Scenario D (best-effort): even when the in-sandbox approval exec exits + // non-zero, the probe-only flow must still succeed. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "probe-approval-tolerant", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, { + NEMOCLAW_TEST_FAIL_APPROVAL_PASS: "1", + }, ["--probe-only"]); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeDefined(); + }, + ); + + it( + "does not run the approval pass when the probe fails (gateway down, recovery fails)", + testTimeoutOptions(20_000), + () => { + // Scenario D (negative half): the sweep is wired only into the wasRunning + // and recovered success branches — never the not-checked early exit nor + // the not-running failure exit, where the gateway is un-inspectable / down. + // Force the health probe to report STOPPED and let recovery fail so the + // probe lands on the failure branch; the approval pass must NOT run. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "probe-gateway-down", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, { + NEMOCLAW_TEST_GATEWAY_DOWN: "1", + }, ["--probe-only"]); + expect(result.status).toBe(1); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeUndefined(); + // And it never opens an SSH session on the failure path. + expect(state.sandboxConnectCalls).toEqual([]); + }, + ); + + it( + "approve child strips the full gateway env triplet (#4462 self-defeat fix)", + testTimeoutOptions(20_000), + () => { + // Scenario E: the approve child must drop OPENCLAW_GATEWAY_URL, _PORT and + // _TOKEN so the local pairing fallback cannot re-pin to the gateway and + // hit the #4462 self-defeat. Assert on the rendered script text. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "env-strip-sandbox", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeDefined(); + const script = approvalExec?.[approvalExec.length - 1] || ""; + expect(script).toContain("approve_env = os.environ.copy()"); + expect(script).toContain("approve_env.pop('OPENCLAW_GATEWAY_URL', None)"); + expect(script).toContain("approve_env.pop('OPENCLAW_GATEWAY_PORT', None)"); + expect(script).toContain("approve_env.pop('OPENCLAW_GATEWAY_TOKEN', None)"); + // The stripped env is the one passed to the approve subprocess. + expect(script).toContain("env=approve_env"); + }, + ); + + it( + "approve timeout matches the watcher (10s) and stays within the outer cap", + testTimeoutOptions(20_000), + () => { + // Scenario F: assert the approve subprocess uses timeout=10 (matching the + // in-sandbox watcher RUN_TIMEOUT_SECS) and the list call keeps timeout=2, + // then check the budget invariant against the outer spawnSync cap. + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "approve-budget-sandbox", + model: "claude-sonnet-4-20250514", + provider: "anthropic-prod", + gpuEnabled: false, + policies: [], + }, + "anthropic-prod", + "claude-sonnet-4-20250514", + ); + + const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); + expect(approvalExec).toBeDefined(); + const script = approvalExec?.[approvalExec.length - 1] || ""; + + // List call keeps a tight 2s budget; approve call matches the watcher's + // RUN_TIMEOUT_SECS = 10s. + const listTimeout = 2; + expect(script).toContain("[OPENCLAW, 'devices', 'list', '--json']"); + expect(script).toMatch( + /\[OPENCLAW, 'devices', 'list', '--json'\][\s\S]*?timeout=2,/, + ); + const approveTimeoutMatch = script.match( + /'devices', 'approve', request_id, '--json'\][\s\S]*?timeout=(\d+),/, + ); + expect(approveTimeoutMatch).not.toBeNull(); + const approveTimeout = Number(approveTimeoutMatch?.[1]); + expect(approveTimeout).toBe(10); + + // MAX_APPROVALS is rendered into the script as a literal. + const maxApprovalsMatch = script.match(/MAX_APPROVALS = (\d+)/); + expect(maxApprovalsMatch).not.toBeNull(); + const maxApprovals = Number(maxApprovalsMatch?.[1]); + + // Read the real outer cap from source rather than hardcoding it, so this + // invariant tracks any future change to CONNECT_AUTO_PAIR_TIMEOUT_MS. + const connectSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "actions", "sandbox", "connect.ts"), + "utf-8", + ); + const outerCapMatch = connectSource.match( + /CONNECT_AUTO_PAIR_TIMEOUT_MS\s*=\s*([\d_]+)/, + ); + expect(outerCapMatch).not.toBeNull(); + const outerCapSeconds = Number(outerCapMatch?.[1].replace(/_/g, "")) / 1000; + + // Budget invariant: the inner worst case (list + approveTimeout × MAX_APPROVALS) + // must stay STRICTLY below the outer spawnSync cap. The outer timer starts + // when `sh` is spawned — before shell startup, sourcing the proxy env, the + // python3 launch, and `devices list` even begin — so the cap must leave + // slack above the inner budget, or a legitimate slow 10s approve is killed + // mid-loop and the allowlisted request is stranded (#4504). + const innerBudgetSeconds = listTimeout + approveTimeout * maxApprovals; + expect(innerBudgetSeconds).toBeLessThan(outerCapSeconds); + }, + ); +}); From 9084b0e6afc29927d05bd4da94a6cf4f18cb7e1e Mon Sep 17 00:00:00 2001 From: Tony Luo Date: Thu, 4 Jun 2026 17:24:38 +0800 Subject: [PATCH 6/6] revert: drop unrelated connect-autopair changes mistakenly committed Commit a0085a72d was intended as an empty CI re-trigger but accidentally swept in staged work from a parallel branch (sandbox connect auto-pair, #4462/#4504): connect.ts, onboard.ts, finalization handler + test. Revert restores the tree to 0f2533d7 so this PR contains only the #4305 cross-sandbox channels-add conflict changes. The connect-autopair work lives on its own branch (fix/4504-onboard-scope-upgrade-autopair). Signed-off-by: Tony Luo Co-Authored-By: Claude Opus 4.8 (1M context) --- src/lib/actions/sandbox/connect.ts | 41 ++- src/lib/onboard.ts | 5 - .../machine/handlers/finalization.test.ts | 50 ---- .../onboard/machine/handlers/finalization.ts | 12 - test/sandbox-connect-inference.test.ts | 235 ------------------ 5 files changed, 14 insertions(+), 329 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index c840bad608..48629c0ed7 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -156,7 +156,6 @@ function runSandboxConnectProbe(sandboxName: string): void { } if (processCheck.wasRunning) { ensureSandboxInferenceRoute(sandboxName, { quiet: true }); - runConnectAutoPairApprovalPass(sandboxName); if (processCheck.forwardRecovered) { console.log( ` Probe complete: ${agentName} gateway is running in '${sandboxName}'; restored dashboard port forward.`, @@ -168,7 +167,6 @@ function runSandboxConnectProbe(sandboxName: string): void { } if (processCheck.recovered) { ensureSandboxInferenceRoute(sandboxName, { quiet: true }); - runConnectAutoPairApprovalPass(sandboxName); console.log(` Probe complete: recovered ${agentName} gateway in '${sandboxName}'.`); return; } @@ -665,11 +663,9 @@ function ensureSandboxInferenceRouteOrExit( // `/tmp/nemoclaw-proxy-env.sh` (written by `nemoclaw-start.sh`) so the // in-sandbox `openclaw devices list` invocation targets the running gateway // with its token. Approvals then use OpenClaw's local fallback by removing -// OPENCLAW_GATEWAY_URL, OPENCLAW_GATEWAY_PORT, and OPENCLAW_GATEWAY_TOKEN from -// the child env (matching the in-sandbox watcher and interactive `openclaw()` -// wrapper), and apply the same allowlist as the startup watcher — -// `openclaw-control-ui` clients plus `webchat`/`cli` modes. Unknown clients -// are ignored, not approved. +// OPENCLAW_GATEWAY_URL only from the child env, and apply the same allowlist +// as the startup watcher — `openclaw-control-ui` clients plus `webchat`/`cli` +// modes. Unknown clients are ignored, not approved. // // Workaround boundary (NemoClaw#4462): OpenClaw owns device-pairing approval // semantics. In OpenClaw 2026.5.x, a gateway-pinned `devices approve` for a @@ -680,19 +676,13 @@ function ensureSandboxInferenceRouteOrExit( // // Failure modes (timeout, sandbox-exec errors, missing openclaw, gateway // unreachable) are swallowed: the connect flow must not be blocked by a -// best-effort approval. The approve timeout matches the in-sandbox watcher's -// RUN_TIMEOUT_SECS = 10 (nemoclaw-start.sh) so a scope-upgrade approve via the -// local pairing fallback has the same budget. MAX_APPROVALS is 1 — the -// realistic case is a single pending CLI/webchat scope upgrade. The outer -// spawnSync cap must exceed the internal worst case (2s list + 10s × 1 = 12s) -// PLUS shell/python startup, since the outer timer starts at `sh` spawn before -// the proxy env is sourced and python3 launches; 15s leaves ~3s slack so a -// legitimate slow 10s approve is never SIGKILLed mid-loop, which would strand -// the allowlisted request (ref #4504). -const CONNECT_AUTO_PAIR_MAX_APPROVALS = 1; -const CONNECT_AUTO_PAIR_TIMEOUT_MS = 15_000; - -export function runConnectAutoPairApprovalPass(sandboxName: string): void { +// best-effort approval. Internal timeouts (2s list + 1s x MAX_APPROVALS +// attempts) fit within the outer spawnSync cap, so a partial-completion +// mid-loop kill cannot strand allowlisted requests within a normal batch. +const CONNECT_AUTO_PAIR_MAX_APPROVALS = 8; +const CONNECT_AUTO_PAIR_TIMEOUT_MS = 12_000; + +function runConnectAutoPairApprovalPass(sandboxName: string): void { const script = ` PROXY_ENV=/tmp/nemoclaw-proxy-env.sh [ -r "$PROXY_ENV" ] && . "$PROXY_ENV" @@ -745,13 +735,11 @@ for device in pending: seen_request_ids.add(request_id) approve_env = os.environ.copy() approve_env.pop('OPENCLAW_GATEWAY_URL', None) - approve_env.pop('OPENCLAW_GATEWAY_PORT', None) - approve_env.pop('OPENCLAW_GATEWAY_TOKEN', None) attempted_count += 1 try: approve_proc = subprocess.run( [OPENCLAW, 'devices', 'approve', request_id, '--json'], - capture_output=True, text=True, timeout=10, env=approve_env, + capture_output=True, text=True, timeout=1, env=approve_env, ) if approve_proc.returncode == 0: approved_count += 1 @@ -761,10 +749,9 @@ PYAPPROVE exit 0 `; try { - // Best-effort: discard stdout/stderr. Outer cap (15s) covers the internal - // budget (2s list + 10s × MAX_APPROVALS) plus shell/python startup slack so - // a legitimate slow approve is never killed mid-loop and a wedged sandbox - // can never block the connect flow. + // Best-effort: discard stdout/stderr. Outer cap is sized to cover the + // internal budget (2s list + 1s × MAX_APPROVALS plus shell/python + // startup slack) so a wedged sandbox can never block the connect flow. spawnSync( getOpenshellBinary(), ["sandbox", "exec", "--name", sandboxName, "--", "sh", "-c", script], diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 67c1e2f425..b287a2b556 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -7029,11 +7029,6 @@ async function onboard(opts: OnboardOptions = {}): Promise { require("./actions/sandbox/process-recovery"); processRecovery.checkAndRecoverSandboxProcesses(name, options); }, - autoPairScopeApproval: (name) => { - const connect: typeof import("./actions/sandbox/connect") = - require("./actions/sandbox/connect"); - connect.runConnectAutoPairApprovalPass(name); - }, getChatUiUrl: () => process.env.CHAT_UI_URL || `http://127.0.0.1:${DASHBOARD_PORT}`, buildVerifyChain: (chatUiUrl) => buildChain({ chatUiUrl, isWsl: isWsl(), wslHostAddress: getWslHostAddress() }), diff --git a/src/lib/onboard/machine/handlers/finalization.test.ts b/src/lib/onboard/machine/handlers/finalization.test.ts index 7fb77433c1..aa6e6b2ad8 100644 --- a/src/lib/onboard/machine/handlers/finalization.test.ts +++ b/src/lib/onboard/machine/handlers/finalization.test.ts @@ -19,7 +19,6 @@ function createDeps(overrides: Partial "http://127.0.0.1:18789"), buildChain: vi.fn(() => ({ port: 18789 })), verify: vi.fn(async () => ({ ok: true })), @@ -40,7 +39,6 @@ function createDeps(overrides: Partial { callsOn.verify.mock.invocationCallOrder[0], ); }); - - // Scenario A (#4504): the auto-pair scope-approval sweep runs against the - // freshly-recovered gateway — strictly after process recovery (which can - // restart the gateway, #3573) and strictly before deployment verification - // (so the gateway state is settled before we probe it). - it("runs the auto-pair scope-approval sweep after process recovery and before verify (#4504)", async () => { - const { deps, calls } = createDeps(); - - await handleFinalizationState(baseOptions(deps)); - - expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce(); - expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant"); - // Ordering: recover → autoPairScopeApproval → verify. - expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeGreaterThan( - calls.recoverProcesses.mock.invocationCallOrder[0], - ); - expect(calls.autoPairScopeApproval.mock.invocationCallOrder[0]).toBeLessThan( - calls.verify.mock.invocationCallOrder[0], - ); - }); - - // Scenario B (#4504): the sweep is agent-agnostic — the stuck CLI/webchat - // scope upgrade can occur regardless of which agent the sandbox runs. - it("runs the scope-approval sweep regardless of agent type (#4504)", async () => { - const { deps, calls } = createDeps(); - const agent = { name: "hermes" }; - - await handleFinalizationState({ ...baseOptions(deps), agent }); - - expect(calls.autoPairScopeApproval).toHaveBeenCalledWith("my-assistant"); - }); - - // Scenario C (#4504): the dep is documented as best-effort / never-throws and - // the handler wraps no try/catch around it. Per the contract we assert the - // implemented behavior: the sweep is invoked and, because it returns cleanly, - // finalization proceeds to completion (recordSessionComplete still runs). A - // dep that threw would abort finalization here — the regression this guards. - it("treats the scope-approval sweep as best-effort and still completes the session (#4504)", async () => { - const { deps, calls } = createDeps(); - - const result = await handleFinalizationState(baseOptions(deps)); - - expect(calls.autoPairScopeApproval).toHaveBeenCalledOnce(); - // The non-throwing sweep does not abort finalization. - expect(calls.complete).toHaveBeenCalledOnce(); - expect(calls.dashboard).toHaveBeenCalledOnce(); - expect(result.verificationDiagnostics).toEqual([" ✓ verified"]); - }); }); diff --git a/src/lib/onboard/machine/handlers/finalization.ts b/src/lib/onboard/machine/handlers/finalization.ts index df137bdf29..760cdf1561 100644 --- a/src/lib/onboard/machine/handlers/finalization.ts +++ b/src/lib/onboard/machine/handlers/finalization.ts @@ -28,14 +28,6 @@ export interface FinalizationStateOptions; @@ -102,10 +94,6 @@ export async function handleFinalizationState { }, ); }); - -/** - * Tests for #4504 — the auto-pair scope-approval pass must also run on the - * recover / `connect --probe-only` path (defect A), and the shared pass must be - * able to actually complete a scope-upgrade approve (defect B): strip the full - * gateway env triplet and use the watcher's 10s approve budget while staying - * within the outer spawnSync cap. - * - * Helper: locate the approval-pass `sandbox exec` invocation (identified by the - * embedded `openclaw devices approve` call) and return its rendered script. - */ -function findApprovalExec(sandboxExecCalls: string[][]): string[] | undefined { - return sandboxExecCalls.find( - (call) => - call.includes("--") && - call.some((segment) => segment.includes("openclaw")) && - call.some((segment) => segment.includes("devices")) && - call.some((segment) => segment.includes("approve")), - ); -} - -describe("sandbox connect scope-upgrade approval on recover/probe (#4504)", () => { - it( - "runs the approval pass on the --probe-only (recover) path", - testTimeoutOptions(20_000), - () => { - // Scenario D: the probe takes the wasRunning branch (the fake openshell's - // health probe reports RUNNING), so the recover path must run the sweep — - // and must NOT open an SSH session. - const { tmpDir, stateFile, sandboxName } = setupFixture( - { - name: "probe-approval-sandbox", - model: "claude-sonnet-4-20250514", - provider: "anthropic-prod", - gpuEnabled: false, - policies: [], - }, - "anthropic-prod", - "claude-sonnet-4-20250514", - ); - - const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); - expect(result.status).toBe(0); - - const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); - const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); - expect(approvalExec).toBeDefined(); - // Targets the requested sandbox via `sandbox exec --name -- sh -c`. - expect(approvalExec).toContain("sandbox"); - expect(approvalExec).toContain("exec"); - expect(approvalExec).toContain("--name"); - expect(approvalExec).toContain(sandboxName); - // probe-only never opens an SSH connect session. - expect(state.sandboxConnectCalls).toEqual([]); - }, - ); - - it( - "does not fail the recover path when the probe approval pass errors", - testTimeoutOptions(20_000), - () => { - // Scenario D (best-effort): even when the in-sandbox approval exec exits - // non-zero, the probe-only flow must still succeed. - const { tmpDir, stateFile, sandboxName } = setupFixture( - { - name: "probe-approval-tolerant", - model: "claude-sonnet-4-20250514", - provider: "anthropic-prod", - gpuEnabled: false, - policies: [], - }, - "anthropic-prod", - "claude-sonnet-4-20250514", - ); - - const result = runConnect(tmpDir, sandboxName, { - NEMOCLAW_TEST_FAIL_APPROVAL_PASS: "1", - }, ["--probe-only"]); - expect(result.status).toBe(0); - - const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); - const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); - expect(approvalExec).toBeDefined(); - }, - ); - - it( - "does not run the approval pass when the probe fails (gateway down, recovery fails)", - testTimeoutOptions(20_000), - () => { - // Scenario D (negative half): the sweep is wired only into the wasRunning - // and recovered success branches — never the not-checked early exit nor - // the not-running failure exit, where the gateway is un-inspectable / down. - // Force the health probe to report STOPPED and let recovery fail so the - // probe lands on the failure branch; the approval pass must NOT run. - const { tmpDir, stateFile, sandboxName } = setupFixture( - { - name: "probe-gateway-down", - model: "claude-sonnet-4-20250514", - provider: "anthropic-prod", - gpuEnabled: false, - policies: [], - }, - "anthropic-prod", - "claude-sonnet-4-20250514", - ); - - const result = runConnect(tmpDir, sandboxName, { - NEMOCLAW_TEST_GATEWAY_DOWN: "1", - }, ["--probe-only"]); - expect(result.status).toBe(1); - - const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); - const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); - expect(approvalExec).toBeUndefined(); - // And it never opens an SSH session on the failure path. - expect(state.sandboxConnectCalls).toEqual([]); - }, - ); - - it( - "approve child strips the full gateway env triplet (#4462 self-defeat fix)", - testTimeoutOptions(20_000), - () => { - // Scenario E: the approve child must drop OPENCLAW_GATEWAY_URL, _PORT and - // _TOKEN so the local pairing fallback cannot re-pin to the gateway and - // hit the #4462 self-defeat. Assert on the rendered script text. - const { tmpDir, stateFile, sandboxName } = setupFixture( - { - name: "env-strip-sandbox", - model: "claude-sonnet-4-20250514", - provider: "anthropic-prod", - gpuEnabled: false, - policies: [], - }, - "anthropic-prod", - "claude-sonnet-4-20250514", - ); - - const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); - expect(result.status).toBe(0); - - const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); - const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); - expect(approvalExec).toBeDefined(); - const script = approvalExec?.[approvalExec.length - 1] || ""; - expect(script).toContain("approve_env = os.environ.copy()"); - expect(script).toContain("approve_env.pop('OPENCLAW_GATEWAY_URL', None)"); - expect(script).toContain("approve_env.pop('OPENCLAW_GATEWAY_PORT', None)"); - expect(script).toContain("approve_env.pop('OPENCLAW_GATEWAY_TOKEN', None)"); - // The stripped env is the one passed to the approve subprocess. - expect(script).toContain("env=approve_env"); - }, - ); - - it( - "approve timeout matches the watcher (10s) and stays within the outer cap", - testTimeoutOptions(20_000), - () => { - // Scenario F: assert the approve subprocess uses timeout=10 (matching the - // in-sandbox watcher RUN_TIMEOUT_SECS) and the list call keeps timeout=2, - // then check the budget invariant against the outer spawnSync cap. - const { tmpDir, stateFile, sandboxName } = setupFixture( - { - name: "approve-budget-sandbox", - model: "claude-sonnet-4-20250514", - provider: "anthropic-prod", - gpuEnabled: false, - policies: [], - }, - "anthropic-prod", - "claude-sonnet-4-20250514", - ); - - const result = runConnect(tmpDir, sandboxName, {}, ["--probe-only"]); - expect(result.status).toBe(0); - - const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); - const approvalExec = findApprovalExec(state.sandboxExecCalls as string[][]); - expect(approvalExec).toBeDefined(); - const script = approvalExec?.[approvalExec.length - 1] || ""; - - // List call keeps a tight 2s budget; approve call matches the watcher's - // RUN_TIMEOUT_SECS = 10s. - const listTimeout = 2; - expect(script).toContain("[OPENCLAW, 'devices', 'list', '--json']"); - expect(script).toMatch( - /\[OPENCLAW, 'devices', 'list', '--json'\][\s\S]*?timeout=2,/, - ); - const approveTimeoutMatch = script.match( - /'devices', 'approve', request_id, '--json'\][\s\S]*?timeout=(\d+),/, - ); - expect(approveTimeoutMatch).not.toBeNull(); - const approveTimeout = Number(approveTimeoutMatch?.[1]); - expect(approveTimeout).toBe(10); - - // MAX_APPROVALS is rendered into the script as a literal. - const maxApprovalsMatch = script.match(/MAX_APPROVALS = (\d+)/); - expect(maxApprovalsMatch).not.toBeNull(); - const maxApprovals = Number(maxApprovalsMatch?.[1]); - - // Read the real outer cap from source rather than hardcoding it, so this - // invariant tracks any future change to CONNECT_AUTO_PAIR_TIMEOUT_MS. - const connectSource = fs.readFileSync( - path.join(import.meta.dirname, "..", "src", "lib", "actions", "sandbox", "connect.ts"), - "utf-8", - ); - const outerCapMatch = connectSource.match( - /CONNECT_AUTO_PAIR_TIMEOUT_MS\s*=\s*([\d_]+)/, - ); - expect(outerCapMatch).not.toBeNull(); - const outerCapSeconds = Number(outerCapMatch?.[1].replace(/_/g, "")) / 1000; - - // Budget invariant: the inner worst case (list + approveTimeout × MAX_APPROVALS) - // must stay STRICTLY below the outer spawnSync cap. The outer timer starts - // when `sh` is spawned — before shell startup, sourcing the proxy env, the - // python3 launch, and `devices list` even begin — so the cap must leave - // slack above the inner budget, or a legitimate slow 10s approve is killed - // mid-loop and the allowlisted request is stranded (#4504). - const innerBudgetSeconds = listTimeout + approveTimeout * maxApprovals; - expect(innerBudgetSeconds).toBeLessThan(outerCapSeconds); - }, - ); -});