diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index e498981ee7..f198977134 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -424,6 +424,7 @@ const { } = modelRouter; const routedInference: typeof import("./onboard/routed-inference") = require("./onboard/routed-inference"); const { OnboardRuntimeBoundary }: typeof import("./onboard/runtime-boundary") = require("./onboard/runtime-boundary"); +const { installSandboxCancelRollback, makeOnboardCancelExit, wasSandboxDefault, restoreDefaultAfterRecreate }: typeof import("./onboard/cancel-rollback") = require("./onboard/cancel-rollback"); const { handleAgentSetupState }: typeof import("./onboard/machine/handlers/agent-setup") = require("./onboard/machine/handlers/agent-setup"); const { handleFinalizationState }: typeof import("./onboard/machine/handlers/finalization") = require("./onboard/machine/handlers/finalization"); const { handleGatewayState }: typeof import("./onboard/machine/handlers/gateway") = require("./onboard/machine/handlers/gateway"); @@ -2980,6 +2981,8 @@ async function createSandbox( // Reconcile local registry state with the live OpenShell gateway state. const liveExists = pruneStaleSandboxEntry(sandboxName); + // #4614: capture default AFTER prune so a stale registry row isn't read as a live sandbox. + const sandboxWasLiveDefault = liveExists && wasSandboxDefault(registry.getDefault(), sandboxName); // Declared outside the liveExists block so it is accessible during // post-creation restore (the sandbox create path runs after the block). @@ -3818,7 +3821,7 @@ async function createSandbox( gatewayName: GATEWAY_NAME, gatewayPort: GATEWAY_PORT, }); - registry.setDefault(sandboxName); + restoreDefaultAfterRecreate(registry.setDefault, sandboxName, sandboxWasLiveDefault); // #4614: default deferred to finalization if (restoreBackupPath) { note( @@ -3863,6 +3866,8 @@ async function createSandbox( warnIfLandlockUnsupported({ dockerInfoFormat, runCapture }); + // #4614: arm rollback only when the sandbox was not live before (never a recreate/rebuild). + if (!liveExists) sandboxCancelRollback.arm(sandboxName); return sandboxName; } @@ -5856,10 +5861,7 @@ async function selectPolicyTier(): Promise { process.removeListener("SIGTERM", onSigterm); }; - const onSigterm = () => { - cleanup(); - process.exit(1); - }; + const onSigterm = makeOnboardCancelExit(sandboxCancelRollback, cleanup); process.once("SIGTERM", onSigterm); const onData = (key: string) => { @@ -5871,8 +5873,7 @@ async function selectPolicyTier(): Promise { selectedIdx = cursor; redraw(); } else if (key === "\x03") { - cleanup(); - process.exit(1); + makeOnboardCancelExit(sandboxCancelRollback, cleanup)(); } else if (key === "\x1b[A" || key === "k") { cursor = (cursor - 1 + n) % n; redraw(); @@ -6047,10 +6048,7 @@ async function selectTierPresetsAndAccess( process.removeListener("SIGTERM", onSigterm); }; - const onSigterm = () => { - cleanup(); - process.exit(1); - }; + const onSigterm = makeOnboardCancelExit(sandboxCancelRollback, cleanup); process.once("SIGTERM", onSigterm); const onData = (key: string) => { @@ -6063,8 +6061,7 @@ async function selectTierPresetsAndAccess( .map((p) => ({ name: p.name, access: accessModes[p.name] })), ); } else if (key === "\x03") { - cleanup(); - process.exit(1); + makeOnboardCancelExit(sandboxCancelRollback, cleanup)(); } else if (key === "\x1b[A" || key === "k") { cursor = (cursor - 1 + n) % n; redraw(); @@ -6197,10 +6194,7 @@ async function presetsCheckboxSelector( process.removeListener("SIGTERM", onSigterm); }; - const onSigterm = () => { - cleanup(); - process.exit(1); - }; + const onSigterm = makeOnboardCancelExit(sandboxCancelRollback, cleanup); process.once("SIGTERM", onSigterm); const onData = (key: string) => { @@ -6209,9 +6203,7 @@ async function presetsCheckboxSelector( process.stdout.write("\n"); resolve([...selected]); } else if (key === "\x03") { - // Ctrl+C - cleanup(); - process.exit(1); + makeOnboardCancelExit(sandboxCancelRollback, cleanup)(); // Ctrl+C } else if (key === "\x1b[A" || key === "k") { cursor = (cursor - 1 + n) % n; redraw(); @@ -6302,6 +6294,8 @@ const onboardRuntimeBoundary = new OnboardRuntimeBoundary({ maybeForceE2eStepFailure, }); +const sandboxCancelRollback = installSandboxCancelRollback({ runOpenshell, registry, clearOnboardSession: onboardSession.clearSession }); // #4614 + const startRecordedStep = onboardRuntimeBoundary.startRecordedStep.bind(onboardRuntimeBoundary); const recordStepComplete = onboardRuntimeBoundary.recordStepComplete.bind(onboardRuntimeBoundary); const recordStepSkipped = onboardRuntimeBoundary.recordStepSkipped.bind(onboardRuntimeBoundary); @@ -6926,7 +6920,6 @@ async function onboard(opts: OnboardOptions = {}): Promise { listRegistrySandboxes: registry.listSandboxes, createSandbox, updateSandboxRegistry: (name, updates) => registry.updateSandbox(name, updates), - setDefaultSandbox: registry.setDefault, getSandboxAgentRegistryFields, recordStepComplete, toSessionUpdates: (updates) => toSessionUpdates(updates as Parameters[0]), @@ -7016,6 +7009,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { }, }); session = policiesResult.session; + sandboxCancelRollback.disarm(); // #4614: policies confirmed, past the cancellable window await handleFinalizationState({ sandboxName, @@ -7030,6 +7024,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { webSearchEnabled: braveProviderProfile.shouldEnableBraveWebSearch(webSearchConfig), deps: { ensureAgentDashboardForward, + setDefaultSandbox: registry.setDefault, verifyWebSearchInsideSandbox, recordPostVerifyStarted, recordSessionComplete, diff --git a/src/lib/onboard/cancel-rollback.test.ts b/src/lib/onboard/cancel-rollback.test.ts new file mode 100644 index 0000000000..4ff069e95a --- /dev/null +++ b/src/lib/onboard/cancel-rollback.test.ts @@ -0,0 +1,211 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; + +import { + buildCancelRollbackMessage, + createSandboxCancelRollback, + installSandboxCancelRollback, + makeOnboardCancelExit, + type SandboxCancelRollbackDeps, +} from "./cancel-rollback"; + +function createDeps(overrides: Partial = {}) { + const calls = { + deleteContainer: vi.fn((_name: string) => true), + removeFromRegistry: vi.fn(), + clearSession: vi.fn(), + log: vi.fn(), + }; + const deps: SandboxCancelRollbackDeps = { + deleteSandboxContainer: calls.deleteContainer, + removeSandboxFromRegistry: calls.removeFromRegistry, + clearOnboardSession: calls.clearSession, + log: calls.log, + ...overrides, + }; + return { calls, deps }; +} + +describe("createSandboxCancelRollback", () => { + it("rolls back (delete + unregister) when armed and cancelled", () => { + const { deps, calls } = createDeps(); + const rollback = createSandboxCancelRollback(deps); + + rollback.arm("new-sb"); + rollback.markCancelled(); + rollback.runIfArmed(); + + expect(calls.deleteContainer).toHaveBeenCalledWith("new-sb"); + expect(calls.removeFromRegistry).toHaveBeenCalledWith("new-sb"); + // also discards the aborted session so `nemoclaw list` recovery can't resurrect it + expect(calls.clearSession).toHaveBeenCalledOnce(); + // delete is attempted before the registry entry is removed + expect(calls.deleteContainer.mock.invocationCallOrder[0]).toBeLessThan( + calls.removeFromRegistry.mock.invocationCallOrder[0], + ); + expect(calls.log).toHaveBeenCalledWith(expect.stringContaining("removed incomplete sandbox 'new-sb'")); + }); + + it("still unregisters and prints manual cleanup when container delete fails", () => { + const { deps, calls } = createDeps({ deleteSandboxContainer: vi.fn(() => false) }); + const rollback = createSandboxCancelRollback(deps); + + rollback.arm("new-sb"); + rollback.markCancelled(); + rollback.runIfArmed(); + + expect(calls.removeFromRegistry).toHaveBeenCalledWith("new-sb"); + expect(calls.log).toHaveBeenCalledWith(expect.stringContaining("unregistered incomplete sandbox 'new-sb'")); + expect(calls.log).toHaveBeenCalledWith(expect.stringContaining('openshell sandbox delete "new-sb"')); + }); + + it("does NOT roll back on a non-cancel exit (armed but not cancelled)", () => { + const { deps, calls } = createDeps(); + const rollback = createSandboxCancelRollback(deps); + + rollback.arm("new-sb"); + // no markCancelled() — this is an ordinary failure-path process.exit + rollback.runIfArmed(); + + expect(calls.deleteContainer).not.toHaveBeenCalled(); + expect(calls.removeFromRegistry).not.toHaveBeenCalled(); + expect(calls.clearSession).not.toHaveBeenCalled(); + expect(calls.log).not.toHaveBeenCalled(); + }); + + it("does NOT roll back when cancelled before any sandbox was armed", () => { + const { deps, calls } = createDeps(); + const rollback = createSandboxCancelRollback(deps); + + rollback.markCancelled(); + rollback.runIfArmed(); + + expect(calls.deleteContainer).not.toHaveBeenCalled(); + expect(calls.removeFromRegistry).not.toHaveBeenCalled(); + }); + + it("does NOT roll back after disarm (policies confirmed), even if later cancelled", () => { + const { deps, calls } = createDeps(); + const rollback = createSandboxCancelRollback(deps); + + rollback.arm("new-sb"); + rollback.disarm(); + rollback.markCancelled(); + rollback.runIfArmed(); + + expect(calls.deleteContainer).not.toHaveBeenCalled(); + expect(calls.removeFromRegistry).not.toHaveBeenCalled(); + }); + + it("is idempotent — runs the rollback at most once", () => { + const { deps, calls } = createDeps(); + const rollback = createSandboxCancelRollback(deps); + + rollback.arm("new-sb"); + rollback.markCancelled(); + rollback.runIfArmed(); + rollback.runIfArmed(); + rollback.runIfArmed(); + + expect(calls.deleteContainer).toHaveBeenCalledTimes(1); + expect(calls.removeFromRegistry).toHaveBeenCalledTimes(1); + }); + + it("reports armed state via isArmed()", () => { + const { deps } = createDeps(); + const rollback = createSandboxCancelRollback(deps); + + expect(rollback.isArmed()).toBe(false); + rollback.arm("new-sb"); + expect(rollback.isArmed()).toBe(true); + rollback.disarm(); + expect(rollback.isArmed()).toBe(false); + }); + + it("re-arming after a previous sandbox tracks the latest name", () => { + const { deps, calls } = createDeps(); + const rollback = createSandboxCancelRollback(deps); + + rollback.arm("first"); + rollback.arm("second"); + rollback.markCancelled(); + rollback.runIfArmed(); + + expect(calls.deleteContainer).toHaveBeenCalledWith("second"); + expect(calls.deleteContainer).not.toHaveBeenCalledWith("first"); + }); +}); + +describe("installSandboxCancelRollback", () => { + it("wires delete to openshell and unregister to the registry, and registers an exit hook", () => { + const runOpenshell = vi.fn(() => ({ status: 0 })); + const removeSandbox = vi.fn(); + const exitHandlers: Array<() => void> = []; + + const rollback = installSandboxCancelRollback({ + runOpenshell, + registry: { removeSandbox }, + clearOnboardSession: () => {}, + registerExitHandler: (h) => exitHandlers.push(h), + }); + + expect(exitHandlers).toHaveLength(1); + + rollback.arm("new-sb"); + rollback.markCancelled(); + exitHandlers[0](); + + expect(runOpenshell).toHaveBeenCalledWith(["sandbox", "delete", "new-sb"], { ignoreError: true }); + expect(removeSandbox).toHaveBeenCalledWith("new-sb"); + }); + + it("does not fire the rollback on a non-cancel exit", () => { + const runOpenshell = vi.fn(() => ({ status: 0 })); + const removeSandbox = vi.fn(); + const exitHandlers: Array<() => void> = []; + + const rollback = installSandboxCancelRollback({ + runOpenshell, + registry: { removeSandbox }, + clearOnboardSession: () => {}, + registerExitHandler: (h) => exitHandlers.push(h), + }); + rollback.arm("new-sb"); // armed, but never cancelled + exitHandlers[0](); + + expect(runOpenshell).not.toHaveBeenCalled(); + expect(removeSandbox).not.toHaveBeenCalled(); + }); +}); + +describe("makeOnboardCancelExit", () => { + it("cleans up, marks cancelled, then exits non-zero", () => { + const order: string[] = []; + const cleanup = vi.fn(() => order.push("cleanup")); + const rollback = { markCancelled: vi.fn(() => order.push("markCancelled")) }; + const exit = vi.fn((_code: number) => { + order.push("exit"); + }); + + makeOnboardCancelExit(rollback, cleanup, exit)(); + + expect(order).toEqual(["cleanup", "markCancelled", "exit"]); + expect(exit).toHaveBeenCalledWith(1); + }); +}); + +describe("buildCancelRollbackMessage", () => { + it("reports a clean removal when the delete succeeded", () => { + const lines = buildCancelRollbackMessage("sb", true); + expect(lines.join("\n")).toContain("removed incomplete sandbox 'sb'"); + expect(lines.join("\n")).not.toContain("openshell sandbox delete"); + }); + + it("falls back to manual cleanup guidance when the delete failed", () => { + const lines = buildCancelRollbackMessage("sb", false); + expect(lines.join("\n")).toContain("unregistered incomplete sandbox 'sb'"); + expect(lines.join("\n")).toContain('openshell sandbox delete "sb"'); + }); +}); diff --git a/src/lib/onboard/cancel-rollback.ts b/src/lib/onboard/cancel-rollback.ts new file mode 100644 index 0000000000..ac1bafcb51 --- /dev/null +++ b/src/lib/onboard/cancel-rollback.ts @@ -0,0 +1,160 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Re-exported so the onboard entrypoint imports its sandbox default/cancel +// lifecycle helpers from a single module. +export { restoreDefaultAfterRecreate, wasSandboxDefault } from "./default-preservation"; + +/** + * Rollback guard for a sandbox that was created during onboarding but whose + * onboarding was cancelled before the policy-preset step was confirmed. + * + * Without this, pressing Ctrl+C at the `[8/8] Policy presets` screen leaves a + * fully created OpenShell container registered as the default sandbox even + * though no policies were ever applied (#4614). + * + * The guard is deliberately a two-key gate — it only fires when BOTH: + * - a freshly-created sandbox is `arm()`ed (set after createSandbox succeeds), AND + * - the operator actually cancelled via `markCancelled()` (the policy-step + * prompts call this on Ctrl+C / SIGTERM before exiting). + * + * This keeps every other `process.exit(1)` failure path untouched: a genuine + * build/verify failure exits without `markCancelled()`, so the sandbox it left + * behind is preserved exactly as before. Only an explicit cancel rolls back. + */ +export interface SandboxCancelRollbackDeps { + /** Delete the OpenShell sandbox container. Returns true when the delete succeeded. */ + deleteSandboxContainer(sandboxName: string): boolean; + /** Remove the sandbox entry from the NemoClaw registry (clears default). */ + removeSandboxFromRegistry(sandboxName: string): void; + /** + * Discard the onboard session for the aborted run. Without this, the session + * still records the sandbox step as "complete", and `nemoclaw list`'s + * session-recovery resurrects the just-removed sandbox as a phantom entry. + */ + clearOnboardSession(): void; + /** Emit an operator-facing line (stderr). */ + log(message: string): void; +} + +export interface SandboxCancelRollback { + /** Arm rollback for a just-created sandbox. */ + arm(sandboxName: string): void; + /** Disarm once the sandbox is past the cancellable window (policies confirmed). */ + disarm(): void; + /** Record that the operator cancelled at a cancellable step. */ + markCancelled(): void; + /** Run the rollback iff armed AND cancelled. Idempotent. */ + runIfArmed(): void; + /** Test/introspection helper. */ + isArmed(): boolean; +} + +export function buildCancelRollbackMessage(sandboxName: string, deleteSucceeded: boolean): string[] { + if (deleteSucceeded) { + return [ + "", + ` Onboarding cancelled — removed incomplete sandbox '${sandboxName}' (no policy presets were applied).`, + ]; + } + return [ + "", + ` Onboarding cancelled — unregistered incomplete sandbox '${sandboxName}'.`, + " The sandbox container may still be running. Remove it with:", + ` openshell sandbox delete "${sandboxName}"`, + ]; +} + +export interface InstallSandboxCancelRollbackOptions { + runOpenshell: (args: string[], opts: { ignoreError: boolean }) => { status: number | null }; + registry: { removeSandbox(name: string): void }; + clearOnboardSession: () => void; + log?: (message: string) => void; + /** Override for tests; defaults to `process.on("exit", ...)`. */ + registerExitHandler?: (handler: () => void) => void; +} + +/** + * Wire a sandbox cancel-rollback to OpenShell + the registry and register the + * process-exit hook that fires it. Kept here (not in onboard.ts) so the + * orchestration lives in a focused module rather than the onboard entrypoint. + * + * `process.exit()` — how the policy-step prompts terminate on Ctrl+C — + * synchronously emits 'exit', and runOpenshell/removeSandbox are synchronous, + * so the rollback completes inside the handler. No-op unless armed AND cancelled. + */ +export function installSandboxCancelRollback( + opts: InstallSandboxCancelRollbackOptions, +): SandboxCancelRollback { + const rollback = createSandboxCancelRollback({ + deleteSandboxContainer: (name) => + opts.runOpenshell(["sandbox", "delete", name], { ignoreError: true }).status === 0, + removeSandboxFromRegistry: (name) => opts.registry.removeSandbox(name), + clearOnboardSession: opts.clearOnboardSession, + log: opts.log ?? ((message) => console.error(message)), + }); + const register = + opts.registerExitHandler ?? + ((handler: () => void) => { + process.on("exit", handler); + }); + register(() => rollback.runIfArmed()); + return rollback; +} + +/** + * Build the cancel handler the policy-step prompts run on Ctrl+C / SIGTERM: + * restore the terminal (`cleanup`), record the cancel, then exit non-zero. + * Shared so both the tier and preset selectors stay in sync. + */ +export function makeOnboardCancelExit( + rollback: Pick, + cleanup: () => void, + exit: (code: number) => void = (code) => process.exit(code), +): () => void { + return () => { + cleanup(); + rollback.markCancelled(); + exit(1); + }; +} + +export function createSandboxCancelRollback( + deps: SandboxCancelRollbackDeps, +): SandboxCancelRollback { + let armedSandboxName: string | null = null; + let cancelRequested = false; + let done = false; + + return { + arm(sandboxName: string): void { + armedSandboxName = sandboxName; + }, + disarm(): void { + armedSandboxName = null; + }, + markCancelled(): void { + cancelRequested = true; + }, + isArmed(): boolean { + return armedSandboxName !== null; + }, + runIfArmed(): void { + if (done || !cancelRequested || armedSandboxName === null) return; + done = true; + const sandboxName = armedSandboxName; + armedSandboxName = null; + + // Delete the container first, then unregister regardless of the delete + // result — leaving a registry entry pointing at a half-built sandbox is + // worse than an orphaned container the operator can clean up manually. + const deleteSucceeded = deps.deleteSandboxContainer(sandboxName); + deps.removeSandboxFromRegistry(sandboxName); + // Discard the aborted session so `nemoclaw list` recovery doesn't resurrect it. + deps.clearOnboardSession(); + for (const line of buildCancelRollbackMessage(sandboxName, deleteSucceeded)) { + deps.log(line); + } + }, + }; +} diff --git a/src/lib/onboard/default-preservation.test.ts b/src/lib/onboard/default-preservation.test.ts new file mode 100644 index 0000000000..75d870807e --- /dev/null +++ b/src/lib/onboard/default-preservation.test.ts @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; + +import { restoreDefaultAfterRecreate, wasSandboxDefault } from "./default-preservation"; + +describe("wasSandboxDefault", () => { + it("is true when the sandbox is the current default", () => { + expect(wasSandboxDefault("my-sb", "my-sb")).toBe(true); + }); + + it("is false when a different sandbox is the default", () => { + expect(wasSandboxDefault("other-sb", "my-sb")).toBe(false); + }); + + it("is false when there is no default", () => { + expect(wasSandboxDefault(null, "my-sb")).toBe(false); + }); +}); + +describe("restoreDefaultAfterRecreate", () => { + it("re-applies the default when the sandbox held it before recreate", () => { + const setDefault = vi.fn(); + restoreDefaultAfterRecreate(setDefault, "my-sb", true); + expect(setDefault).toHaveBeenCalledWith("my-sb"); + }); + + it("does NOT touch the default when the sandbox was not default (e.g. a brand-new sandbox)", () => { + const setDefault = vi.fn(); + restoreDefaultAfterRecreate(setDefault, "new-sb", false); + expect(setDefault).not.toHaveBeenCalled(); + }); + + it("round-trips: a recreate of the prior default restores it", () => { + const setDefault = vi.fn(); + const wasDefault = wasSandboxDefault("my-sb", "my-sb"); + restoreDefaultAfterRecreate(setDefault, "my-sb", wasDefault); + expect(setDefault).toHaveBeenCalledWith("my-sb"); + }); + + it("round-trips: a recreate of a non-default sandbox leaves the default alone", () => { + const setDefault = vi.fn(); + const wasDefault = wasSandboxDefault("other-sb", "my-sb"); + restoreDefaultAfterRecreate(setDefault, "my-sb", wasDefault); + expect(setDefault).not.toHaveBeenCalled(); + }); +}); diff --git a/src/lib/onboard/default-preservation.ts b/src/lib/onboard/default-preservation.ts new file mode 100644 index 0000000000..0b9b30a9f8 --- /dev/null +++ b/src/lib/onboard/default-preservation.ts @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Preserve the "default sandbox" flag across a destructive recreate/rebuild. + * + * Default-marking is normally deferred to the finalization step so a cancelled + * fresh onboard never leaves an unconfigured sandbox registered as default + * (#4614). But a recreate/rebuild first removes the existing registry entry + * (which clears the default pointer) and re-registers it without a default. + * If the rebuild then fails before finalization, the sandbox the operator was + * already using would silently stop being the default until a successful re-run. + * + * These helpers snapshot whether the sandbox was the default *before* it is torn + * down, and restore that flag immediately after it is re-registered — so only a + * genuinely new sandbox (which was never the default) stays deferred. + */ + +/** True iff `sandboxName` is the current default — capture this before recreate tears it down. */ +export function wasSandboxDefault( + currentDefault: string | null, + sandboxName: string, +): boolean { + return currentDefault === sandboxName; +} + +/** Re-apply the default flag after re-registration iff the sandbox held it beforehand. */ +export function restoreDefaultAfterRecreate( + setDefault: (sandboxName: string) => void, + sandboxName: string, + wasDefaultBeforeRecreate: boolean, +): void { + if (wasDefaultBeforeRecreate) { + setDefault(sandboxName); + } +} diff --git a/src/lib/onboard/machine/handlers/finalization.test.ts b/src/lib/onboard/machine/handlers/finalization.test.ts index df6000b2e6..aa6e6b2ad8 100644 --- a/src/lib/onboard/machine/handlers/finalization.test.ts +++ b/src/lib/onboard/machine/handlers/finalization.test.ts @@ -12,6 +12,7 @@ type VerificationResult = { ok: boolean }; function createDeps(overrides: Partial["deps"]> = {}) { const calls = { + setDefaultSandbox: vi.fn(), ensureAgentDashboard: vi.fn(() => 18789), postVerify: vi.fn(async () => createSession({ machine: { version: 1, state: "post_verify", stateEnteredAt: null, revision: 1 } })), complete: vi.fn(async () => createSession({ status: "complete" })), @@ -31,6 +32,7 @@ function createDeps(overrides: Partial) => updates as SessionUpdates, @@ -74,6 +76,11 @@ describe("handleFinalizationState", () => { const result = await handleFinalizationState(baseOptions(deps)); + // Default is set at finalization (deferred from sandbox creation, #4614), before completion. + expect(calls.setDefaultSandbox).toHaveBeenCalledWith("my-assistant"); + expect(calls.setDefaultSandbox.mock.invocationCallOrder[0]).toBeLessThan( + calls.complete.mock.invocationCallOrder[0], + ); expect(calls.cleanupHost).toHaveBeenCalledOnce(); expect(calls.recoverProcesses).toHaveBeenCalledWith("my-assistant", { quiet: true }); expect(calls.buildChain).toHaveBeenCalledWith("http://127.0.0.1:18789"); @@ -117,6 +124,9 @@ describe("handleFinalizationState", () => { expect(calls.postVerify).toHaveBeenCalledOnce(); expect(calls.complete).not.toHaveBeenCalled(); expect(calls.dashboard).not.toHaveBeenCalled(); + // The sandbox reached finalization (policies confirmed), so it stays the default + // even when post-policy verification flakes — only a pre-policy cancel rolls back. + expect(calls.setDefaultSandbox).toHaveBeenCalledWith("my-assistant"); }); it("removes legacy credentials only when all staged values migrated", async () => { diff --git a/src/lib/onboard/machine/handlers/finalization.ts b/src/lib/onboard/machine/handlers/finalization.ts index 34e2dba224..760cdf1561 100644 --- a/src/lib/onboard/machine/handlers/finalization.ts +++ b/src/lib/onboard/machine/handlers/finalization.ts @@ -16,6 +16,12 @@ export interface FinalizationStateOptions): number; + /** + * Mark this sandbox as the default. Called here (not at sandbox creation) so + * a cancel at the policy-preset step never leaves an unconfigured sandbox + * registered as default (#4614). + */ + setDefaultSandbox(sandboxName: string): void; recordPostVerifyStarted(): Promise; recordSessionComplete(updates: SessionUpdates): Promise; toSessionUpdates(updates: Record): SessionUpdates; @@ -64,6 +70,10 @@ export async function handleFinalizationState): Promise { + // Reaching finalization means the policy-preset step was confirmed, so it is + // now safe to register this sandbox as the default (#4614). + deps.setDefaultSandbox(sandboxName); + if (agent) deps.ensureAgentDashboardForward(sandboxName, agent as NonNullable); const allStagedMigrated = diff --git a/src/lib/onboard/machine/handlers/sandbox.test.ts b/src/lib/onboard/machine/handlers/sandbox.test.ts index 52cf8a6db2..bd8ecade50 100644 --- a/src/lib/onboard/machine/handlers/sandbox.test.ts +++ b/src/lib/onboard/machine/handlers/sandbox.test.ts @@ -35,7 +35,6 @@ function createDeps(overrides: Partial "my-assistant"), updateSandbox: vi.fn(), - setDefault: vi.fn(), complete: vi.fn(async () => createSession()), skipped: vi.fn(), recordSkip: vi.fn(async () => createSession()), @@ -79,7 +78,6 @@ function createDeps(overrides: Partial ({ sandboxes: [{ name: "old" }] }), createSandbox: calls.createSandbox, updateSandboxRegistry: calls.updateSandbox, - setDefaultSandbox: calls.setDefault, getSandboxAgentRegistryFields: () => ({ agent: null }), recordStepComplete: calls.complete, toSessionUpdates: (updates: Record) => updates as SessionUpdates, @@ -150,7 +148,7 @@ describe("handleSandboxState", () => { [], ); expect(calls.updateSandbox).toHaveBeenCalledWith("my-assistant", expect.objectContaining({ model: "model", provider: "provider" })); - expect(calls.setDefault).toHaveBeenCalledWith("my-assistant"); + // Default-marking is deferred to finalization (#4614) — the sandbox step must not set it. expect(calls.complete).toHaveBeenCalledWith("sandbox", expect.objectContaining({ sandboxName: "my-assistant" })); expect(result).toMatchObject({ sandboxName: "my-assistant", selectedMessagingChannels: ["telegram"], webSearchSupported: true }); }); diff --git a/src/lib/onboard/machine/handlers/sandbox.ts b/src/lib/onboard/machine/handlers/sandbox.ts index efa5cf0adb..9a91ba6360 100644 --- a/src/lib/onboard/machine/handlers/sandbox.ts +++ b/src/lib/onboard/machine/handlers/sandbox.ts @@ -77,7 +77,6 @@ export interface SandboxStateOptions; updateSandboxRegistry(sandboxName: string, updates: Record): void; - setDefaultSandbox(sandboxName: string): void; getSandboxAgentRegistryFields(agent: Agent, agentVersionKnown: boolean): Record; recordStepComplete(stepName: string, updates: SessionUpdates): Promise; toSessionUpdates(updates: Record): SessionUpdates; @@ -307,7 +306,8 @@ export async function handleSandboxState) =>