From 6b753bf4ebf69d157170c477b96a9ae8daa19b16 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 15:18:22 -0700 Subject: [PATCH 01/25] docs(onboard): document FSM migration target Signed-off-by: Carlos Villela --- src/lib/onboard/machine/README.md | 111 ++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 src/lib/onboard/machine/README.md diff --git a/src/lib/onboard/machine/README.md b/src/lib/onboard/machine/README.md new file mode 100644 index 0000000000..752e8959f4 --- /dev/null +++ b/src/lib/onboard/machine/README.md @@ -0,0 +1,111 @@ + + + +# Onboard finite-state machine + +This directory contains the transitional onboarding finite-state-machine (FSM) layer. The current implementation records coarse state snapshots and emits machine events while the legacy `src/lib/onboard.ts` entrypoint is split into explicit state handlers. + +## Target architecture + +The target shape is a machine-driven onboarding runner: + +1. Normalize CLI flags, environment, session locking, and consent in `src/lib/onboard.ts`. +2. Build an onboarding context that contains sanitized operator choices, runtime dependencies, and mutable values returned by states. +3. Enter `runOnboardMachine(context)`. +4. Dispatch the current machine state to a handler. +5. Let the handler return an explicit state result such as advance, retry, branch, complete, or failed. +6. Apply the result through `OnboardRuntime`, which validates the transition, updates the persisted session snapshot, and emits redacted machine events. +7. Continue until the machine reaches `complete` or `failed`. + +In that final shape, `src/lib/onboard.ts` should be a thin entrypoint. State handlers should own state-specific prompts, resume validation, repair decisions, and side effects. + +## State ownership + +Machine states are coarse user-visible onboarding phases, not every subprocess or probe inside a phase. 
The current vocabulary is intentionally limited to major boundaries: + +- `init` +- `preflight` +- `gateway` +- `provider_selection` +- `inference` +- `sandbox` +- `openclaw` or `agent_setup` +- `policies` +- `finalizing` +- `post_verify` +- `complete` or `failed` + +A state handler may perform many smaller operations, but it should expose only stable, redacted state transitions and context updates to the FSM. + +## Session steps versus machine state + +The persisted onboarding session still tracks step-level progress for resumability. Step recording is older than the FSM and is currently used as a compatibility bridge. + +Long term: + +- `OnboardRuntime` should own machine transitions and machine revision increments. +- Session step helpers should record only step status (`pending`, `in_progress`, `complete`, `failed`, `skipped`). +- State handlers should return explicit results instead of implicitly moving the machine by calling step helpers. + +Until that migration completes, step helpers may still infer machine snapshots for compatibility with older sessions and tests. + +## Handler contract + +Each state handler should eventually follow this shape: + +```ts +type OnboardStateHandler = (context: OnboardContext) => Promise<OnboardStateResult>; +``` + +A handler should: + +- validate whether the state can be resumed or skipped; +- run state-local repairs before declaring a cached step reusable; +- perform the phase side effects; +- return the next state explicitly; +- keep secrets out of returned metadata and event context. + +A handler should not: + +- mutate the machine snapshot directly; +- jump to states outside the declared transition graph; +- rely on console output as the only observable diagnostic; +- store raw credentials, provider URLs with secrets, or other sensitive values in machine context. 
+ +## Runtime responsibilities + +`OnboardRuntime` is the intended authority for: + +- validating transitions against `transitions.ts`; +- applying safe session context updates; +- marking terminal states; +- emitting redacted lifecycle, state, repair, resume-conflict, and hook events; +- preserving compatibility with normalized older sessions. + +The runtime should reject invalid transitions before they can be persisted. + +## Event semantics + +Machine events are diagnostics and automation hooks. They must be safe to write to JSONL logs and attach to CI/E2E artifacts. + +Event payloads should include only stable, redacted context such as: + +- selected agent; +- sandbox name; +- provider and model names; +- endpoint origin, not full secret-bearing URLs; +- credential environment variable name, not credential value; +- policy presets and messaging channel names. + +Observers and hooks must not change onboarding behavior. A failing hook should emit hook failure diagnostics and let onboarding continue. + +## Migration stages + +The FSM migration is considered complete when: + +1. state metadata is defined once and derived by session, event, progress, and transition code; +2. live onboarding emits `onboard.started`, `onboard.resumed`, `resume.conflict`, terminal, state, skip, repair, and context events consistently; +3. handlers return explicit state results; +4. the runner applies all handler results through `OnboardRuntime`; +5. step helpers no longer implicitly own machine transitions; +6. `src/lib/onboard.ts` contains entrypoint setup and dependency wiring rather than state sequencing. 
From fb1b32d0a8725934d3c49e77ca375abdcedf2c81 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 15:19:42 -0700 Subject: [PATCH 02/25] refactor(onboard): centralize machine state metadata Signed-off-by: Carlos Villela --- src/lib/onboard/machine/definition.test.ts | 85 ++++++++++++++++ src/lib/onboard/machine/definition.ts | 108 +++++++++++++++++++++ src/lib/onboard/machine/types.ts | 43 +++----- 3 files changed, 208 insertions(+), 28 deletions(-) create mode 100644 src/lib/onboard/machine/definition.test.ts create mode 100644 src/lib/onboard/machine/definition.ts diff --git a/src/lib/onboard/machine/definition.test.ts b/src/lib/onboard/machine/definition.test.ts new file mode 100644 index 0000000000..7fffa49ec5 --- /dev/null +++ b/src/lib/onboard/machine/definition.test.ts @@ -0,0 +1,85 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { + getOnboardMachineStateDefinition, + ONBOARD_MACHINE_NON_TERMINAL_STATE_IDS, + ONBOARD_MACHINE_STATE_DEFINITIONS, + ONBOARD_MACHINE_STATE_IDS, + ONBOARD_MACHINE_TERMINAL_STATE_IDS, +} from "./definition"; + +const expectedStateOrder = [ + "init", + "preflight", + "gateway", + "provider_selection", + "inference", + "sandbox", + "agent_setup", + "openclaw", + "policies", + "finalizing", + "post_verify", + "complete", + "failed", +]; + +describe("onboard machine definition", () => { + it("is the canonical ordered state catalog", () => { + expect(ONBOARD_MACHINE_STATE_IDS).toEqual(expectedStateOrder); + expect(ONBOARD_MACHINE_STATE_DEFINITIONS.map((definition) => definition.state)).toEqual( + expectedStateOrder, + ); + }); + + it("derives terminal and non-terminal state catalogs from the same vocabulary", () => { + const terminalFromDefinitions = ONBOARD_MACHINE_STATE_DEFINITIONS.filter( + (definition) => definition.terminal, + ).map((definition) => 
definition.state); + const nonTerminalFromDefinitions = ONBOARD_MACHINE_STATE_DEFINITIONS.filter( + (definition) => !definition.terminal, + ).map((definition) => definition.state); + + expect(ONBOARD_MACHINE_TERMINAL_STATE_IDS).toEqual(terminalFromDefinitions); + expect(ONBOARD_MACHINE_NON_TERMINAL_STATE_IDS).toEqual(nonTerminalFromDefinitions); + }); + + it("keeps resumable step names unique", () => { + const stepNames = ONBOARD_MACHINE_STATE_DEFINITIONS.flatMap((definition) => + "stepName" in definition ? [definition.stepName] : [], + ); + + expect(new Set(stepNames).size).toBe(stepNames.length); + expect(stepNames).toEqual([ + "preflight", + "gateway", + "provider_selection", + "inference", + "sandbox", + "agent_setup", + "openclaw", + "policies", + ]); + }); + + it("keeps progress metadata attached only to state-backed steps", () => { + for (const definition of ONBOARD_MACHINE_STATE_DEFINITIONS) { + if (!("progress" in definition)) continue; + expect("stepName" in definition).toBe(true); + expect(definition.progress.total).toBe(8); + expect(definition.progress.number).toBeGreaterThanOrEqual(1); + expect(definition.progress.number).toBeLessThanOrEqual(definition.progress.total); + expect(definition.progress.title).not.toHaveLength(0); + } + }); + + it("looks up definitions by state", () => { + expect(getOnboardMachineStateDefinition("gateway")).toMatchObject({ + state: "gateway", + stepName: "gateway", + }); + }); +}); diff --git a/src/lib/onboard/machine/definition.ts b/src/lib/onboard/machine/definition.ts new file mode 100644 index 0000000000..0f873edf0b --- /dev/null +++ b/src/lib/onboard/machine/definition.ts @@ -0,0 +1,108 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Canonical metadata for the coarse onboard finite-state machine. 
+ * + * Keep this file free of imports from the rest of the machine package so the + * core state vocabulary can be reused by type, transition, event, session, and + * progress helpers without introducing circular dependencies. + */ + +export const ONBOARD_MACHINE_STATE_DEFINITIONS = [ + { state: "init", terminal: false }, + { + state: "preflight", + terminal: false, + stepName: "preflight", + progress: { number: 1, total: 8, title: "Preflight checks" }, + }, + { + state: "gateway", + terminal: false, + stepName: "gateway", + progress: { number: 2, total: 8, title: "Starting OpenShell gateway" }, + }, + { + state: "provider_selection", + terminal: false, + stepName: "provider_selection", + progress: { number: 3, total: 8, title: "Configuring inference (NIM)" }, + }, + { + state: "inference", + terminal: false, + stepName: "inference", + progress: { number: 4, total: 8, title: "Setting up inference provider" }, + }, + { + state: "sandbox", + terminal: false, + stepName: "sandbox", + progress: { number: 6, total: 8, title: "Creating sandbox" }, + }, + { + state: "agent_setup", + terminal: false, + stepName: "agent_setup", + progress: { number: 7, total: 8, title: "Setting up agent inside sandbox" }, + }, + { + state: "openclaw", + terminal: false, + stepName: "openclaw", + progress: { number: 7, total: 8, title: "Setting up agent inside sandbox" }, + }, + { + state: "policies", + terminal: false, + stepName: "policies", + progress: { number: 8, total: 8, title: "Policy presets" }, + }, + { state: "finalizing", terminal: false }, + { state: "post_verify", terminal: false }, + { state: "complete", terminal: true }, + { state: "failed", terminal: true }, +] as const; + +export const ONBOARD_MACHINE_STATE_IDS = ONBOARD_MACHINE_STATE_DEFINITIONS.map( + (definition) => definition.state, +) as readonly OnboardMachineStateId[]; + +export const ONBOARD_MACHINE_TERMINAL_STATE_IDS = ["complete", "failed"] as const; + +export type OnboardTerminalMachineStateId = (typeof 
ONBOARD_MACHINE_TERMINAL_STATE_IDS)[number]; + +export type OnboardMachineStateId = (typeof ONBOARD_MACHINE_STATE_DEFINITIONS)[number]["state"]; + +export type OnboardNonTerminalMachineStateId = Exclude< + OnboardMachineStateId, + OnboardTerminalMachineStateId +>; + +export const ONBOARD_MACHINE_NON_TERMINAL_STATE_IDS = ONBOARD_MACHINE_STATE_DEFINITIONS.filter( + (definition): definition is Extract< + (typeof ONBOARD_MACHINE_STATE_DEFINITIONS)[number], + { terminal: false } + > => definition.terminal === false, +).map((definition) => definition.state) as readonly OnboardNonTerminalMachineStateId[]; + +export type OnboardMachineStateDefinition = (typeof ONBOARD_MACHINE_STATE_DEFINITIONS)[number]; + +export type OnboardMachineStateWithStepDefinition = Extract< + OnboardMachineStateDefinition, + { stepName: string } +>; + +export type OnboardMachineStateWithProgressDefinition = Extract< + OnboardMachineStateDefinition, + { progress: { number: number; total: number; title: string } } +>; + +export function getOnboardMachineStateDefinition( + state: OnboardMachineStateId, +): OnboardMachineStateDefinition { + const definition = ONBOARD_MACHINE_STATE_DEFINITIONS.find((entry) => entry.state === state); + if (!definition) throw new Error(`Unknown onboarding machine state: ${state}`); + return definition; +} diff --git a/src/lib/onboard/machine/types.ts b/src/lib/onboard/machine/types.ts index e1dca21e72..d5f00477ee 100644 --- a/src/lib/onboard/machine/types.ts +++ b/src/lib/onboard/machine/types.ts @@ -9,39 +9,26 @@ * probes, or policy application is out of scope for the initial FSM shell. 
*/ -export const ONBOARD_MACHINE_STATES = [ - "init", - "preflight", - "gateway", - "provider_selection", - "inference", - "sandbox", - "agent_setup", - "openclaw", - "policies", - "finalizing", - "post_verify", - "complete", - "failed", -] as const; +import { + ONBOARD_MACHINE_NON_TERMINAL_STATE_IDS, + ONBOARD_MACHINE_STATE_IDS, + ONBOARD_MACHINE_TERMINAL_STATE_IDS, + type OnboardMachineStateId, + type OnboardNonTerminalMachineStateId, + type OnboardTerminalMachineStateId, +} from "./definition"; + +export const ONBOARD_MACHINE_STATES = ONBOARD_MACHINE_STATE_IDS; -export type OnboardMachineState = (typeof ONBOARD_MACHINE_STATES)[number]; +export type OnboardMachineState = OnboardMachineStateId; -export const ONBOARD_TERMINAL_MACHINE_STATES = ["complete", "failed"] as const; +export const ONBOARD_TERMINAL_MACHINE_STATES = ONBOARD_MACHINE_TERMINAL_STATE_IDS; -export type OnboardTerminalMachineState = - (typeof ONBOARD_TERMINAL_MACHINE_STATES)[number]; +export type OnboardTerminalMachineState = OnboardTerminalMachineStateId; -export type OnboardNonTerminalMachineState = Exclude< - OnboardMachineState, - OnboardTerminalMachineState ->; +export type OnboardNonTerminalMachineState = OnboardNonTerminalMachineStateId; -export const ONBOARD_NON_TERMINAL_MACHINE_STATES: readonly OnboardNonTerminalMachineState[] = - ONBOARD_MACHINE_STATES.filter( - (state): state is OnboardNonTerminalMachineState => - !ONBOARD_TERMINAL_MACHINE_STATES.includes(state as OnboardTerminalMachineState), - ); +export const ONBOARD_NON_TERMINAL_MACHINE_STATES = ONBOARD_MACHINE_NON_TERMINAL_STATE_IDS; export const ONBOARD_MACHINE_EVENT_TYPES = [ "onboard.started", From c3e4ad63b31738ffad7f7c7bb306dfa8d6eca39a Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 15:21:58 -0700 Subject: [PATCH 03/25] refactor(onboard): derive session step mapping from FSM metadata Signed-off-by: Carlos Villela --- src/lib/onboard/machine/definition.test.ts | 11 ++++++++ src/lib/onboard/machine/events.ts 
| 32 ++++++++++++++-------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/lib/onboard/machine/definition.test.ts b/src/lib/onboard/machine/definition.test.ts index 7fffa49ec5..6c5d87c6f7 100644 --- a/src/lib/onboard/machine/definition.test.ts +++ b/src/lib/onboard/machine/definition.test.ts @@ -10,6 +10,7 @@ import { ONBOARD_MACHINE_STATE_IDS, ONBOARD_MACHINE_TERMINAL_STATE_IDS, } from "./definition"; +import { ONBOARD_SESSION_STEP_TO_MACHINE_STATE } from "./events"; const expectedStateOrder = [ "init", @@ -65,6 +66,16 @@ describe("onboard machine definition", () => { ]); }); + it("derives the session step mapping from state definitions", () => { + const mappingFromDefinitions = Object.fromEntries( + ONBOARD_MACHINE_STATE_DEFINITIONS.flatMap((definition) => + "stepName" in definition ? [[definition.stepName, definition.state]] : [], + ), + ); + + expect(ONBOARD_SESSION_STEP_TO_MACHINE_STATE).toEqual(mappingFromDefinitions); + }); + it("keeps progress metadata attached only to state-backed steps", () => { for (const definition of ONBOARD_MACHINE_STATE_DEFINITIONS) { if (!("progress" in definition)) continue; diff --git a/src/lib/onboard/machine/events.ts b/src/lib/onboard/machine/events.ts index f6b7dca47c..2ce746167a 100644 --- a/src/lib/onboard/machine/events.ts +++ b/src/lib/onboard/machine/events.ts @@ -4,24 +4,32 @@ import type { JsonObject, JsonValue } from "../../core/json-types"; import { redactSensitiveText, redactUrl } from "../../security/redact"; import type { HermesAuthMethod, Session } from "../../state/onboard-session"; +import { + ONBOARD_MACHINE_STATE_DEFINITIONS, + type OnboardMachineStateWithStepDefinition, +} from "./definition"; import type { OnboardMachineContext, OnboardMachineEventType, OnboardMachineState, } from "./types"; -export const ONBOARD_SESSION_STEP_TO_MACHINE_STATE = { - preflight: "preflight", - gateway: "gateway", - provider_selection: "provider_selection", - inference: "inference", - sandbox: "sandbox", - 
agent_setup: "agent_setup", - openclaw: "openclaw", - policies: "policies", -} as const satisfies Readonly>; - -export type OnboardSessionStepName = keyof typeof ONBOARD_SESSION_STEP_TO_MACHINE_STATE; +type OnboardSessionStepDefinition = OnboardMachineStateWithStepDefinition; + +export type OnboardSessionStepName = OnboardSessionStepDefinition["stepName"]; + +type OnboardSessionStepToMachineState = { + readonly [StepName in OnboardSessionStepName]: Extract< + OnboardSessionStepDefinition, + { stepName: StepName } + >["state"]; +}; + +export const ONBOARD_SESSION_STEP_TO_MACHINE_STATE = Object.fromEntries( + ONBOARD_MACHINE_STATE_DEFINITIONS.flatMap((definition) => + "stepName" in definition ? [[definition.stepName, definition.state]] : [], + ), +) as OnboardSessionStepToMachineState; export interface OnboardMachineEvent { version: 1; From 603832c0c5d9e2ea2e9a8b27158ee00b8fd9bc93 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 15:24:39 -0700 Subject: [PATCH 04/25] refactor(onboard): derive progress labels from FSM metadata Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 23 +++++--------- src/lib/onboard/machine/definition.ts | 1 - src/lib/onboard/machine/progress.test.ts | 38 ++++++++++++++++++++++++ src/lib/onboard/machine/progress.ts | 38 ++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 17 deletions(-) create mode 100644 src/lib/onboard/machine/progress.test.ts create mode 100644 src/lib/onboard/machine/progress.ts diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 761c3c2454..4fef39b2aa 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -419,6 +419,7 @@ const { handlePoliciesState }: typeof import("./onboard/machine/handlers/policie const { handlePreflightState }: typeof import("./onboard/machine/handlers/preflight") = require("./onboard/machine/handlers/preflight"); const { handleProviderInferenceState }: typeof import("./onboard/machine/handlers/provider-inference") = 
require("./onboard/machine/handlers/provider-inference"); const { handleSandboxState }: typeof import("./onboard/machine/handlers/sandbox") = require("./onboard/machine/handlers/sandbox"); +const { getOnboardProgressStep }: typeof import("./onboard/machine/progress") = require("./onboard/machine/progress"); const policies: typeof import("./policy") = require("./policy"); const tiers: typeof import("./policy/tiers") = require("./policy/tiers"); const { ensureUsageNoticeConsent } = require("./onboard/usage-notice"); @@ -6390,28 +6391,18 @@ const recordRepairEvent = onboardRuntimeBoundary.recordRepairEvent.bind(onboardR const recordPostVerifyStarted = onboardRuntimeBoundary.recordPostVerifyStarted.bind(onboardRuntimeBoundary); const recordSessionComplete = onboardRuntimeBoundary.recordSessionComplete.bind(onboardRuntimeBoundary); -const ONBOARD_STEP_INDEX: Record = { - preflight: { number: 1, title: "Preflight checks" }, - gateway: { number: 2, title: "Starting OpenShell gateway" }, - provider_selection: { number: 3, title: "Configuring inference (NIM)" }, - inference: { number: 4, title: "Setting up inference provider" }, - messaging: { number: 5, title: "Messaging channels" }, - sandbox: { number: 6, title: "Creating sandbox" }, - openclaw: { number: 7, title: "Setting up agent inside sandbox" }, - policies: { number: 8, title: "Policy presets" }, -}; - function skippedStepMessage( stepName: string, detail?: string | null, reason: "resume" | "reuse" = "resume", ): void { - let stepInfo = ONBOARD_STEP_INDEX[stepName]; - if (stepInfo && stepName === "openclaw") { - stepInfo = { ...stepInfo, title: `Setting up ${agentProductName()} inside sandbox` }; - } + const progressStep = getOnboardProgressStep(stepName); + const stepInfo = + progressStep && stepName === "openclaw" + ? 
{ ...progressStep, title: `Setting up ${agentProductName()} inside sandbox` } + : progressStep; if (stepInfo) { - step(stepInfo.number, 8, stepInfo.title); + step(stepInfo.number, stepInfo.total, stepInfo.title); } const prefix = reason === "reuse" ? "[reuse]" : "[resume]"; console.log(` ${prefix} Skipping ${stepName}${detail ? ` (${detail})` : ""}`); diff --git a/src/lib/onboard/machine/definition.ts b/src/lib/onboard/machine/definition.ts index 0f873edf0b..03903bfb34 100644 --- a/src/lib/onboard/machine/definition.ts +++ b/src/lib/onboard/machine/definition.ts @@ -45,7 +45,6 @@ export const ONBOARD_MACHINE_STATE_DEFINITIONS = [ state: "agent_setup", terminal: false, stepName: "agent_setup", - progress: { number: 7, total: 8, title: "Setting up agent inside sandbox" }, }, { state: "openclaw", diff --git a/src/lib/onboard/machine/progress.test.ts b/src/lib/onboard/machine/progress.test.ts new file mode 100644 index 0000000000..4c1ee2e99d --- /dev/null +++ b/src/lib/onboard/machine/progress.test.ts @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { ONBOARD_MACHINE_STATE_DEFINITIONS } from "./definition"; +import { getOnboardProgressStep, ONBOARD_PROGRESS_STEPS } from "./progress"; + +describe("onboard progress metadata", () => { + it("derives state-backed progress labels from machine definitions", () => { + for (const definition of ONBOARD_MACHINE_STATE_DEFINITIONS) { + if (!("progress" in definition)) continue; + expect(ONBOARD_PROGRESS_STEPS[definition.stepName]).toEqual(definition.progress); + } + }); + + it("preserves the existing eight-step onboarding labels", () => { + expect(ONBOARD_PROGRESS_STEPS).toEqual({ + preflight: { number: 1, total: 8, title: "Preflight checks" }, + gateway: { number: 2, total: 8, title: "Starting OpenShell gateway" }, + provider_selection: { number: 3, total: 8, title: "Configuring inference (NIM)" }, + inference: { number: 4, total: 8, title: "Setting up inference provider" }, + messaging: { number: 5, total: 8, title: "Messaging channels" }, + sandbox: { number: 6, total: 8, title: "Creating sandbox" }, + openclaw: { number: 7, total: 8, title: "Setting up agent inside sandbox" }, + policies: { number: 8, total: 8, title: "Policy presets" }, + }); + }); + + it("looks up known labels and ignores unknown steps", () => { + expect(getOnboardProgressStep("gateway")).toEqual({ + number: 2, + total: 8, + title: "Starting OpenShell gateway", + }); + expect(getOnboardProgressStep("not-a-step")).toBeNull(); + }); +}); diff --git a/src/lib/onboard/machine/progress.ts b/src/lib/onboard/machine/progress.ts new file mode 100644 index 0000000000..2cf485655e --- /dev/null +++ b/src/lib/onboard/machine/progress.ts @@ -0,0 +1,38 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { + ONBOARD_MACHINE_STATE_DEFINITIONS, + type OnboardMachineStateWithProgressDefinition, +} from "./definition"; + +export interface OnboardProgressStep { + number: number; + total: number; + title: string; +} + +export type OnboardMachineProgressStepName = + OnboardMachineStateWithProgressDefinition["stepName"]; + +export type OnboardProgressStepName = OnboardMachineProgressStepName | "messaging"; + +const EXTRA_PROGRESS_STEPS = [ + { + stepName: "messaging", + progress: { number: 5, total: 8, title: "Messaging channels" }, + }, +] as const; + +export const ONBOARD_PROGRESS_STEPS = Object.fromEntries([ + ...ONBOARD_MACHINE_STATE_DEFINITIONS.flatMap((definition) => + "progress" in definition ? [[definition.stepName, definition.progress]] : [], + ), + ...EXTRA_PROGRESS_STEPS.map((definition) => [definition.stepName, definition.progress]), +]) as Readonly<Record<OnboardProgressStepName, OnboardProgressStep>>; + +export function getOnboardProgressStep(stepName: string): OnboardProgressStep | null { + return Object.prototype.hasOwnProperty.call(ONBOARD_PROGRESS_STEPS, stepName) + ? 
ONBOARD_PROGRESS_STEPS[stepName as OnboardProgressStepName] + : null; +} From 4fad8e7cc0461d01f50dae21055a2a1c6d7232f5 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 17:02:42 -0700 Subject: [PATCH 05/25] fix(onboard): emit lifecycle events for onboarding start Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 3 + src/lib/onboard/runtime-boundary.test.ts | 94 ++++++++++++++++++++++++ src/lib/onboard/runtime-boundary.ts | 10 ++- 3 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 src/lib/onboard/runtime-boundary.test.ts diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 4fef39b2aa..a05c9f16da 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -6382,6 +6382,7 @@ const onboardRuntimeBoundary = new OnboardRuntimeBoundary({ maybeForceE2eStepFailure, }); +const recordOnboardStarted = onboardRuntimeBoundary.recordOnboardStarted.bind(onboardRuntimeBoundary); const startRecordedStep = onboardRuntimeBoundary.startRecordedStep.bind(onboardRuntimeBoundary); const recordStepComplete = onboardRuntimeBoundary.recordStepComplete.bind(onboardRuntimeBoundary); const recordStepSkipped = onboardRuntimeBoundary.recordStepSkipped.bind(onboardRuntimeBoundary); @@ -6675,6 +6676,8 @@ async function onboard(opts: OnboardOptions = {}): Promise { ); } + await recordOnboardStarted(resume); + // Backstop for the resume path: a session may exist (so the early guard // skipped because resume === true) but never have recorded a sandboxName // — sandbox creation could have failed before that step ran. Without a diff --git a/src/lib/onboard/runtime-boundary.test.ts b/src/lib/onboard/runtime-boundary.test.ts new file mode 100644 index 0000000000..d81116ed86 --- /dev/null +++ b/src/lib/onboard/runtime-boundary.test.ts @@ -0,0 +1,94 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { + createSession, + filterSafeUpdates, + normalizeSession, + type Session, + type SessionUpdates, +} from "../state/onboard-session"; +import type { OnboardMachineEvent } from "./machine/events"; +import { OnboardRuntime, type OnboardRuntimeDeps } from "./machine/runtime"; +import { OnboardRuntimeBoundary } from "./runtime-boundary"; + +function cloneSession(session: Session): Session { + return normalizeSession(JSON.parse(JSON.stringify(session))) ?? session; +} + +function createRuntimeHarness() { + let session: Session | null = createSession(); + const events: OnboardMachineEvent[] = []; + const updateSession = (mutator: (value: Session) => Session | void): Session => { + const current = session ? cloneSession(session) : createSession(); + session = cloneSession(mutator(current) ?? current); + return cloneSession(session); + }; + const deps: OnboardRuntimeDeps = { + loadSession: () => (session ? cloneSession(session) : null), + createSession, + saveSession: (next) => { + session = cloneSession(next); + return cloneSession(session); + }, + updateSession, + markStepStarted: (stepName) => + updateSession((current) => { + current.steps[stepName].status = "in_progress"; + return current; + }), + markStepComplete: (stepName, updates: SessionUpdates = {}) => + updateSession((current) => { + current.steps[stepName].status = "complete"; + Object.assign(current, filterSafeUpdates(updates)); + return current; + }), + markStepSkipped: (stepName) => + updateSession((current) => { + current.steps[stepName].status = "skipped"; + return current; + }), + markStepFailed: (stepName, message) => + updateSession((current) => { + current.steps[stepName].status = "failed"; + current.failure = { step: stepName, message: message ?? 
null, recordedAt: "now" }; + return current; + }), + completeSession: (updates: SessionUpdates = {}) => + updateSession((current) => { + Object.assign(current, filterSafeUpdates(updates)); + current.status = "complete"; + return current; + }), + filterSafeUpdates, + emitEvent: (event) => events.push(event), + now: () => "2026-05-27T00:00:00.000Z", + }; + return { + createRuntime: () => new OnboardRuntime(deps), + events, + }; +} + +describe("OnboardRuntimeBoundary", () => { + it("records started and resumed lifecycle events through the runtime", async () => { + const harness = createRuntimeHarness(); + const boundary = new OnboardRuntimeBoundary({ + toSessionUpdates: (updates) => filterSafeUpdates(updates as SessionUpdates) as SessionUpdates, + maybeForceE2eStepFailure: () => undefined, + createRuntime: harness.createRuntime, + }); + + await boundary.recordOnboardStarted(false); + await boundary.recordOnboardStarted(true); + + expect(harness.events.map((event) => event.type)).toEqual([ + "onboard.started", + "onboard.resumed", + ]); + expect(harness.events[0]).toMatchObject({ state: "init" }); + expect(harness.events[1]).toMatchObject({ state: "init" }); + }); +}); diff --git a/src/lib/onboard/runtime-boundary.ts b/src/lib/onboard/runtime-boundary.ts index daa8a13367..e90166e17b 100644 --- a/src/lib/onboard/runtime-boundary.ts +++ b/src/lib/onboard/runtime-boundary.ts @@ -8,6 +8,7 @@ import type { OnboardMachineEventType, OnboardMachineState } from "./machine/typ export interface OnboardRuntimeBoundaryOptions { toSessionUpdates(updates: Record): SessionUpdates; maybeForceE2eStepFailure(stepName: string): void; + createRuntime?(): OnboardRuntime; } export class OnboardRuntimeBoundary { @@ -16,7 +17,7 @@ export class OnboardRuntimeBoundary { constructor(private readonly options: OnboardRuntimeBoundaryOptions) {} reset(): void { - this.runtime = new OnboardRuntime(); + this.runtime = this.options.createRuntime?.() ?? 
new OnboardRuntime(); } clear(): void { @@ -24,12 +25,13 @@ export class OnboardRuntimeBoundary { } getRuntime(): OnboardRuntime { - if (!this.runtime) this.runtime = new OnboardRuntime(); + if (!this.runtime) this.runtime = this.options.createRuntime?.() ?? new OnboardRuntime(); return this.runtime; } recorders() { return { + recordOnboardStarted: this.recordOnboardStarted.bind(this), startRecordedStep: this.startRecordedStep.bind(this), recordStepComplete: this.recordStepComplete.bind(this), recordStepSkipped: this.recordStepSkipped.bind(this), @@ -41,6 +43,10 @@ export class OnboardRuntimeBoundary { }; } + async recordOnboardStarted(resumed: boolean): Promise { + return this.getRuntime().start({ resumed }); + } + async startRecordedStep( stepName: string, updates: { From f99e9cbaa1820e89996b9059f4bb3a2c7a82c2d6 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 17:05:42 -0700 Subject: [PATCH 06/25] fix(onboard): emit machine events for resume conflicts Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 2 ++ src/lib/onboard/machine/runtime.test.ts | 18 ++++++++++++++++++ src/lib/onboard/machine/runtime.ts | 19 +++++++++++++++++++ src/lib/onboard/runtime-boundary.test.ts | 21 +++++++++++++++++++++ src/lib/onboard/runtime-boundary.ts | 10 ++++++++++ 5 files changed, 70 insertions(+) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index a05c9f16da..98a49eea55 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -6389,6 +6389,7 @@ const recordStepSkipped = onboardRuntimeBoundary.recordStepSkipped.bind(onboardR const recordStepFailed = onboardRuntimeBoundary.recordStepFailed.bind(onboardRuntimeBoundary); const recordStateSkipped = onboardRuntimeBoundary.recordStateSkipped.bind(onboardRuntimeBoundary); const recordRepairEvent = onboardRuntimeBoundary.recordRepairEvent.bind(onboardRuntimeBoundary); +const recordResumeConflict = onboardRuntimeBoundary.recordResumeConflict.bind(onboardRuntimeBoundary); const recordPostVerifyStarted = 
onboardRuntimeBoundary.recordPostVerifyStarted.bind(onboardRuntimeBoundary); const recordSessionComplete = onboardRuntimeBoundary.recordSessionComplete.bind(onboardRuntimeBoundary); @@ -6598,6 +6599,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { }); if (resumeConflicts.length > 0) { for (const conflict of resumeConflicts) { + await recordResumeConflict(conflict); if (conflict.field === "sandbox") { console.error( ` Resumable state belongs to sandbox '${conflict.recorded}', not '${conflict.requested}'.`, diff --git a/src/lib/onboard/machine/runtime.test.ts b/src/lib/onboard/machine/runtime.test.ts index f098ba0dc3..d48da85e0a 100644 --- a/src/lib/onboard/machine/runtime.test.ts +++ b/src/lib/onboard/machine/runtime.test.ts @@ -209,6 +209,24 @@ describe("OnboardRuntime", () => { expect(events[1]).toMatchObject({ state: "post_verify" }); }); + it("emits redacted resume conflict events without mutating durable state", async () => { + const { runtime, events, getSession } = createHarness(sessionInState("provider_selection")); + + await runtime.emitResumeConflict({ + field: "fromDockerfile", + recorded: "/workspace/Dockerfile", + requested: "/tmp/Dockerfile", + metadata: { endpoint: "https://alice:secret@example.com/v1?token=super-secret" }, + }); + + expect(getSession().machine.state).toBe("provider_selection"); + expect(events).toHaveLength(1); + expect(events[0]).toMatchObject({ type: "resume.conflict", state: "provider_selection" }); + expect(events[0].metadata.field).toBe("fromDockerfile"); + expect(JSON.stringify(events)).not.toContain("super-secret"); + expect(JSON.stringify(events)).not.toContain("alice:secret"); + }); + it("emits skipped and repair events without mutating durable state", async () => { const { runtime, events, getSession } = createHarness(sessionInState("provider_selection")); diff --git a/src/lib/onboard/machine/runtime.ts b/src/lib/onboard/machine/runtime.ts index 2e5d584f3b..65516c3212 100644 --- 
a/src/lib/onboard/machine/runtime.ts +++ b/src/lib/onboard/machine/runtime.ts @@ -243,6 +243,25 @@ export class OnboardRuntime { return session; } + async emitResumeConflict(options: { + field: string; + recorded?: unknown; + requested?: unknown; + metadata?: Record | null; + }): Promise { + const session = this.ensureSession(); + this.emit("resume.conflict", session, { + state: session.machine.state, + metadata: { + ...eventMetadata(options.metadata), + field: options.field, + recorded: options.recorded ?? null, + requested: options.requested ?? null, + }, + }); + return session; + } + async emitRepairEvent( type: Extract< OnboardMachineEventType, diff --git a/src/lib/onboard/runtime-boundary.test.ts b/src/lib/onboard/runtime-boundary.test.ts index d81116ed86..21d6f1083e 100644 --- a/src/lib/onboard/runtime-boundary.test.ts +++ b/src/lib/onboard/runtime-boundary.test.ts @@ -91,4 +91,25 @@ describe("OnboardRuntimeBoundary", () => { expect(harness.events[0]).toMatchObject({ state: "init" }); expect(harness.events[1]).toMatchObject({ state: "init" }); }); + + it("records resume conflict diagnostics through the runtime", async () => { + const harness = createRuntimeHarness(); + const boundary = new OnboardRuntimeBoundary({ + toSessionUpdates: (updates) => filterSafeUpdates(updates as SessionUpdates) as SessionUpdates, + maybeForceE2eStepFailure: () => undefined, + createRuntime: harness.createRuntime, + }); + + await boundary.recordResumeConflict({ + field: "sandbox", + recorded: "old-sandbox", + requested: "new-sandbox", + }); + + expect(harness.events).toHaveLength(1); + expect(harness.events[0]).toMatchObject({ + type: "resume.conflict", + metadata: { field: "sandbox", recorded: "old-sandbox", requested: "new-sandbox" }, + }); + }); }); diff --git a/src/lib/onboard/runtime-boundary.ts b/src/lib/onboard/runtime-boundary.ts index e90166e17b..e2306e3ce5 100644 --- a/src/lib/onboard/runtime-boundary.ts +++ b/src/lib/onboard/runtime-boundary.ts @@ -37,6 +37,7 @@ export 
class OnboardRuntimeBoundary { recordStepSkipped: this.recordStepSkipped.bind(this), recordStateSkipped: this.recordStateSkipped.bind(this), recordRepairEvent: this.recordRepairEvent.bind(this), + recordResumeConflict: this.recordResumeConflict.bind(this), recordStepFailed: this.recordStepFailed.bind(this), recordPostVerifyStarted: this.recordPostVerifyStarted.bind(this), recordSessionComplete: this.recordSessionComplete.bind(this), @@ -83,6 +84,15 @@ export class OnboardRuntimeBoundary { return this.getRuntime().markSkipped(state, metadata); } + async recordResumeConflict(conflict: { + field: string; + recorded?: unknown; + requested?: unknown; + metadata?: Record | null; + }): Promise { + return this.getRuntime().emitResumeConflict(conflict); + } + async recordRepairEvent( type: Extract< OnboardMachineEventType, From 2b60df442657ef2d850f4b371d57450bf92ffeae Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 17:07:45 -0700 Subject: [PATCH 07/25] refactor(onboard): introduce explicit state result types Signed-off-by: Carlos Villela --- src/lib/onboard/machine/result.test.ts | 62 ++++++++++++++++++ src/lib/onboard/machine/result.ts | 89 ++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 src/lib/onboard/machine/result.test.ts create mode 100644 src/lib/onboard/machine/result.ts diff --git a/src/lib/onboard/machine/result.test.ts b/src/lib/onboard/machine/result.test.ts new file mode 100644 index 0000000000..b995f6ac3b --- /dev/null +++ b/src/lib/onboard/machine/result.test.ts @@ -0,0 +1,62 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { + advanceTo, + branchTo, + completeOnboardMachine, + failOnboardMachine, + retryTo, + transitionTo, +} from "./result"; + +describe("onboard state result helpers", () => { + it("builds transition results with optional updates and metadata", () => { + expect( + transitionTo("gateway", { + updates: { sandboxName: "my-assistant" }, + metadata: { reason: "test" }, + }), + ).toEqual({ + type: "transition", + next: "gateway", + transitionKind: undefined, + updates: { sandboxName: "my-assistant" }, + metadata: { reason: "test" }, + }); + }); + + it("labels advance, retry, and branch transitions", () => { + expect(advanceTo("preflight")).toMatchObject({ + type: "transition", + next: "preflight", + transitionKind: "advance", + }); + expect(retryTo("provider_selection")).toMatchObject({ + type: "transition", + next: "provider_selection", + transitionKind: "retry", + }); + expect(branchTo("agent_setup")).toMatchObject({ + type: "transition", + next: "agent_setup", + transitionKind: "branch", + }); + }); + + it("builds terminal completion and failure results", () => { + expect(completeOnboardMachine({ sandboxName: "my-assistant" }, { verified: true })).toEqual({ + type: "complete", + updates: { sandboxName: "my-assistant" }, + metadata: { verified: true }, + }); + expect(failOnboardMachine("boom", { step: "gateway", metadata: { phase: 2 } })).toEqual({ + type: "failed", + error: "boom", + step: "gateway", + metadata: { phase: 2 }, + }); + }); +}); diff --git a/src/lib/onboard/machine/result.ts b/src/lib/onboard/machine/result.ts new file mode 100644 index 0000000000..e80fae20b5 --- /dev/null +++ b/src/lib/onboard/machine/result.ts @@ -0,0 +1,89 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import type { SessionUpdates } from "../../state/onboard-session"; +import type { OnboardMachineTransitionKind } from "./types"; +import type { OnboardMachineState } from "./types"; + +export interface OnboardStateTransitionResult { + type: "transition"; + next: OnboardMachineState; + transitionKind?: OnboardMachineTransitionKind; + updates?: SessionUpdates; + metadata?: Record | null; +} + +export interface OnboardStateCompleteResult { + type: "complete"; + updates?: SessionUpdates; + metadata?: Record | null; +} + +export interface OnboardStateFailedResult { + type: "failed"; + error: string | null; + step?: string | null; + metadata?: Record | null; +} + +export type OnboardStateResult = + | OnboardStateTransitionResult + | OnboardStateCompleteResult + | OnboardStateFailedResult; + +export function transitionTo( + next: OnboardMachineState, + options: { + transitionKind?: OnboardMachineTransitionKind; + updates?: SessionUpdates; + metadata?: Record | null; + } = {}, +): OnboardStateTransitionResult { + return { + type: "transition", + next, + transitionKind: options.transitionKind, + updates: options.updates, + metadata: options.metadata, + }; +} + +export function advanceTo( + next: OnboardMachineState, + options: Omit[1], "transitionKind"> = {}, +): OnboardStateTransitionResult { + return transitionTo(next, { ...options, transitionKind: "advance" }); +} + +export function retryTo( + next: OnboardMachineState, + options: Omit[1], "transitionKind"> = {}, +): OnboardStateTransitionResult { + return transitionTo(next, { ...options, transitionKind: "retry" }); +} + +export function branchTo( + next: OnboardMachineState, + options: Omit[1], "transitionKind"> = {}, +): OnboardStateTransitionResult { + return transitionTo(next, { ...options, transitionKind: "branch" }); +} + +export function completeOnboardMachine( + updates: SessionUpdates = {}, + metadata: Record | null = null, +): OnboardStateCompleteResult { + return { 
type: "complete", updates, metadata }; +} + +export function failOnboardMachine( + error: string | null, + options: { step?: string | null; metadata?: Record | null } = {}, +): OnboardStateFailedResult { + return { + type: "failed", + error, + step: options.step, + metadata: options.metadata, + }; +} From 30341b06786955d07f4ce6f96862a7ff76e1de5f Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 17:09:54 -0700 Subject: [PATCH 08/25] refactor(onboard): apply explicit state results through runtime Signed-off-by: Carlos Villela --- src/lib/onboard/machine/runtime.test.ts | 66 +++++++++++++++++++++++++ src/lib/onboard/machine/runtime.ts | 28 +++++++++++ 2 files changed, 94 insertions(+) diff --git a/src/lib/onboard/machine/runtime.test.ts b/src/lib/onboard/machine/runtime.test.ts index d48da85e0a..512ee7f56b 100644 --- a/src/lib/onboard/machine/runtime.test.ts +++ b/src/lib/onboard/machine/runtime.test.ts @@ -12,6 +12,13 @@ import { type SessionUpdates, } from "../../state/onboard-session"; import type { OnboardMachineEvent } from "./events"; +import { + advanceTo, + branchTo, + completeOnboardMachine, + failOnboardMachine, + retryTo, +} from "./result"; import { OnboardRuntime, type OnboardRuntimeDeps } from "./runtime"; import { InvalidOnboardMachineTransitionError } from "./transitions"; @@ -159,6 +166,65 @@ describe("OnboardRuntime", () => { expect(JSON.stringify(events)).not.toContain("super-secret"); }); + it("applies explicit advance results through validated runtime transitions", async () => { + const { runtime, events, getSession } = createHarness(); + + await runtime.applyResult( + advanceTo("preflight", { + updates: { sandboxName: "my-assistant" }, + metadata: { source: "handler" }, + }), + ); + + expect(getSession()).toMatchObject({ + sandboxName: "my-assistant", + machine: { state: "preflight", revision: 1 }, + }); + expect(events.map((event) => event.type)).toEqual([ + "context.updated", + "state.exited", + "state.entered", + ]); + 
expect(events[0].metadata.fields).toEqual(["sandboxName"]); + expect(events[1]).toMatchObject({ state: "init", metadata: { source: "handler" } }); + expect(events[2]).toMatchObject({ state: "preflight", metadata: { source: "handler" } }); + }); + + it("applies explicit retry, branch, completion, and failure results", async () => { + const retryHarness = createHarness(sessionInState("inference")); + await retryHarness.runtime.applyResult(retryTo("provider_selection")); + expect(retryHarness.getSession().machine).toMatchObject({ state: "provider_selection" }); + + const branchHarness = createHarness(sessionInState("sandbox")); + await branchHarness.runtime.applyResult(branchTo("agent_setup")); + expect(branchHarness.getSession().machine).toMatchObject({ state: "agent_setup" }); + + const completeHarness = createHarness(sessionInState("post_verify")); + await completeHarness.runtime.applyResult(completeOnboardMachine({ sandboxName: "done" })); + expect(completeHarness.getSession()).toMatchObject({ + status: "complete", + sandboxName: "done", + machine: { state: "complete" }, + }); + + const failedHarness = createHarness(sessionInState("gateway")); + await failedHarness.runtime.applyResult(failOnboardMachine("boom", { step: "gateway" })); + expect(failedHarness.getSession()).toMatchObject({ + status: "failed", + failure: { step: "gateway", message: "boom" }, + machine: { state: "failed" }, + }); + }); + + it("rejects invalid explicit transition kinds before mutating context", async () => { + const { runtime, getSession } = createHarness(sessionInState("inference")); + + await expect( + runtime.applyResult(advanceTo("provider_selection", { updates: { sandboxName: "mutated" } })), + ).rejects.toThrow("expected advance, got retry"); + expect(getSession()).toMatchObject({ sandboxName: null, machine: { state: "inference" } }); + }); + it("fails non-terminal sessions with redacted failure events", async () => { const { runtime, events, getSession } = 
createHarness(sessionInState("gateway")); diff --git a/src/lib/onboard/machine/runtime.ts b/src/lib/onboard/machine/runtime.ts index 65516c3212..47cee9f0d2 100644 --- a/src/lib/onboard/machine/runtime.ts +++ b/src/lib/onboard/machine/runtime.ts @@ -9,6 +9,7 @@ import { emitOnboardMachineEvent, type OnboardMachineEvent, } from "./events"; +import type { OnboardStateResult } from "./result"; import { assertValidOnboardMachineTransition, canTransitionOnboardMachineState, @@ -197,6 +198,33 @@ export class OnboardRuntime { return updated; } + async applyResult(result: OnboardStateResult): Promise { + if (result.type === "complete") { + return this.complete(result.updates ?? {}); + } + if (result.type === "failed") { + return this.fail(result.error, { + step: result.step, + metadata: result.metadata, + }); + } + + const current = this.ensureSession(); + const transition = assertValidOnboardMachineTransition(current.machine.state, result.next); + if (result.transitionKind && transition.kind !== result.transitionKind) { + throw new Error( + `Invalid onboarding machine transition kind: ${current.machine.state} -> ${result.next} expected ${result.transitionKind}, got ${transition.kind}`, + ); + } + if (result.updates && Object.keys(this.deps.filterSafeUpdates(result.updates)).length > 0) { + await this.updateContext(result.updates, { + state: current.machine.state, + metadata: result.metadata, + }); + } + return this.transition(result.next, { metadata: result.metadata }); + } + async fail(message: string | null, options: OnboardRuntimeFailureOptions = {}): Promise { const current = this.ensureSession(); const from = current.machine.state; From d4ad2d9cb7bf1ba56f82cf544ae425531d65528a Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 09:28:14 -0700 Subject: [PATCH 09/25] refactor(onboard): make finalization return FSM result Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 6 ++--- .../machine/handlers/finalization.test.ts | 22 +++++++++---------- 
.../onboard/machine/handlers/finalization.ts | 13 ++++++----- src/lib/onboard/runtime-boundary.ts | 6 +++++ 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 98a49eea55..7d26fae8c6 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -6390,8 +6390,8 @@ const recordStepFailed = onboardRuntimeBoundary.recordStepFailed.bind(onboardRun const recordStateSkipped = onboardRuntimeBoundary.recordStateSkipped.bind(onboardRuntimeBoundary); const recordRepairEvent = onboardRuntimeBoundary.recordRepairEvent.bind(onboardRuntimeBoundary); const recordResumeConflict = onboardRuntimeBoundary.recordResumeConflict.bind(onboardRuntimeBoundary); +const recordStateResult = onboardRuntimeBoundary.recordStateResult.bind(onboardRuntimeBoundary); const recordPostVerifyStarted = onboardRuntimeBoundary.recordPostVerifyStarted.bind(onboardRuntimeBoundary); -const recordSessionComplete = onboardRuntimeBoundary.recordSessionComplete.bind(onboardRuntimeBoundary); function skippedStepMessage( stepName: string, @@ -7099,7 +7099,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { }); session = policiesResult.session; - await handleFinalizationState({ + const finalizationResult = await handleFinalizationState({ sandboxName, model, provider, @@ -7114,7 +7114,6 @@ async function onboard(opts: OnboardOptions = {}): Promise { ensureAgentDashboardForward, verifyWebSearchInsideSandbox, recordPostVerifyStarted, - recordSessionComplete, toSessionUpdates: (updates) => toSessionUpdates(updates as Parameters[0]), removeLegacyCredentialsFile, cleanupStaleHostFiles, @@ -7152,6 +7151,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { log: (message) => console.log(message), }, }); + await recordStateResult(finalizationResult.stateResult); traceCompleted = true; } finally { releaseOnboardLock(); diff --git a/src/lib/onboard/machine/handlers/finalization.test.ts b/src/lib/onboard/machine/handlers/finalization.test.ts index 
df6000b2e6..b70f2eef57 100644 --- a/src/lib/onboard/machine/handlers/finalization.test.ts +++ b/src/lib/onboard/machine/handlers/finalization.test.ts @@ -14,7 +14,6 @@ function createDeps(overrides: Partial 18789), postVerify: vi.fn(async () => createSession({ machine: { version: 1, state: "post_verify", stateEnteredAt: null, revision: 1 } })), - complete: vi.fn(async () => createSession({ status: "complete" })), removeLegacy: vi.fn(), cleanupHost: vi.fn(), recoverProcesses: vi.fn(), @@ -32,7 +31,6 @@ function createDeps(overrides: Partial) => updates as SessionUpdates, removeLegacyCredentialsFile: calls.removeLegacy, cleanupStaleHostFiles: calls.cleanupHost, @@ -81,12 +79,16 @@ describe("handleFinalizationState", () => { expect(calls.log).toHaveBeenCalledWith(" ✓ verified"); expect(calls.dashboard).toHaveBeenCalledWith("my-assistant", "model", "provider", null, null); expect(calls.postVerify).toHaveBeenCalledOnce(); - expect(calls.complete).toHaveBeenCalledWith({ - sandboxName: "my-assistant", - provider: "provider", - model: "model", - hermesAuthMethod: null, - hermesToolGateways: [], + expect(result.stateResult).toEqual({ + type: "complete", + updates: { + sandboxName: "my-assistant", + provider: "provider", + model: "model", + hermesAuthMethod: null, + hermesToolGateways: [], + }, + metadata: { state: "finalizing" }, }); expect(result.verificationDiagnostics).toEqual([" ✓ verified"]); }); @@ -98,9 +100,8 @@ describe("handleFinalizationState", () => { await handleFinalizationState({ ...baseOptions(deps), agent }); expect(calls.ensureAgentDashboard).toHaveBeenCalledWith("my-assistant", agent); - expect(calls.complete).toHaveBeenCalled(); expect(calls.ensureAgentDashboard.mock.invocationCallOrder[0]).toBeLessThan( - calls.complete.mock.invocationCallOrder[0], + calls.dashboard.mock.invocationCallOrder[0], ); expect(calls.dashboard).toHaveBeenCalledWith("my-assistant", "model", "provider", null, agent); }); @@ -115,7 +116,6 @@ describe("handleFinalizationState", () 
=> { await expect(handleFinalizationState(baseOptions(deps))).rejects.toThrow("verification failed"); expect(calls.postVerify).toHaveBeenCalledOnce(); - expect(calls.complete).not.toHaveBeenCalled(); expect(calls.dashboard).not.toHaveBeenCalled(); }); diff --git a/src/lib/onboard/machine/handlers/finalization.ts b/src/lib/onboard/machine/handlers/finalization.ts index 34e2dba224..5bc8f96ccb 100644 --- a/src/lib/onboard/machine/handlers/finalization.ts +++ b/src/lib/onboard/machine/handlers/finalization.ts @@ -1,7 +1,8 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -import type { Session, SessionUpdates } from "../../../state/onboard-session"; +import type { Session } from "../../../state/onboard-session"; +import { completeOnboardMachine, type OnboardStateCompleteResult } from "../result"; export interface FinalizationStateOptions { sandboxName: string; @@ -17,8 +18,7 @@ export interface FinalizationStateOptions): number; recordPostVerifyStarted(): Promise; - recordSessionComplete(updates: SessionUpdates): Promise; - toSessionUpdates(updates: Record): SessionUpdates; + toSessionUpdates(updates: Record): NonNullable; removeLegacyCredentialsFile(): void; cleanupStaleHostFiles(): void; checkAndRecoverSandboxProcesses(sandboxName: string, options: { quiet: boolean }): void; @@ -46,7 +46,7 @@ export interface FinalizationStateOptions { + return this.getRuntime().applyResult(result); + } + async recordResumeConflict(conflict: { field: string; recorded?: unknown; From 356c9470245d6d7d7f4a50bc0a0bfa8e01763e68 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 09:30:58 -0700 Subject: [PATCH 10/25] refactor(onboard): make agent setup return FSM result Signed-off-by: Carlos Villela --- .../onboard/machine/handlers/agent-setup.test.ts | 15 +++++++++++++++ src/lib/onboard/machine/handlers/agent-setup.ts | 6 ++++-- 2 files changed, 19 insertions(+), 2 deletions(-) 
diff --git a/src/lib/onboard/machine/handlers/agent-setup.test.ts b/src/lib/onboard/machine/handlers/agent-setup.test.ts index f5dd3e5f65..9eb998ea4d 100644 --- a/src/lib/onboard/machine/handlers/agent-setup.test.ts +++ b/src/lib/onboard/machine/handlers/agent-setup.test.ts @@ -88,6 +88,13 @@ describe("handleAgentSetupState", () => { expect(calls.skipped).toHaveBeenCalledWith("openclaw"); expect(calls.setupOpenclaw).not.toHaveBeenCalled(); expect(result.session?.steps.openclaw.status).toBe("skipped"); + expect(result.stateResult).toEqual({ + type: "transition", + next: "policies", + transitionKind: "advance", + updates: undefined, + metadata: { state: "agent_setup" }, + }); }); it("skips OpenClaw setup on resume when OpenClaw is ready", async () => { @@ -108,6 +115,13 @@ describe("handleAgentSetupState", () => { expect.objectContaining({ sandboxName: "my-assistant", provider: "provider", model: "model" }), ); expect(calls.skipped).toHaveBeenCalledWith("agent_setup"); + expect(result.stateResult).toEqual({ + type: "transition", + next: "policies", + transitionKind: "advance", + updates: undefined, + metadata: { state: "openclaw" }, + }); expect(result.session).toMatchObject({ sandboxName: "my-assistant", provider: "provider", @@ -143,6 +157,7 @@ describe("handleAgentSetupState", () => { }), ); expect(calls.skipped).toHaveBeenCalledWith("agent_setup"); + expect(result.stateResult).toMatchObject({ next: "policies", transitionKind: "advance" }); expect(result.session).toMatchObject({ sandboxName: "my-assistant", provider: "provider", diff --git a/src/lib/onboard/machine/handlers/agent-setup.ts b/src/lib/onboard/machine/handlers/agent-setup.ts index 3b43bd69cb..4ec59f8c79 100644 --- a/src/lib/onboard/machine/handlers/agent-setup.ts +++ b/src/lib/onboard/machine/handlers/agent-setup.ts @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 import type { Session, SessionUpdates } from "../../../state/onboard-session"; +import { advanceTo, type OnboardStateTransitionResult 
} from "../result"; export interface AgentSetupStateOptions { agent: Agent | null; @@ -41,6 +42,7 @@ export interface AgentSetupStateOptions { export interface AgentSetupStateResult { session: Session | null; + stateResult: OnboardStateTransitionResult; } export async function handleAgentSetupState({ @@ -66,7 +68,7 @@ export async function handleAgentSetupState({ ); deps.ensureAgentDashboardForward(sandboxName, agent); session = await deps.recordStepSkipped("openclaw"); - return { session }; + return { session, stateResult: advanceTo("policies", { metadata: { state: "agent_setup" } }) }; } const resumeOpenclaw = resume && sandboxName && deps.isOpenclawReady(sandboxName); @@ -87,5 +89,5 @@ export async function handleAgentSetupState({ ); } session = await deps.recordStepSkipped("agent_setup"); - return { session }; + return { session, stateResult: advanceTo("policies", { metadata: { state: "openclaw" } }) }; } From 2296519e6d7875238bc02da68f9f0c0f97489b26 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 09:33:07 -0700 Subject: [PATCH 11/25] refactor(onboard): make policy setup return FSM result Signed-off-by: Carlos Villela --- src/lib/onboard/machine/handlers/policies.test.ts | 14 +++++++++++++- src/lib/onboard/machine/handlers/policies.ts | 11 ++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/lib/onboard/machine/handlers/policies.test.ts b/src/lib/onboard/machine/handlers/policies.test.ts index f2865b1e5c..7ccf7cf12e 100644 --- a/src/lib/onboard/machine/handlers/policies.test.ts +++ b/src/lib/onboard/machine/handlers/policies.test.ts @@ -91,7 +91,7 @@ describe("handlePoliciesState", () => { it("runs compatible endpoint smoke before policy selection", async () => { const { deps, calls } = createDeps(); - await handlePoliciesState(baseOptions(deps)); + const result = await handlePoliciesState(baseOptions(deps)); expect(calls.smoke).toHaveBeenCalledWith({ sandboxName: "my-assistant", @@ -121,6 +121,13 @@ 
describe("handlePoliciesState", () => { "policies", expect.objectContaining({ policyPresets: ["npm"] }), ); + expect(result.stateResult).toEqual({ + type: "transition", + next: "finalizing", + transitionKind: "advance", + updates: undefined, + metadata: { state: "policies", policyPresets: ["npm"] }, + }); }); it("uses recorded messaging channels when no active selection exists", async () => { @@ -158,6 +165,11 @@ describe("handlePoliciesState", () => { expect.objectContaining({ policyPresets: ["npm"] }), ); expect(result.appliedPolicyPresets).toEqual(["npm"]); + expect(result.stateResult).toMatchObject({ + next: "finalizing", + transitionKind: "advance", + metadata: { policyPresets: ["npm"] }, + }); }); it("reconciles unsupported recorded presets before interactive setup", async () => { diff --git a/src/lib/onboard/machine/handlers/policies.ts b/src/lib/onboard/machine/handlers/policies.ts index 586a312abc..d0c7305171 100644 --- a/src/lib/onboard/machine/handlers/policies.ts +++ b/src/lib/onboard/machine/handlers/policies.ts @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 import type { Session, SessionUpdates } from "../../../state/onboard-session"; +import { advanceTo, type OnboardStateTransitionResult } from "../result"; // Inlined to avoid pulling sandbox-agent's transitive runner.ts deps into // the generic state handler. 
Matches normalizeSandboxAgentName: trim, @@ -99,6 +100,7 @@ export interface PoliciesStateResult { session: Session | null; recordedMessagingChannels: string[]; appliedPolicyPresets: string[]; + stateResult: OnboardStateTransitionResult; } export async function handlePoliciesState({ @@ -206,5 +208,12 @@ export async function handlePoliciesState({ ); } - return { session, recordedMessagingChannels, appliedPolicyPresets }; + return { + session, + recordedMessagingChannels, + appliedPolicyPresets, + stateResult: advanceTo("finalizing", { + metadata: { state: "policies", policyPresets: appliedPolicyPresets }, + }), + }; } From 67a9a1e26a3428e0c51f8d56a98b7711aad059f0 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 09:35:40 -0700 Subject: [PATCH 12/25] refactor(onboard): make preflight and gateway return FSM results Signed-off-by: Carlos Villela --- src/lib/onboard/machine/handlers/gateway.test.ts | 7 +++++++ src/lib/onboard/machine/handlers/gateway.ts | 10 +++++++++- src/lib/onboard/machine/handlers/preflight.test.ts | 7 +++++++ src/lib/onboard/machine/handlers/preflight.ts | 5 +++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/lib/onboard/machine/handlers/gateway.test.ts b/src/lib/onboard/machine/handlers/gateway.test.ts index 696e4940ac..b184fdb826 100644 --- a/src/lib/onboard/machine/handlers/gateway.test.ts +++ b/src/lib/onboard/machine/handlers/gateway.test.ts @@ -95,6 +95,13 @@ describe("handleGatewayState", () => { expect(calls.startGateway).toHaveBeenCalledWith({ type: "nvidia" }, { gpuPassthrough: true }); expect(calls.complete).toHaveBeenCalledWith("gateway"); expect(result.gatewayReuseState).toBe("missing"); + expect(result.stateResult).toEqual({ + type: "transition", + next: "provider_selection", + transitionKind: "advance", + updates: undefined, + metadata: { state: "gateway", gatewayReuseState: "missing" }, + }); }); it("reuses healthy gateways on fresh runs", async () => { diff --git 
a/src/lib/onboard/machine/handlers/gateway.ts b/src/lib/onboard/machine/handlers/gateway.ts index 461a19f924..6589db29cd 100644 --- a/src/lib/onboard/machine/handlers/gateway.ts +++ b/src/lib/onboard/machine/handlers/gateway.ts @@ -6,6 +6,7 @@ import type { GatewayReuseState } from "../../../state/gateway"; import type { Session } from "../../../state/onboard-session"; import type { GatewayContainerState } from "../../gateway-container-running"; import { withGatewayTrace } from "../../tracing"; +import { advanceTo, type OnboardStateTransitionResult } from "../result"; export interface GatewayStateOptions { resume: boolean; @@ -68,6 +69,7 @@ export interface GatewayStateOptions { export interface GatewayStateResult { gatewayReuseState: GatewayReuseState; session: Session | null; + stateResult: OnboardStateTransitionResult; } export async function handleGatewayState({ @@ -213,5 +215,11 @@ export async function handleGatewayState({ session = await deps.recordStepComplete("gateway"); } - return { gatewayReuseState, session }; + return { + gatewayReuseState, + session, + stateResult: advanceTo("provider_selection", { + metadata: { state: "gateway", gatewayReuseState }, + }), + }; } diff --git a/src/lib/onboard/machine/handlers/preflight.test.ts b/src/lib/onboard/machine/handlers/preflight.test.ts index f625a33de0..4b68f9b550 100644 --- a/src/lib/onboard/machine/handlers/preflight.test.ts +++ b/src/lib/onboard/machine/handlers/preflight.test.ts @@ -104,6 +104,13 @@ describe("handlePreflightState", () => { sandboxGpuDevice: "GPU-0", }); expect(result.gpuPassthrough).toBe(true); + expect(result.stateResult).toEqual({ + type: "transition", + next: "gateway", + transitionKind: "advance", + updates: undefined, + metadata: { state: "preflight", gpuPassthrough: true }, + }); }); it("skips full preflight on resume but re-detects GPU and revalidates CDI/sandbox GPU", async () => { diff --git a/src/lib/onboard/machine/handlers/preflight.ts 
b/src/lib/onboard/machine/handlers/preflight.ts index 599781119c..be28649cd8 100644 --- a/src/lib/onboard/machine/handlers/preflight.ts +++ b/src/lib/onboard/machine/handlers/preflight.ts @@ -3,6 +3,7 @@ import type { Session } from "../../../state/onboard-session"; import { withPreflightTrace } from "../../tracing"; +import { advanceTo, type OnboardStateTransitionResult } from "../result"; export type PreflightSandboxGpuFlag = "enable" | "disable" | null; @@ -86,6 +87,7 @@ export interface PreflightStateResult Date: Thu, 28 May 2026 09:38:19 -0700 Subject: [PATCH 13/25] refactor(onboard): make sandbox return branch FSM result Signed-off-by: Carlos Villela --- src/lib/onboard/machine/handlers/sandbox.test.ts | 7 +++++++ src/lib/onboard/machine/handlers/sandbox.ts | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/src/lib/onboard/machine/handlers/sandbox.test.ts b/src/lib/onboard/machine/handlers/sandbox.test.ts index 52cf8a6db2..443166bc1e 100644 --- a/src/lib/onboard/machine/handlers/sandbox.test.ts +++ b/src/lib/onboard/machine/handlers/sandbox.test.ts @@ -153,6 +153,13 @@ describe("handleSandboxState", () => { expect(calls.setDefault).toHaveBeenCalledWith("my-assistant"); expect(calls.complete).toHaveBeenCalledWith("sandbox", expect.objectContaining({ sandboxName: "my-assistant" })); expect(result).toMatchObject({ sandboxName: "my-assistant", selectedMessagingChannels: ["telegram"], webSearchSupported: true }); + expect(result.stateResult).toEqual({ + type: "transition", + next: "openclaw", + transitionKind: "branch", + updates: undefined, + metadata: { state: "sandbox", sandboxName: "my-assistant", agent: "openclaw" }, + }); }); it("reuses a completed ready sandbox on resume", async () => { diff --git a/src/lib/onboard/machine/handlers/sandbox.ts b/src/lib/onboard/machine/handlers/sandbox.ts index efa5cf0adb..e7740fbf6d 100644 --- a/src/lib/onboard/machine/handlers/sandbox.ts +++ b/src/lib/onboard/machine/handlers/sandbox.ts @@ -3,6 +3,7 @@ import 
type { Session, SessionUpdates } from "../../../state/onboard-session"; import { withSandboxPhaseTrace } from "../../tracing"; +import { branchTo, type OnboardStateTransitionResult } from "../result"; export interface SandboxStateOptions { resume: boolean; @@ -98,6 +99,7 @@ export interface SandboxStateResult { selectedMessagingChannels: string[]; webSearchSupported: boolean; session: Session | null; + stateResult: OnboardStateTransitionResult; } function sameEffectiveTelegramRequireMention(left: boolean | null, right: boolean | null): boolean { @@ -335,5 +337,12 @@ export async function handleSandboxState Date: Thu, 28 May 2026 11:20:05 -0700 Subject: [PATCH 14/25] refactor(onboard): return FSM results from provider inference Signed-off-by: Carlos Villela --- .../handlers/provider-inference.test.ts | 23 +++++++++++++++++++ .../machine/handlers/provider-inference.ts | 18 +++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/src/lib/onboard/machine/handlers/provider-inference.test.ts b/src/lib/onboard/machine/handlers/provider-inference.test.ts index 5414e898a5..2865973de1 100644 --- a/src/lib/onboard/machine/handlers/provider-inference.test.ts +++ b/src/lib/onboard/machine/handlers/provider-inference.test.ts @@ -157,6 +157,14 @@ describe("handleProviderInferenceState", () => { provider: "nvidia-prod", preferredInferenceApi: "openai-responses", }); + expect(result.stateResult).toEqual({ + type: "transition", + next: "sandbox", + transitionKind: "advance", + updates: undefined, + metadata: { state: "inference", provider: "nvidia-prod", model: "nvidia/test" }, + }); + expect(result.retryStateResults).toEqual([]); }); it("clears non-NVIDIA provider credentials when inference setup fails", async () => { @@ -347,6 +355,21 @@ describe("handleProviderInferenceState", () => { expect(setupInference).toHaveBeenCalledTimes(2); expect(result.model).toBe("good"); expect(calls.startStep).toHaveBeenCalledWith("provider_selection"); + 
expect(result.retryStateResults).toEqual([ + { + type: "transition", + next: "provider_selection", + transitionKind: "retry", + updates: undefined, + metadata: { + state: "inference", + provider: "nvidia-prod", + model: "bad", + reason: "selection_retry", + }, + }, + ]); + expect(result.stateResult).toMatchObject({ next: "sandbox", transitionKind: "advance" }); }); it("aborts before inference setup when the configuration summary is rejected", async () => { diff --git a/src/lib/onboard/machine/handlers/provider-inference.ts b/src/lib/onboard/machine/handlers/provider-inference.ts index 44d2cf5ed5..1a90147d2a 100644 --- a/src/lib/onboard/machine/handlers/provider-inference.ts +++ b/src/lib/onboard/machine/handlers/provider-inference.ts @@ -4,6 +4,7 @@ import type { WebSearchConfig } from "../../../inference/web-search"; import type { Session, SessionUpdates } from "../../../state/onboard-session"; import { withInferenceTrace, withProviderSelectionTrace } from "../../tracing"; +import { advanceTo, retryTo, type OnboardStateTransitionResult } from "../result"; export type ProviderInferenceRetry = { retry: "selection" } | { ok: true; retry?: undefined }; @@ -120,6 +121,8 @@ export interface ProviderInferenceStateResult { nimContainer: string | null; webSearchConfig: WebSearchConfig | null; session: Session | null; + stateResult: OnboardStateTransitionResult; + retryStateResults: OnboardStateTransitionResult[]; } function requireSelection( @@ -169,6 +172,7 @@ export async function handleProviderInferenceState({ const webSearchConfig = initial.webSearchConfig; let forceProviderSelection = initialForceProviderSelection; let allowToolsIncompatible = false; + const retryStateResults: OnboardStateTransitionResult[] = []; while (true) { let forceInferenceSetup = false; @@ -288,6 +292,11 @@ export async function handleProviderInferenceState({ clearStagedCredentialEnv(deps, credentialEnv); } if (inferenceResult?.retry === "selection") { + retryStateResults.push( + 
retryTo("provider_selection", { + metadata: { state: "inference", provider, model, reason: "selection_retry" }, + }), + ); forceProviderSelection = true; continue; } @@ -372,6 +381,11 @@ export async function handleProviderInferenceState({ clearStagedCredentialEnv(deps, credentialEnv); } if (inferenceResult?.retry === "selection") { + retryStateResults.push( + retryTo("provider_selection", { + metadata: { state: "inference", provider, model, reason: "selection_retry" }, + }), + ); forceProviderSelection = true; continue; } @@ -395,5 +409,9 @@ export async function handleProviderInferenceState({ nimContainer, webSearchConfig, session, + stateResult: advanceTo("sandbox", { + metadata: { state: "inference", provider, model }, + }), + retryStateResults, }; } From dbbb273a067af0faaf94b35509211b6c08a53b94 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 11:23:11 -0700 Subject: [PATCH 15/25] refactor(onboard): add FSM runner shell Signed-off-by: Carlos Villela --- src/lib/onboard/machine/runner.test.ts | 158 +++++++++++++++++++++++++ src/lib/onboard/machine/runner.ts | 71 +++++++++++ 2 files changed, 229 insertions(+) create mode 100644 src/lib/onboard/machine/runner.test.ts create mode 100644 src/lib/onboard/machine/runner.ts diff --git a/src/lib/onboard/machine/runner.test.ts b/src/lib/onboard/machine/runner.test.ts new file mode 100644 index 0000000000..558960f618 --- /dev/null +++ b/src/lib/onboard/machine/runner.test.ts @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; + +import { + createSession, + filterSafeUpdates, + normalizeSession, + sanitizeFailure, + type Session, + type SessionUpdates, +} from "../../state/onboard-session"; +import { advanceTo, branchTo, completeOnboardMachine, failOnboardMachine, retryTo } from "./result"; +import { OnboardRuntime, type OnboardRuntimeDeps } from "./runtime"; +import { + MissingOnboardStateHandlerError, + runOnboardMachine, + type OnboardStateHandlers, +} from "./runner"; + +interface RunnerContext { + attempts: number; + visited: string[]; +} + +function cloneSession(session: Session): Session { + return normalizeSession(JSON.parse(JSON.stringify(session))) ?? session; +} + +function createRuntime(initialSession: Session = createSession()) { + let session = cloneSession(initialSession); + const updateSession = (mutator: (value: Session) => Session | void): Session => { + const next = mutator(cloneSession(session)) ?? 
session; + session = cloneSession(next); + return cloneSession(session); + }; + const deps: OnboardRuntimeDeps = { + loadSession: () => cloneSession(session), + createSession, + saveSession: (next) => { + session = cloneSession(next); + return cloneSession(session); + }, + updateSession, + markStepStarted: () => cloneSession(session), + markStepComplete: (_stepName, updates: SessionUpdates = {}) => + updateSession((current) => { + Object.assign(current, filterSafeUpdates(updates)); + return current; + }), + markStepSkipped: () => cloneSession(session), + markStepFailed: (_stepName, message) => + updateSession((current) => { + current.status = "failed"; + current.failure = sanitizeFailure({ step: _stepName, message, recordedAt: "now" }); + return current; + }), + completeSession: (updates: SessionUpdates = {}) => + updateSession((current) => { + Object.assign(current, filterSafeUpdates(updates)); + current.status = "complete"; + current.resumable = false; + return current; + }), + filterSafeUpdates, + emitEvent: () => undefined, + now: () => "2026-05-28T00:00:00.000Z", + }; + return new OnboardRuntime(deps); +} + +describe("runOnboardMachine", () => { + it("runs handlers until completion while applying retry and branch transitions", async () => { + const runtime = createRuntime(); + const calls: string[] = []; + const handlers: OnboardStateHandlers = { + init: () => advanceTo("preflight"), + preflight: () => advanceTo("gateway"), + gateway: () => advanceTo("provider_selection"), + provider_selection: () => advanceTo("inference"), + inference: (context) => { + calls.push(`inference:${context.attempts}`); + return context.attempts === 0 ? 
retryTo("provider_selection") : advanceTo("sandbox"); + }, + sandbox: () => branchTo("openclaw"), + openclaw: () => advanceTo("policies"), + policies: () => advanceTo("finalizing"), + finalizing: () => advanceTo("post_verify"), + post_verify: () => completeOnboardMachine({ sandboxName: "my-assistant" }), + }; + + const result = await runOnboardMachine({ + context: { attempts: 0, visited: [] } as RunnerContext, + runtime, + handlers, + updateContext: ({ context, state }) => ({ + attempts: state === "inference" ? context.attempts + 1 : context.attempts, + visited: [...context.visited, state], + }), + }); + + expect(result.session).toMatchObject({ + status: "complete", + sandboxName: "my-assistant", + machine: { state: "complete" }, + }); + expect(calls).toEqual(["inference:0", "inference:1"]); + expect(result.context.visited).toEqual([ + "init", + "preflight", + "gateway", + "provider_selection", + "inference", + "provider_selection", + "inference", + "sandbox", + "openclaw", + "policies", + "finalizing", + "post_verify", + ]); + }); + + it("stops on failed terminal results", async () => { + const runtime = createRuntime(); + const policies = vi.fn(() => advanceTo("finalizing")); + + const result = await runOnboardMachine({ + context: { attempts: 0, visited: [] } as RunnerContext, + runtime, + handlers: { + init: () => advanceTo("preflight"), + preflight: () => failOnboardMachine("preflight failed", { step: "preflight" }), + policies, + }, + }); + + expect(result.session).toMatchObject({ + status: "failed", + failure: { step: "preflight", message: "preflight failed" }, + machine: { state: "failed" }, + }); + expect(policies).not.toHaveBeenCalled(); + }); + + it("throws when a non-terminal state has no handler", async () => { + const runtime = createRuntime(); + + await expect( + runOnboardMachine({ + context: { attempts: 0, visited: [] } as RunnerContext, + runtime, + handlers: {}, + }), + ).rejects.toThrow(MissingOnboardStateHandlerError); + }); +}); diff --git 
a/src/lib/onboard/machine/runner.ts b/src/lib/onboard/machine/runner.ts new file mode 100644 index 0000000000..5e4db4174d --- /dev/null +++ b/src/lib/onboard/machine/runner.ts @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { Session } from "../../state/onboard-session"; +import type { OnboardStateResult } from "./result"; +import { isTerminalOnboardMachineState } from "./transitions"; +import type { OnboardMachineState, OnboardNonTerminalMachineState } from "./types"; + +export type OnboardStateHandler = ( + context: Context, +) => Promise | OnboardStateResult; + +export type OnboardStateHandlers = Partial< + Record> +>; + +export interface OnboardMachineRunnerRuntime { + session(): Promise; + applyResult(result: OnboardStateResult): Promise; +} + +export interface OnboardMachineRunnerOptions { + context: Context; + runtime: OnboardMachineRunnerRuntime; + handlers: OnboardStateHandlers; + updateContext?(input: { + context: Context; + state: OnboardMachineState; + result: OnboardStateResult; + session: Session; + }): Context | Promise; +} + +export interface OnboardMachineRunnerResult { + context: Context; + session: Session; +} + +export class MissingOnboardStateHandlerError extends Error { + readonly state: OnboardNonTerminalMachineState; + + constructor(state: OnboardNonTerminalMachineState) { + super(`Missing onboarding machine handler for state: ${state}`); + this.name = "MissingOnboardStateHandlerError"; + this.state = state; + } +} + +export async function runOnboardMachine({ + context: initialContext, + runtime, + handlers, + updateContext, +}: OnboardMachineRunnerOptions): Promise> { + let context = initialContext; + let session = await runtime.session(); + + while (!isTerminalOnboardMachineState(session.machine.state)) { + const state = session.machine.state; + const handler = handlers[state as OnboardNonTerminalMachineState]; + if 
(!handler) throw new MissingOnboardStateHandlerError(state as OnboardNonTerminalMachineState); + + const result = await handler(context); + session = await runtime.applyResult(result); + context = updateContext + ? await updateContext({ context, state, result, session }) + : context; + } + + return { context, session }; +} From 6b27a0bd6638fa95e928fe5c34b0ed9533e67c0d Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 11:28:31 -0700 Subject: [PATCH 16/25] refactor(onboard): consume handler FSM results compatibly Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 8 ++++++ src/lib/onboard/runtime-boundary.test.ts | 36 ++++++++++++++++++++++++ src/lib/onboard/runtime-boundary.ts | 15 ++++++++++ 3 files changed, 59 insertions(+) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 7d26fae8c6..621a0f8aa1 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -6391,6 +6391,7 @@ const recordStateSkipped = onboardRuntimeBoundary.recordStateSkipped.bind(onboar const recordRepairEvent = onboardRuntimeBoundary.recordRepairEvent.bind(onboardRuntimeBoundary); const recordResumeConflict = onboardRuntimeBoundary.recordResumeConflict.bind(onboardRuntimeBoundary); const recordStateResult = onboardRuntimeBoundary.recordStateResult.bind(onboardRuntimeBoundary); +const recordStateResultWithStepCompatibility = onboardRuntimeBoundary.recordStateResultWithStepCompatibility.bind(onboardRuntimeBoundary); const recordPostVerifyStarted = onboardRuntimeBoundary.recordPostVerifyStarted.bind(onboardRuntimeBoundary); function skippedStepMessage( @@ -6790,6 +6791,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { }, }); if (resume && _preflightDashboardPort === null) preflightDashboardPortRangeAvailability(); // #3953 — resume must mirror preflight()'s fail-fast + await recordStateResultWithStepCompatibility(preflightResult.stateResult); session = preflightResult.session; const { sandboxGpuConfig, @@ -6862,6 +6864,7 @@ async function onboard(opts: 
OnboardOptions = {}): Promise { exitProcess: (code) => process.exit(code), }, }); + await recordStateResultWithStepCompatibility(gatewayResult.stateResult); session = gatewayResult.session; // #2753: prefer requestedSandboxName over an unconfirmed session name. @@ -6943,6 +6946,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { }, }, }); + await recordStateResultWithStepCompatibility(providerInferenceResult.stateResult); session = providerInferenceResult.session; sandboxName = providerInferenceResult.sandboxName; const { @@ -7019,6 +7023,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { exitProcess: (code) => process.exit(code), }, }); + await recordStateResultWithStepCompatibility(sandboxStateResult.stateResult); session = sandboxStateResult.session; sandboxName = sandboxStateResult.sandboxName; webSearchConfig = sandboxStateResult.webSearchConfig ?? null; @@ -7061,6 +7066,7 @@ async function onboard(opts: OnboardOptions = {}): Promise { toSessionUpdates: (updates) => toSessionUpdates(updates as Parameters[0]), }, }); + await recordStateResultWithStepCompatibility(agentSetupResult.stateResult); session = agentSetupResult.session; const policiesResult = await handlePoliciesState({ @@ -7097,9 +7103,11 @@ async function onboard(opts: OnboardOptions = {}): Promise { toSessionUpdates: (updates) => toSessionUpdates(updates as Parameters[0]), }, }); + await recordStateResultWithStepCompatibility(policiesResult.stateResult); session = policiesResult.session; const finalizationResult = await handleFinalizationState({ + sandboxName, model, provider, diff --git a/src/lib/onboard/runtime-boundary.test.ts b/src/lib/onboard/runtime-boundary.test.ts index 21d6f1083e..89d671da2c 100644 --- a/src/lib/onboard/runtime-boundary.test.ts +++ b/src/lib/onboard/runtime-boundary.test.ts @@ -11,6 +11,7 @@ import { type SessionUpdates, } from "../state/onboard-session"; import type { OnboardMachineEvent } from "./machine/events"; +import { advanceTo } from 
"./machine/result"; import { OnboardRuntime, type OnboardRuntimeDeps } from "./machine/runtime"; import { OnboardRuntimeBoundary } from "./runtime-boundary"; @@ -92,6 +93,41 @@ describe("OnboardRuntimeBoundary", () => { expect(harness.events[1]).toMatchObject({ state: "init" }); }); + it("applies state results unless legacy step helpers already advanced the machine", async () => { + const harness = createRuntimeHarness(); + const boundary = new OnboardRuntimeBoundary({ + toSessionUpdates: (updates) => filterSafeUpdates(updates as SessionUpdates) as SessionUpdates, + maybeForceE2eStepFailure: () => undefined, + createRuntime: harness.createRuntime, + }); + + await boundary.recordStateResultWithStepCompatibility(advanceTo("preflight", { metadata: { state: "init" } })); + await boundary.recordStateResultWithStepCompatibility(advanceTo("preflight", { metadata: { state: "init" } })); + await boundary.recordStateResultWithStepCompatibility(advanceTo("gateway", { metadata: { state: "preflight" } })); + + expect(harness.events.map((event) => event.type)).toEqual([ + "state.exited", + "state.entered", + "state.exited", + "state.entered", + ]); + expect(harness.events[1]).toMatchObject({ state: "preflight" }); + expect(harness.events[3]).toMatchObject({ state: "gateway" }); + }); + + it("ignores stale compatible state results when legacy tests leave the machine behind", async () => { + const harness = createRuntimeHarness(); + const boundary = new OnboardRuntimeBoundary({ + toSessionUpdates: (updates) => filterSafeUpdates(updates as SessionUpdates) as SessionUpdates, + maybeForceE2eStepFailure: () => undefined, + createRuntime: harness.createRuntime, + }); + + await boundary.recordStateResultWithStepCompatibility(advanceTo("gateway", { metadata: { state: "preflight" } })); + + expect(harness.events).toEqual([]); + }); + it("records resume conflict diagnostics through the runtime", async () => { const harness = createRuntimeHarness(); const boundary = new 
OnboardRuntimeBoundary({ diff --git a/src/lib/onboard/runtime-boundary.ts b/src/lib/onboard/runtime-boundary.ts index 58a970dfdd..31a4dbbea8 100644 --- a/src/lib/onboard/runtime-boundary.ts +++ b/src/lib/onboard/runtime-boundary.ts @@ -40,6 +40,7 @@ export class OnboardRuntimeBoundary { recordRepairEvent: this.recordRepairEvent.bind(this), recordResumeConflict: this.recordResumeConflict.bind(this), recordStateResult: this.recordStateResult.bind(this), + recordStateResultWithStepCompatibility: this.recordStateResultWithStepCompatibility.bind(this), recordStepFailed: this.recordStepFailed.bind(this), recordPostVerifyStarted: this.recordPostVerifyStarted.bind(this), recordSessionComplete: this.recordSessionComplete.bind(this), @@ -90,6 +91,20 @@ export class OnboardRuntimeBoundary { return this.getRuntime().applyResult(result); } + async recordStateResultWithStepCompatibility(result: OnboardStateResult): Promise { + const runtime = this.getRuntime(); + const current = await runtime.session(); + if (result.type !== "transition") return runtime.applyResult(result); + + if (current.machine.state === result.next) return current; + + const sourceState = + result.metadata && typeof result.metadata.state === "string" ? 
result.metadata.state : null; + if (sourceState && current.machine.state !== sourceState) return current; + + return runtime.applyResult(result); + } + async recordResumeConflict(conflict: { field: string; recorded?: unknown; From 44009ad23b63cce9b465670d0943490eb121cfef Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 11:53:44 -0700 Subject: [PATCH 17/25] refactor(onboard): allow step recording without machine transitions Signed-off-by: Carlos Villela --- src/lib/state/onboard-session.test.ts | 22 ++++++++++++++++++ src/lib/state/onboard-session.ts | 33 ++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/src/lib/state/onboard-session.test.ts b/src/lib/state/onboard-session.test.ts index be35e8f73d..5c8b35f380 100644 --- a/src/lib/state/onboard-session.test.ts +++ b/src/lib/state/onboard-session.test.ts @@ -144,6 +144,28 @@ describe("onboard session", () => { expect(loaded.machine.state).toBe("failed"); }); + it("can record step boundaries without mutating the machine snapshot", () => { + session.saveSession(session.createSession()); + + session.markStepStarted("preflight", { updateMachine: false }); + let loaded = requireLoadedSession(session.loadSession()); + expect(loaded.steps.preflight.status).toBe("in_progress"); + expect(loaded.machine).toMatchObject({ state: "init", revision: 0 }); + + session.markStepComplete("preflight", { sandboxName: "my-assistant" }, { updateMachine: false }); + loaded = requireLoadedSession(session.loadSession()); + expect(loaded.steps.preflight.status).toBe("complete"); + expect(loaded.sandboxName).toBe("my-assistant"); + expect(loaded.machine).toMatchObject({ state: "init", revision: 0 }); + + session.markStepFailed("gateway", "Gateway failed", { updateMachine: false }); + loaded = requireLoadedSession(session.loadSession()); + expect(loaded.steps.gateway.status).toBe("failed"); + expect(loaded.status).toBe("failed"); + expect(loaded.failure).toMatchObject({ step: "gateway", 
message: "Gateway failed" }); + expect(loaded.machine).toMatchObject({ state: "init", revision: 0 }); + }); + it("persists a compact machine snapshot across step boundaries", () => { session.saveSession(session.createSession()); let loaded = requireLoadedSession(session.loadSession()); diff --git a/src/lib/state/onboard-session.ts b/src/lib/state/onboard-session.ts index 26cbf08353..d100f19ddb 100644 --- a/src/lib/state/onboard-session.ts +++ b/src/lib/state/onboard-session.ts @@ -1005,7 +1005,20 @@ export function updateSession(mutator: (session: Session) => Session | void): Se return saveSession(next); } -export function markStepStarted(stepName: string): Session { +export interface StepMutationOptions { + /** + * Transitional FSM migration escape hatch. The legacy step helpers own the + * durable machine snapshot by default; new runtime-driven paths can set this + * false so step status is recorded without advancing the machine. + */ + updateMachine?: boolean; +} + +function shouldUpdateMachine(options: StepMutationOptions | undefined): boolean { + return options?.updateMachine !== false; +} + +export function markStepStarted(stepName: string, options: StepMutationOptions = {}): Session { let shouldEmit = false; const updatedSession = updateSession((session) => { const step = session.steps[stepName]; @@ -1019,7 +1032,7 @@ export function markStepStarted(stepName: string): Session { session.failure = null; session.status = "in_progress"; const state = machineStateFromOnboardSessionStep(stepName); - if (state) transitionMachineSnapshot(session, state, now); + if (state && shouldUpdateMachine(options)) transitionMachineSnapshot(session, state, now); shouldEmit = true; return session; }); @@ -1031,7 +1044,11 @@ export function markStepStarted(stepName: string): Session { return updatedSession; } -export function markStepComplete(stepName: string, updates: SessionUpdates = {}): Session { +export function markStepComplete( + stepName: string, + updates: SessionUpdates 
= {}, + options: StepMutationOptions = {}, +): Session { const safeUpdates = filterSafeUpdates(updates); let shouldEmit = false; const updatedSession = updateSession((session) => { @@ -1045,7 +1062,7 @@ export function markStepComplete(stepName: string, updates: SessionUpdates = {}) session.failure = null; Object.assign(session, safeUpdates); const nextState = nextMachineStateAfterCompletedStep(stepName, session); - if (nextState) transitionMachineSnapshot(session, nextState, now); + if (nextState && shouldUpdateMachine(options)) transitionMachineSnapshot(session, nextState, now); shouldEmit = true; return session; }); @@ -1088,7 +1105,11 @@ export function markStepSkipped(stepName: string): Session { return updatedSession; } -export function markStepFailed(stepName: string, message: string | null = null): Session { +export function markStepFailed( + stepName: string, + message: string | null = null, + options: StepMutationOptions = {}, +): Session { let shouldEmit = false; const updatedSession = updateSession((session) => { const step = session.steps[stepName]; @@ -1103,7 +1124,7 @@ export function markStepFailed(stepName: string, message: string | null = null): recordedAt: now, }); session.status = "failed"; - transitionMachineSnapshot(session, "failed", now); + if (shouldUpdateMachine(options)) transitionMachineSnapshot(session, "failed", now); shouldEmit = true; return session; }); From cd6e5f720366a2b50a55f88a95a0b010356f6053 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 11:56:24 -0700 Subject: [PATCH 18/25] refactor(onboard): plumb step mutation options through runtime Signed-off-by: Carlos Villela --- src/lib/onboard/machine/runtime.test.ts | 17 ++++++++++++++ src/lib/onboard/machine/runtime.ts | 31 +++++++++++++++++-------- src/lib/onboard/runtime-boundary.ts | 9 +++---- 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/src/lib/onboard/machine/runtime.test.ts b/src/lib/onboard/machine/runtime.test.ts index 
512ee7f56b..76ea68d4a9 100644 --- a/src/lib/onboard/machine/runtime.test.ts +++ b/src/lib/onboard/machine/runtime.test.ts @@ -123,6 +123,23 @@ describe("OnboardRuntime", () => { expect(events[1]).toMatchObject({ type: "onboard.resumed", state: "init" }); }); + it("forwards step mutation options to step recording dependencies", async () => { + const { runtime, getSession } = createHarness(); + + await runtime.markStepStarted("preflight", { updateMachine: false }); + await runtime.markStepComplete("preflight", { sandboxName: "my-assistant" }, { updateMachine: false }); + await runtime.markStepFailed("gateway", "boom", { updateMachine: false }); + + expect(getSession()).toMatchObject({ + sandboxName: "my-assistant", + status: "failed", + steps: { + preflight: { status: "complete" }, + gateway: { status: "failed" }, + }, + }); + }); + it("validates and persists explicit transitions", async () => { const { runtime, events, getSession } = createHarness(); diff --git a/src/lib/onboard/machine/runtime.ts b/src/lib/onboard/machine/runtime.ts index 47cee9f0d2..8ff35afbb4 100644 --- a/src/lib/onboard/machine/runtime.ts +++ b/src/lib/onboard/machine/runtime.ts @@ -3,7 +3,7 @@ import type { JsonObject } from "../../core/json-types"; import * as onboardSession from "../../state/onboard-session"; -import type { Session, SessionUpdates } from "../../state/onboard-session"; +import type { Session, SessionUpdates, StepMutationOptions } from "../../state/onboard-session"; import { createOnboardMachineEvent, emitOnboardMachineEvent, @@ -22,10 +22,10 @@ export interface OnboardRuntimeDeps { createSession(overrides?: Partial): Session; saveSession(session: Session): Session; updateSession(mutator: (session: Session) => Session | void): Session; - markStepStarted(stepName: string): Session; - markStepComplete(stepName: string, updates?: SessionUpdates): Session; + markStepStarted(stepName: string, options?: StepMutationOptions): Session; + markStepComplete(stepName: string, updates?: 
SessionUpdates, options?: StepMutationOptions): Session; markStepSkipped(stepName: string): Session; - markStepFailed(stepName: string, message?: string | null): Session; + markStepFailed(stepName: string, message?: string | null, options?: StepMutationOptions): Session; completeSession(updates?: SessionUpdates): Session; filterSafeUpdates(updates: SessionUpdates): Partial; emitEvent(event: OnboardMachineEvent): void; @@ -102,20 +102,31 @@ export class OnboardRuntime { return session; } - async markStepStarted(stepName: string): Promise { - return this.deps.markStepStarted(stepName); + async markStepStarted( + stepName: string, + options: StepMutationOptions = {}, + ): Promise { + return this.deps.markStepStarted(stepName, options); } - async markStepComplete(stepName: string, updates: SessionUpdates = {}): Promise { - return this.deps.markStepComplete(stepName, updates); + async markStepComplete( + stepName: string, + updates: SessionUpdates = {}, + options: StepMutationOptions = {}, + ): Promise { + return this.deps.markStepComplete(stepName, updates, options); } async markStepSkipped(stepName: string): Promise { return this.deps.markStepSkipped(stepName); } - async markStepFailed(stepName: string, message: string | null = null): Promise { - return this.deps.markStepFailed(stepName, message); + async markStepFailed( + stepName: string, + message: string | null = null, + options: StepMutationOptions = {}, + ): Promise { + return this.deps.markStepFailed(stepName, message, options); } async completeSession(updates: SessionUpdates = {}): Promise { diff --git a/src/lib/onboard/runtime-boundary.ts b/src/lib/onboard/runtime-boundary.ts index 31a4dbbea8..609a610b91 100644 --- a/src/lib/onboard/runtime-boundary.ts +++ b/src/lib/onboard/runtime-boundary.ts @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -import type { Session, SessionUpdates } from "../state/onboard-session"; +import type { Session, SessionUpdates, StepMutationOptions } from "../state/onboard-session"; import type { OnboardStateResult } from "./machine/result"; import { OnboardRuntime } from "./machine/runtime"; import type { OnboardMachineEventType, OnboardMachineState } from "./machine/types"; @@ -10,6 +10,7 @@ export interface OnboardRuntimeBoundaryOptions { toSessionUpdates(updates: Record): SessionUpdates; maybeForceE2eStepFailure(stepName: string): void; createRuntime?(): OnboardRuntime; + stepMutationOptions?: StepMutationOptions; } export class OnboardRuntimeBoundary { @@ -61,7 +62,7 @@ export class OnboardRuntimeBoundary { } = {}, ): Promise { const runtime = this.getRuntime(); - await runtime.markStepStarted(stepName); + await runtime.markStepStarted(stepName, this.options.stepMutationOptions); if (Object.keys(updates).length > 0) { await runtime.updateContext(this.options.toSessionUpdates(updates)); } @@ -69,7 +70,7 @@ export class OnboardRuntimeBoundary { } async recordStepComplete(stepName: string, updates: SessionUpdates = {}): Promise { - return this.getRuntime().markStepComplete(stepName, updates); + return this.getRuntime().markStepComplete(stepName, updates, this.options.stepMutationOptions); } async recordStepSkipped(stepName: string): Promise { @@ -77,7 +78,7 @@ export class OnboardRuntimeBoundary { } async recordStepFailed(stepName: string, message: string | null): Promise { - return this.getRuntime().markStepFailed(stepName, message); + return this.getRuntime().markStepFailed(stepName, message, this.options.stepMutationOptions); } async recordStateSkipped( From e266e3b53d9147b88c8a5bcf448583119206c379 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 12:24:22 -0700 Subject: [PATCH 19/25] refactor(onboard): add record-only FSM runner adapter Signed-off-by: Carlos Villela --- .../machine/record-only-runner.test.ts | 148 
++++++++++++++++++ src/lib/onboard/machine/record-only-runner.ts | 54 +++++++ 2 files changed, 202 insertions(+) create mode 100644 src/lib/onboard/machine/record-only-runner.test.ts create mode 100644 src/lib/onboard/machine/record-only-runner.ts diff --git a/src/lib/onboard/machine/record-only-runner.test.ts b/src/lib/onboard/machine/record-only-runner.test.ts new file mode 100644 index 0000000000..968132506e --- /dev/null +++ b/src/lib/onboard/machine/record-only-runner.test.ts @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { + createSession, + filterSafeUpdates, + normalizeSession, + type Session, + type SessionUpdates, + type StepMutationOptions, +} from "../../state/onboard-session"; +import type { OnboardMachineEvent } from "./events"; +import { advanceTo, branchTo, completeOnboardMachine } from "./result"; +import { OnboardRuntime, type OnboardRuntimeDeps } from "./runtime"; +import { + createRecordOnlyOnboardRuntimeBoundary, + runOnboardMachineWithRecordOnlySteps, +} from "./record-only-runner"; + +function cloneSession(session: Session): Session { + return normalizeSession(JSON.parse(JSON.stringify(session))) ?? session; +} + +function createHarness() { + let session = createSession(); + const events: OnboardMachineEvent[] = []; + + const updateSession = (mutator: (value: Session) => Session | void): Session => { + session = cloneSession(mutator(cloneSession(session)) ?? 
session); + return cloneSession(session); + }; + const maybeLegacyTransition = (state: Session["machine"]["state"], options?: StepMutationOptions) => { + if (options?.updateMachine === false) return; + session.machine = { + version: 1, + state, + stateEnteredAt: "legacy-step-transition", + revision: session.machine.revision + 1, + }; + }; + + const deps: OnboardRuntimeDeps = { + loadSession: () => cloneSession(session), + createSession, + saveSession: (next) => { + session = cloneSession(next); + return cloneSession(session); + }, + updateSession, + markStepStarted: (stepName: string, options?: StepMutationOptions) => + updateSession((current) => { + current.steps[stepName].status = "in_progress"; + if (stepName === "preflight") maybeLegacyTransition("preflight", options); + if (stepName === "gateway") maybeLegacyTransition("gateway", options); + return current; + }), + markStepComplete: (stepName: string, updates: SessionUpdates = {}, options?: StepMutationOptions) => + updateSession((current) => { + current.steps[stepName].status = "complete"; + Object.assign(current, filterSafeUpdates(updates)); + if (stepName === "preflight") maybeLegacyTransition("gateway", options); + if (stepName === "gateway") maybeLegacyTransition("provider_selection", options); + return current; + }), + markStepSkipped: (stepName) => + updateSession((current) => { + current.steps[stepName].status = "skipped"; + return current; + }), + markStepFailed: (stepName, message) => + updateSession((current) => { + current.steps[stepName].status = "failed"; + current.failure = { step: stepName, message: message ?? 
null, recordedAt: "now" }; + return current; + }), + completeSession: (updates: SessionUpdates = {}) => + updateSession((current) => { + Object.assign(current, filterSafeUpdates(updates)); + current.status = "complete"; + return current; + }), + filterSafeUpdates, + emitEvent: (event) => events.push(event), + now: () => "2026-05-28T00:00:00.000Z", + }; + + return { + events, + getSession: () => cloneSession(session), + boundary: createRecordOnlyOnboardRuntimeBoundary({ + toSessionUpdates: (updates) => filterSafeUpdates(updates as SessionUpdates) as SessionUpdates, + maybeForceE2eStepFailure: () => undefined, + createRuntime: () => new OnboardRuntime(deps), + }), + }; +} + +describe("record-only onboard runner", () => { + it("lets handlers record steps while the runner owns machine transitions", async () => { + const harness = createHarness(); + const recorders = harness.boundary.recorders(); + + const result = await runOnboardMachineWithRecordOnlySteps({ + boundary: harness.boundary, + context: { visited: [] as string[] }, + handlers: { + init: () => advanceTo("preflight"), + preflight: async () => { + await recorders.startRecordedStep("preflight"); + expect(harness.getSession().machine.state).toBe("preflight"); + await recorders.recordStepComplete("preflight"); + expect(harness.getSession().machine.state).toBe("preflight"); + return advanceTo("gateway"); + }, + gateway: async () => { + await recorders.startRecordedStep("gateway"); + expect(harness.getSession().machine.state).toBe("gateway"); + await recorders.recordStepComplete("gateway"); + expect(harness.getSession().machine.state).toBe("gateway"); + return advanceTo("provider_selection"); + }, + provider_selection: () => advanceTo("inference"), + inference: () => advanceTo("sandbox"), + sandbox: () => branchTo("openclaw"), + openclaw: () => advanceTo("policies"), + policies: () => advanceTo("finalizing"), + finalizing: () => advanceTo("post_verify"), + post_verify: () => completeOnboardMachine({ sandboxName: 
"my-assistant" }), + }, + updateContext: ({ context, state }) => ({ visited: [...context.visited, state] }), + }); + + expect(result.session).toMatchObject({ + status: "complete", + sandboxName: "my-assistant", + machine: { state: "complete" }, + steps: { + preflight: { status: "complete" }, + gateway: { status: "complete" }, + }, + }); + expect(result.context.visited).toContain("preflight"); + expect(result.context.visited).toContain("gateway"); + expect(harness.events.map((event) => event.type)).toContain("onboard.started"); + }); +}); diff --git a/src/lib/onboard/machine/record-only-runner.ts b/src/lib/onboard/machine/record-only-runner.ts new file mode 100644 index 0000000000..9490d76326 --- /dev/null +++ b/src/lib/onboard/machine/record-only-runner.ts @@ -0,0 +1,54 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import type { StepMutationOptions } from "../../state/onboard-session"; +import { OnboardRuntimeBoundary, type OnboardRuntimeBoundaryOptions } from "../runtime-boundary"; +import { + runOnboardMachine, + type OnboardMachineRunnerOptions, + type OnboardMachineRunnerResult, +} from "./runner"; + +export type RecordOnlyOnboardRuntimeBoundaryOptions = Omit< + OnboardRuntimeBoundaryOptions, + "stepMutationOptions" +> & { + stepMutationOptions?: Omit; +}; + +export interface RecordOnlyOnboardMachineRunnerOptions + extends Omit, "runtime"> { + boundary: OnboardRuntimeBoundary; + resumed?: boolean; + emitLifecycleEvent?: boolean; +} + +export function createRecordOnlyOnboardRuntimeBoundary( + options: RecordOnlyOnboardRuntimeBoundaryOptions, +): OnboardRuntimeBoundary { + return new OnboardRuntimeBoundary({ + ...options, + stepMutationOptions: { ...options.stepMutationOptions, updateMachine: false }, + }); +} + +/** + * Run the FSM with step recorders configured for status-only mutations. 
+ * + * This is the adapter path for the post-legacy architecture: handlers may keep + * using step boundary helpers for resumability, but those helpers do not move + * `session.machine`; the runner applies every machine transition explicitly via + * `OnboardRuntime.applyResult()`. + */ +export async function runOnboardMachineWithRecordOnlySteps({ + boundary, + resumed = false, + emitLifecycleEvent = true, + ...options +}: RecordOnlyOnboardMachineRunnerOptions): Promise> { + if (emitLifecycleEvent) await boundary.recordOnboardStarted(resumed); + return runOnboardMachine({ + ...options, + runtime: boundary.getRuntime(), + }); +} From bf4da0bf0474ee788232b00301066a6b0da95a0b Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 15:20:10 -0700 Subject: [PATCH 20/25] refactor(onboard): return ordered provider FSM results Signed-off-by: Carlos Villela --- .../handlers/provider-inference.test.ts | 16 ++++++++ .../machine/handlers/provider-inference.ts | 37 ++++++++++++------- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/lib/onboard/machine/handlers/provider-inference.test.ts b/src/lib/onboard/machine/handlers/provider-inference.test.ts index 2865973de1..49cc89d6d2 100644 --- a/src/lib/onboard/machine/handlers/provider-inference.test.ts +++ b/src/lib/onboard/machine/handlers/provider-inference.test.ts @@ -165,6 +165,16 @@ describe("handleProviderInferenceState", () => { metadata: { state: "inference", provider: "nvidia-prod", model: "nvidia/test" }, }); expect(result.retryStateResults).toEqual([]); + expect(result.stateResults).toEqual([ + { + type: "transition", + next: "inference", + transitionKind: "advance", + updates: undefined, + metadata: { state: "provider_selection", provider: "nvidia-prod", model: "nvidia/test" }, + }, + result.stateResult, + ]); }); it("clears non-NVIDIA provider credentials when inference setup fails", async () => { @@ -370,6 +380,12 @@ describe("handleProviderInferenceState", () => { }, ]); 
expect(result.stateResult).toMatchObject({ next: "sandbox", transitionKind: "advance" }); + expect(result.stateResults.map((stateResult) => [stateResult.next, stateResult.transitionKind])).toEqual([ + ["inference", "advance"], + ["provider_selection", "retry"], + ["inference", "advance"], + ["sandbox", "advance"], + ]); }); it("aborts before inference setup when the configuration summary is rejected", async () => { diff --git a/src/lib/onboard/machine/handlers/provider-inference.ts b/src/lib/onboard/machine/handlers/provider-inference.ts index 1a90147d2a..15d49d8d2c 100644 --- a/src/lib/onboard/machine/handlers/provider-inference.ts +++ b/src/lib/onboard/machine/handlers/provider-inference.ts @@ -122,6 +122,7 @@ export interface ProviderInferenceStateResult { webSearchConfig: WebSearchConfig | null; session: Session | null; stateResult: OnboardStateTransitionResult; + stateResults: OnboardStateTransitionResult[]; retryStateResults: OnboardStateTransitionResult[]; } @@ -172,6 +173,7 @@ export async function handleProviderInferenceState({ const webSearchConfig = initial.webSearchConfig; let forceProviderSelection = initialForceProviderSelection; let allowToolsIncompatible = false; + const stateResults: OnboardStateTransitionResult[] = []; const retryStateResults: OnboardStateTransitionResult[] = []; while (true) { @@ -256,6 +258,11 @@ export async function handleProviderInferenceState({ }), ); } + stateResults.push( + advanceTo("inference", { + metadata: { state: "provider_selection", provider, model }, + }), + ); env.NEMOCLAW_OPENSHELL_BIN = deps.getOpenshellBinary(); const needsBedrockRuntimeAdapter = deps.needsBedrockRuntimeAdapter(provider, endpointUrl); const resumeInference = @@ -292,11 +299,11 @@ export async function handleProviderInferenceState({ clearStagedCredentialEnv(deps, credentialEnv); } if (inferenceResult?.retry === "selection") { - retryStateResults.push( - retryTo("provider_selection", { - metadata: { state: "inference", provider, model, reason: 
"selection_retry" }, - }), - ); + const retryStateResult = retryTo("provider_selection", { + metadata: { state: "inference", provider, model, reason: "selection_retry" }, + }); + retryStateResults.push(retryStateResult); + stateResults.push(retryStateResult); forceProviderSelection = true; continue; } @@ -381,11 +388,11 @@ export async function handleProviderInferenceState({ clearStagedCredentialEnv(deps, credentialEnv); } if (inferenceResult?.retry === "selection") { - retryStateResults.push( - retryTo("provider_selection", { - metadata: { state: "inference", provider, model, reason: "selection_retry" }, - }), - ); + const retryStateResult = retryTo("provider_selection", { + metadata: { state: "inference", provider, model, reason: "selection_retry" }, + }); + retryStateResults.push(retryStateResult); + stateResults.push(retryStateResult); forceProviderSelection = true; continue; } @@ -397,6 +404,11 @@ export async function handleProviderInferenceState({ break; } + const stateResult = advanceTo("sandbox", { + metadata: { state: "inference", provider, model }, + }); + stateResults.push(stateResult); + return { sandboxName, model, @@ -409,9 +421,8 @@ export async function handleProviderInferenceState({ nimContainer, webSearchConfig, session, - stateResult: advanceTo("sandbox", { - metadata: { state: "inference", provider, model }, - }), + stateResult, + stateResults, retryStateResults, }; } From 212ff4d677480ea21e8d18fe4fc51341c17eaa42 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 28 May 2026 15:22:41 -0700 Subject: [PATCH 21/25] refactor(onboard): run live sequence with record-only steps Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 621a0f8aa1..687da4235d 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -419,6 +419,7 @@ const { handlePoliciesState }: typeof import("./onboard/machine/handlers/policie const { 
handlePreflightState }: typeof import("./onboard/machine/handlers/preflight") = require("./onboard/machine/handlers/preflight"); const { handleProviderInferenceState }: typeof import("./onboard/machine/handlers/provider-inference") = require("./onboard/machine/handlers/provider-inference"); const { handleSandboxState }: typeof import("./onboard/machine/handlers/sandbox") = require("./onboard/machine/handlers/sandbox"); +const { advanceTo }: typeof import("./onboard/machine/result") = require("./onboard/machine/result"); const { getOnboardProgressStep }: typeof import("./onboard/machine/progress") = require("./onboard/machine/progress"); const policies: typeof import("./policy") = require("./policy"); const tiers: typeof import("./policy/tiers") = require("./policy/tiers"); @@ -6380,6 +6381,7 @@ const onboardRuntimeBoundary = new OnboardRuntimeBoundary({ toSessionUpdates: (updates: Record) => toSessionUpdates(updates as Parameters[0]), maybeForceE2eStepFailure, + stepMutationOptions: { updateMachine: false }, }); const recordOnboardStarted = onboardRuntimeBoundary.recordOnboardStarted.bind(onboardRuntimeBoundary); @@ -6680,6 +6682,9 @@ async function onboard(opts: OnboardOptions = {}): Promise { } await recordOnboardStarted(resume); + await recordStateResultWithStepCompatibility( + advanceTo("preflight", { metadata: { state: "init" } }), + ); // Backstop for the resume path: a session may exist (so the early guard // skipped because resume === true) but never have recorded a sandboxName @@ -6946,7 +6951,9 @@ async function onboard(opts: OnboardOptions = {}): Promise { }, }, }); - await recordStateResultWithStepCompatibility(providerInferenceResult.stateResult); + for (const stateResult of providerInferenceResult.stateResults) { + await recordStateResultWithStepCompatibility(stateResult); + } session = providerInferenceResult.session; sandboxName = providerInferenceResult.sandboxName; const { From 11440c45d73c3973778a94d1b9122facb5c81a7b Mon Sep 17 00:00:00 2001 
From: Carlos Villela Date: Sun, 7 Jun 2026 14:38:37 -0700 Subject: [PATCH 22/25] chore(onboard): keep entrypoint net-neutral Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index de55e891a9..60516766d4 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -90,9 +90,7 @@ const { setupMessagingChannels: setupMessagingChannelsImpl, readMessagingPlanFro const { clearAgentScopedResumeState, }: typeof import("./onboard/agent-resume-state") = require("./onboard/agent-resume-state"); -const { - repairResumeMachineSnapshot, -}: typeof import("./onboard/resume-machine-repair") = require("./onboard/resume-machine-repair"); +const { repairResumeMachineSnapshot }: typeof import("./onboard/resume-machine-repair") = require("./onboard/resume-machine-repair"); const { stopTrackedModelRouterForAgentChange, }: typeof import("./onboard/model-router-process") = require("./onboard/model-router-process"); @@ -561,9 +559,7 @@ const RESET = USE_COLOR ? 
"\x1b[0m" : ""; let OPENSHELL_BIN: string | null = null; const GATEWAY_NAME = gatewayBinding.resolveGatewayName(GATEWAY_PORT); -import type { - JsonObject as LooseObject, -} from "./core/json-types"; +import type { JsonObject as LooseObject } from "./core/json-types"; type OnboardOptions = { nonInteractive?: boolean; @@ -691,7 +687,6 @@ const selectOnboardAgent = createSelectOnboardAgent({ note, }); - const { getTransportRecoveryMessage } = validationRecovery; // Validation functions — delegated to src/lib/validation.ts @@ -863,7 +858,6 @@ const verifyDirectSandboxGpu = sandboxGpuPreflight.createDirectSandboxGpuVerifie redact, }); - function upsertMessagingProviders( tokenDefs: MessagingTokenDef[], options: { replaceExisting?: boolean } = {}, @@ -953,7 +947,6 @@ const { isAffirmativeAnswer, }); - const { ensureValidatedBraveSearchCredential, configureWebSearch, @@ -966,7 +959,6 @@ const { runCaptureOpenshell, }); - // getSandboxInferenceConfig — moved to onboard-providers.ts // Inference probes — moved to inference/onboard-probes.ts @@ -991,7 +983,6 @@ const { promptValidationRecovery, }); - const { promptCloudModel, promptRemoteModel, promptInputModel } = modelPrompts; const { validateAnthropicModel, validateOpenAiLikeModel } = providerModels; const nousModels: typeof import("./inference/nous-models") = require("./inference/nous-models"); From f5c0a22f044987e42d028e2e8bf50d82738a980b Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Sun, 7 Jun 2026 14:41:44 -0700 Subject: [PATCH 23/25] fix(onboard): remove unused resume session assignment Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 60516766d4..b31d0f12f2 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -6410,7 +6410,6 @@ async function onboard(opts: OnboardOptions = {}): Promise { }, }); await onboardRuntimeBoundary.recordStateResultsWithStepCompatibility(providerInferenceResult.stateResults); - 
session = providerInferenceResult.session; sandboxName = providerInferenceResult.sandboxName; const { model, From 5285792d63a953d68dbb170eff84d96a40c7fec8 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Sun, 7 Jun 2026 14:50:20 -0700 Subject: [PATCH 24/25] test(onboard): cover failed resume record-only sequence Signed-off-by: Carlos Villela --- src/lib/onboard/resume-machine-repair.test.ts | 104 ++++++++++++++++++ src/lib/onboard/resume-machine-repair.ts | 15 +++ 2 files changed, 119 insertions(+) diff --git a/src/lib/onboard/resume-machine-repair.test.ts b/src/lib/onboard/resume-machine-repair.test.ts index 848c47b5f7..20ea0da79b 100644 --- a/src/lib/onboard/resume-machine-repair.test.ts +++ b/src/lib/onboard/resume-machine-repair.test.ts @@ -5,13 +5,19 @@ import { describe, expect, it } from "vitest"; import { createSession, + filterSafeUpdates, MACHINE_SNAPSHOT_VERSION, + normalizeSession, type Session, + type SessionUpdates, } from "../state/onboard-session"; +import { advanceTo, branchTo } from "./machine/result"; +import { OnboardRuntime, type OnboardRuntimeDeps } from "./machine/runtime"; import { repairResumeMachineSnapshot, resumeMachineState, } from "./resume-machine-repair"; +import { OnboardRuntimeBoundary } from "./runtime-boundary"; function createFailedSession(mutator: (session: Session) => void): Session { const session = createSession({ @@ -32,6 +38,73 @@ function createFailedSession(mutator: (session: Session) => void): Session { return session; } +function cloneSession(session: Session): Session { + return normalizeSession(JSON.parse(JSON.stringify(session))) ?? session; +} + +function createBoundaryHarness(initial: Session) { + let session = cloneSession(initial); + const updateSession = (mutator: (value: Session) => Session | void): Session => { + const current = cloneSession(session); + session = cloneSession(mutator(current) ?? 
current); + return cloneSession(session); + }; + const deps: OnboardRuntimeDeps = { + loadSession: () => cloneSession(session), + createSession, + saveSession: (next) => { + session = cloneSession(next); + return cloneSession(session); + }, + updateSession, + markStepStarted: () => cloneSession(session), + markStepComplete: (_stepName, updates: SessionUpdates = {}) => + updateSession((current) => Object.assign(current, filterSafeUpdates(updates))), + markStepCompleteRecordOnly: (_stepName, updates: SessionUpdates = {}) => + updateSession((current) => Object.assign(current, filterSafeUpdates(updates))), + markStepSkipped: () => cloneSession(session), + markStepFailed: () => cloneSession(session), + markStepFailedRecordOnly: () => cloneSession(session), + completeSession: (updates: SessionUpdates = {}) => + updateSession((current) => { + Object.assign(current, filterSafeUpdates(updates)); + current.status = "complete"; + current.resumable = false; + return current; + }), + filterSafeUpdates, + emitEvent: () => undefined, + now: () => "2026-06-01T00:02:00.000Z", + }; + const boundary = new OnboardRuntimeBoundary({ + toSessionUpdates: (updates) => filterSafeUpdates(updates as SessionUpdates) as SessionUpdates, + maybeForceE2eStepFailure: () => undefined, + createRuntime: () => new OnboardRuntime(deps), + stepMutationOptions: { updateMachine: false }, + }); + return { boundary, getSession: () => cloneSession(session) }; +} + +async function runRecordOnlyResumeSequence(initial: Session): Promise { + repairResumeMachineSnapshot(initial, "2026-06-01T00:01:00.000Z"); + initial.failure = null; + initial.status = "in_progress"; + const { boundary, getSession } = createBoundaryHarness(initial); + await boundary.recordOnboardStarted(true); + await boundary.recordStateResultsWithStepCompatibility([ + advanceTo("preflight", { metadata: { state: "init" } }), + advanceTo("gateway", { metadata: { state: "preflight" } }), + advanceTo("provider_selection", { metadata: { state: 
"gateway" } }), + advanceTo("inference", { metadata: { state: "provider_selection" } }), + advanceTo("sandbox", { metadata: { state: "inference" } }), + branchTo("openclaw", { metadata: { state: "sandbox" } }), + advanceTo("policies", { metadata: { state: "openclaw" } }), + advanceTo("finalizing", { metadata: { state: "policies" } }), + ]); + await boundary.recordSessionComplete(); + return getSession(); +} + describe("resume machine repair", () => { it("resumes a failed preflight session from preflight", () => { const session = createFailedSession((current) => { @@ -101,4 +174,35 @@ describe("resume machine repair", () => { revision: 3, }); }); + + it.each([ + ["preflight", "preflight", null], + ["gateway", "gateway", "preflight"], + ["inference", "inference", "provider_selection"], + ] as const)( + "lets record-only resume complete from failed %s", + async (_name, failedStep, completedStep) => { + const session = createFailedSession((current) => { + current.failure = { + step: failedStep, + message: `${failedStep} failed`, + recordedAt: "2026-06-01T00:00:00.000Z", + }; + current.lastStepStarted = failedStep; + current.steps[failedStep].status = "failed"; + if (completedStep) { + current.lastCompletedStep = completedStep; + current.steps[completedStep].status = "complete"; + } + }); + + const completed = await runRecordOnlyResumeSequence(session); + + expect(completed).toMatchObject({ + status: "complete", + failure: null, + machine: { state: "complete" }, + }); + }, + ); }); diff --git a/src/lib/onboard/resume-machine-repair.ts b/src/lib/onboard/resume-machine-repair.ts index d66be4f894..8879abc7f7 100644 --- a/src/lib/onboard/resume-machine-repair.ts +++ b/src/lib/onboard/resume-machine-repair.ts @@ -9,6 +9,10 @@ import { nextMachineStateAfterCompletedStep } from "../state/onboard-step-state" import { machineStateFromOnboardSessionStep } from "./machine/events"; import type { OnboardMachineState } from "./machine/types"; +/** + * Reads the legacy step-level 
source of truth for interrupted sessions whose + * durable FSM snapshot was already collapsed to the terminal failed state. + */ function activeStepMachineState(session: Session): OnboardMachineState | null { const failedStepName = session.failure?.step ?? null; const failedStep = failedStepName ? session.steps[failedStepName] : null; @@ -30,6 +34,9 @@ function activeStepMachineState(session: Session): OnboardMachineState | null { return null; } +/** + * Computes the nonterminal state where a failed durable session should resume. + */ export function resumeMachineState(session: Session): OnboardMachineState { return activeStepMachineState(session) ?? nextMachineStateAfterCompletedStep( session.lastCompletedStep, @@ -37,6 +44,14 @@ export function resumeMachineState(session: Session): OnboardMachineState { ) ?? "init"; } +/** + * Repairs the legacy failed-session/FSM boundary during --resume. + * + * Source fix constraint: failed -> resume is not a modeled FSM transition yet, + * and legacy step fields still act as the secondary durable source for resume. + * Remove this bridge once failed-session recovery is represented by explicit + * FSM recovery results or step fields stop being used to derive resume state. 
+ */ export function repairResumeMachineSnapshot( session: Session, stateEnteredAt = new Date().toISOString(), From e0e31628676f9d36d9fb87d00c5a2dbb20f25a07 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Sun, 7 Jun 2026 15:56:40 -0700 Subject: [PATCH 25/25] test(onboard): document resume repair harness Signed-off-by: Carlos Villela --- src/lib/onboard/resume-machine-repair.test.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/lib/onboard/resume-machine-repair.test.ts b/src/lib/onboard/resume-machine-repair.test.ts index 20ea0da79b..f94ea0c1e6 100644 --- a/src/lib/onboard/resume-machine-repair.test.ts +++ b/src/lib/onboard/resume-machine-repair.test.ts @@ -19,6 +19,9 @@ import { } from "./resume-machine-repair"; import { OnboardRuntimeBoundary } from "./runtime-boundary"; +/** + * Builds a failed durable session while letting each test set the interrupted step. + */ function createFailedSession(mutator: (session: Session) => void): Session { const session = createSession({ machine: { @@ -38,10 +41,16 @@ function createFailedSession(mutator: (session: Session) => void): Session { return session; } +/** + * Round-trips sessions through normalization to match persisted runtime state. + */ function cloneSession(session: Session): Session { return normalizeSession(JSON.parse(JSON.stringify(session))) ?? session; } +/** + * Creates a memory-backed runtime boundary with record-only step mutations. + */ function createBoundaryHarness(initial: Session) { let session = cloneSession(initial); const updateSession = (mutator: (value: Session) => Session | void): Session => { @@ -85,6 +94,9 @@ function createBoundaryHarness(initial: Session) { return { boundary, getSession: () => cloneSession(session) }; } +/** + * Replays the live resume sequence from failed snapshot repair through completion. 
+ */ async function runRecordOnlyResumeSequence(initial: Session): Promise { repairResumeMachineSnapshot(initial, "2026-06-01T00:01:00.000Z"); initial.failure = null;