Skip to content
26 changes: 16 additions & 10 deletions src/lib/agent/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import { dockerBuild, dockerImageInspect } from "../adapters/docker";
import { getAgentBranding } from "../cli/branding";
import { getProviderSelectionConfig } from "../inference/config";
import type { JsonObject as LooseObject } from "../core/json-types";
import * as onboardSession from "../state/onboard-session";
import { runSandboxConfigSync } from "../onboard/config-sync";
import { ROOT, redact, run, shellQuote } from "../runner";
import {
Expand All @@ -29,7 +28,9 @@ export interface OnboardContext {
runCaptureOpenshell: (args: string[], opts?: { ignoreError?: boolean }) => string | null;
openshellShellCommand: (args: string[], options?: { openshellBinary?: string }) => string;
openshellBinary: string;
startRecordedStep: (stepName: string, updates: LooseObject) => void;
startRecordedStep: (stepName: string, updates: LooseObject) => Promise<void>;
recordStepComplete: (stepName: string, updates: LooseObject) => Promise<unknown>;
recordStepFailed: (stepName: string, message: string | null) => Promise<unknown>;
skippedStepMessage: (stepName: string, sandboxName: string) => void;
}

Expand Down Expand Up @@ -348,13 +349,14 @@ export function collectHermesStartupDiagnostics(
/**
* Record and print an agent setup failure before exiting the onboarding flow.
*/
function failAgentSetup(
async function failAgentSetup(
sandboxName: string,
agent: AgentDefinition,
message: string,
recordStepFailed: OnboardContext["recordStepFailed"],
details: string[] = [],
): never {
onboardSession.markStepFailed(
): Promise<never> {
await recordStepFailed(
"agent_setup",
details.length > 0 ? `${message}\n${details.join("\n")}` : message,
);
Expand Down Expand Up @@ -401,6 +403,8 @@ export async function handleAgentSetup(
runCaptureOpenshell,
openshellBinary: openshellBin,
startRecordedStep,
recordStepComplete,
recordStepFailed,
skippedStepMessage,
} = ctx;

Expand Down Expand Up @@ -433,21 +437,22 @@ export async function handleAgentSetup(
// to the Dockerfile's zero-byte placeholder. Mirrors the OpenClaw
// path in src/lib/onboard.ts. Fixes #3999 for non-OpenClaw agents.
syncNemoClawConfig();
onboardSession.markStepComplete("agent_setup", { sandboxName, provider, model });
await recordStepComplete("agent_setup", { sandboxName, provider, model });
return;
}
}
}

startRecordedStep("agent_setup", { sandboxName, provider, model });
await startRecordedStep("agent_setup", { sandboxName, provider, model });
step(7, 8, `Setting up ${agent.displayName} inside sandbox`);

const binaryAvailability = verifyAgentBinaryAvailable(sandboxName, agent, runCaptureOpenshell);
if (!binaryAvailability.available) {
failAgentSetup(
await failAgentSetup(
sandboxName,
agent,
describeAgentBinaryFailure(sandboxName, agent, binaryAvailability),
recordStepFailed,
);
}

Expand Down Expand Up @@ -478,18 +483,19 @@ export async function handleAgentSetup(
agent.name === "hermes"
? collectHermesStartupDiagnostics(sandboxName, runCaptureOpenshell)
: [];
failAgentSetup(
await failAgentSetup(
sandboxName,
agent,
`${agent.displayName} gateway did not respond within ${timeoutSecs}s`,
recordStepFailed,
diagnostics,
);
}
} else {
console.log(` \u2713 ${agent.displayName} configured inside sandbox`);
}

onboardSession.markStepComplete("agent_setup", { sandboxName, provider, model });
await recordStepComplete("agent_setup", { sandboxName, provider, model });
}

/**
Expand Down
83 changes: 38 additions & 45 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ const { resolveSandboxImageTagFromCreateOutput } =
require("./domain/sandbox/image-tag") as typeof import("./domain/sandbox/image-tag");
const nim: typeof import("./inference/nim") = require("./inference/nim");
const onboardSession: typeof import("./state/onboard-session") = require("./state/onboard-session");
const { OnboardRuntimeBoundary }: typeof import("./onboard/runtime-boundary") = require("./onboard/runtime-boundary");
const policies: typeof import("./policy") = require("./policy");
const tiers: typeof import("./policy/tiers") = require("./policy/tiers");
const { ensureUsageNoticeConsent } = require("./onboard/usage-notice");
Expand Down Expand Up @@ -8915,27 +8916,15 @@ function toSessionUpdates(
return normalized;
}

/**
 * Mark an onboarding step as started and persist any session fields supplied with it.
 *
 * The step is first flagged as started in the onboard session. When `updates`
 * carries at least one key, every field that is not `undefined` is copied onto
 * the stored session (an explicit `null` is written through). Finally
 * `maybeForceE2eStepFailure` is invoked for the step — presumably a test hook
 * that can inject a failure; confirm against its definition.
 *
 * @param stepName - Identifier of the onboarding step being started.
 * @param updates - Optional session fields to record alongside the step start.
 */
function startRecordedStep(
  stepName: string,
  updates: {
    sandboxName?: string | null;
    provider?: string | null;
    model?: string | null;
    policyPresets?: string[] | null;
  } = {},
): void {
  onboardSession.markStepStarted(stepName);

  const hasUpdates = Object.keys(updates).length !== 0;
  if (hasUpdates) {
    onboardSession.updateSession((current: Session) => {
      // Only fields the caller actually provided are written; `undefined` means "leave as is".
      const { sandboxName, provider, model, policyPresets } = updates;
      if (sandboxName !== undefined) {
        current.sandboxName = sandboxName;
      }
      if (provider !== undefined) {
        current.provider = provider;
      }
      if (model !== undefined) {
        current.model = model;
      }
      if (policyPresets !== undefined) {
        current.policyPresets = policyPresets;
      }
      return current;
    });
  }

  maybeForceE2eStepFailure(stepName);
}
// Module-level runtime boundary used to record onboarding step/session state.
// It is constructed with this file's `toSessionUpdates` and
// `maybeForceE2eStepFailure` helpers; its own behavior lives in
// ./onboard/runtime-boundary and is not visible here.
const onboardRuntimeBoundary = new OnboardRuntimeBoundary({
toSessionUpdates,
maybeForceE2eStepFailure,
});
// Pre-bound method references: binding keeps `this` pointing at the boundary
// instance, so each recorder can be called as a plain function or handed to
// another module inside a context object.
const startRecordedStep = onboardRuntimeBoundary.startRecordedStep.bind(onboardRuntimeBoundary);
const recordStepComplete = onboardRuntimeBoundary.recordStepComplete.bind(onboardRuntimeBoundary);
const recordStepSkipped = onboardRuntimeBoundary.recordStepSkipped.bind(onboardRuntimeBoundary);
const recordStepFailed = onboardRuntimeBoundary.recordStepFailed.bind(onboardRuntimeBoundary);
const recordSessionComplete = onboardRuntimeBoundary.recordSessionComplete.bind(onboardRuntimeBoundary);
Comment thread
coderabbitai[bot] marked this conversation as resolved.

const ONBOARD_STEP_INDEX: Record<string, { number: number; title: string }> = {
preflight: { number: 1, title: "Preflight checks" },
Expand Down Expand Up @@ -8972,6 +8961,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
RECREATE_SANDBOX = opts.recreateSandbox || process.env.NEMOCLAW_RECREATE_SANDBOX === "1";
AUTO_YES = opts.autoYes === true || process.env.NEMOCLAW_YES === "1";
_preflightDashboardPort = opts.controlUiPort || null;
onboardRuntimeBoundary.reset();
delete process.env.OPENSHELL_GATEWAY;
const resume = opts.resume === true;
const fresh = opts.fresh === true;
Expand Down Expand Up @@ -9341,9 +9331,9 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
assertCdiNvidiaGpuSpecPresent(assessHost(), resumeOptedOutGpuPassthrough);
validateSandboxGpuPreflight(resumeSandboxGpuConfig);
} else {
startRecordedStep("preflight");
await startRecordedStep("preflight");
gpu = await preflight({ ...opts, optedOutGpuPassthrough: opts.noGpu === true });
onboardSession.markStepComplete("preflight");
await recordStepComplete("preflight");
}
const sandboxGpuConfig = resolveSandboxGpuConfig(gpu, {
flag: effectiveSandboxGpuFlag,
Expand Down Expand Up @@ -9480,11 +9470,11 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
resume && session?.steps?.gateway?.status === "complete" && canReuseHealthyGateway;
if (resumeGateway) {
skippedStepMessage("gateway", "running");
onboardSession.markStepComplete("gateway");
await recordStepComplete("gateway");
} else if (!resume && canReuseHealthyGateway) {
skippedStepMessage("gateway", "running", "reuse");
note(" Reusing healthy NemoClaw gateway.");
onboardSession.markStepComplete("gateway");
await recordStepComplete("gateway");
} else {
if (resume && session?.steps?.gateway?.status === "complete") {
if (gatewayReuseState === "active-unnamed") {
Expand All @@ -9502,9 +9492,9 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
retireLegacyGatewayForDockerDriverUpgrade();
gatewayReuseState = "missing";
}
startRecordedStep("gateway");
await startRecordedStep("gateway");
await startGateway(gpu, { gpuPassthrough });
onboardSession.markStepComplete("gateway");
await recordStepComplete("gateway");
}

// #2753: prefer requestedSandboxName over an unconfirmed session name.
Expand Down Expand Up @@ -9555,7 +9545,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
// below). A SIGINT between any earlier step and createSandbox would
// otherwise leave a phantom that `nemoclaw list` resurrects until
// manually destroyed.
startRecordedStep("provider_selection");
await startRecordedStep("provider_selection");
const selection = await setupNim(gpu, sandboxName, agent);
model = selection.model;
provider = selection.provider;
Expand All @@ -9565,7 +9555,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
hermesToolGateways = selection.hermesToolGateways;
preferredInferenceApi = selection.preferredInferenceApi;
nimContainer = selection.nimContainer;
onboardSession.markStepComplete(
await recordStepComplete(
"provider_selection",
toSessionUpdates({
provider,
Expand Down Expand Up @@ -9598,7 +9588,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
if (!sandboxName) {
sandboxName = await promptValidatedSandboxName(agent);
}
startRecordedStep("inference", { provider, model });
await startRecordedStep("inference", { provider, model });
const inferenceResult = await setupInference(
sandboxName,
model,
Expand All @@ -9612,7 +9602,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
forceProviderSelection = true;
continue;
}
onboardSession.markStepComplete(
await recordStepComplete(
"inference",
toSessionUpdates({ provider, model, hermesAuthMethod, nimContainer, hermesToolGateways }),
);
Expand All @@ -9632,7 +9622,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
if (nimContainer && sandboxName) {
registry.updateSandbox(sandboxName, { nimContainer });
}
onboardSession.markStepComplete(
await recordStepComplete(
"inference",
toSessionUpdates({ provider, model, hermesAuthMethod, nimContainer, hermesToolGateways }),
);
Expand Down Expand Up @@ -9671,7 +9661,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
}
}

startRecordedStep("inference", { provider, model });
await startRecordedStep("inference", { provider, model });
const inferenceResult = await setupInference(
sandboxName,
model,
Expand All @@ -9689,7 +9679,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
if (nimContainer && sandboxName) {
registry.updateSandbox(sandboxName, { nimContainer });
}
onboardSession.markStepComplete(
await recordStepComplete(
"inference",
toSessionUpdates({ provider, model, hermesAuthMethod, nimContainer, hermesToolGateways }),
);
Expand Down Expand Up @@ -9831,7 +9821,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
} else {
nextWebSearchConfig = await configureWebSearch(null, agent, webSearchSupportProbePath);
}
startRecordedStep("sandbox", { provider, model });
await startRecordedStep("sandbox", { provider, model });
const recordedMessagingChannels = getRecordedMessagingChannelsForResume(resume, session, sandboxName);
if (recordedMessagingChannels) {
selectedMessagingChannels = recordedMessagingChannels;
Expand Down Expand Up @@ -9885,7 +9875,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
...getSandboxAgentRegistryFields(agent, !fromDockerfile),
});
registry.setDefault(sandboxName);
onboardSession.markStepComplete(
await recordStepComplete(
"sandbox",
toSessionUpdates({
sandboxName,
Expand Down Expand Up @@ -9915,10 +9905,12 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
openshellShellCommand,
openshellBinary: getOpenshellBinary(),
startRecordedStep,
recordStepComplete,
recordStepFailed,
skippedStepMessage,
});
ensureAgentDashboardForward(sandboxName, agent);
onboardSession.markStepSkipped("openclaw");
await recordStepSkipped("openclaw");
} else {
const resumeOpenclaw = resume && sandboxName && isOpenclawReady(sandboxName);
if (resumeOpenclaw) {
Expand All @@ -9927,19 +9919,19 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
// zero-byte placeholder; re-sync to avoid loadOnboardConfig
// SyntaxError. Fixes #3999.
syncNemoClawConfigInSandbox(sandboxName, provider, model);
onboardSession.markStepComplete(
await recordStepComplete(
"openclaw",
toSessionUpdates({ sandboxName, provider, model, hermesAuthMethod, hermesToolGateways }),
);
} else {
startRecordedStep("openclaw", { sandboxName, provider, model });
await startRecordedStep("openclaw", { sandboxName, provider, model });
await setupOpenclaw(sandboxName, model, provider);
onboardSession.markStepComplete(
await recordStepComplete(
"openclaw",
toSessionUpdates({ sandboxName, provider, model, hermesAuthMethod, hermesToolGateways }),
);
}
onboardSession.markStepSkipped("agent_setup");
await recordStepSkipped("agent_setup");
}

const latestSession = onboardSession.loadSession();
Expand Down Expand Up @@ -9999,7 +9991,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
arePolicyPresetsApplied(sandboxName, recordedPolicyPresetsForSupport);
if (resumePolicies) {
skippedStepMessage("policies", recordedPolicyPresetsForSupport.join(", "));
onboardSession.markStepComplete(
await recordStepComplete(
"policies",
toSessionUpdates({
sandboxName,
Expand All @@ -10009,7 +10001,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
}),
);
} else {
startRecordedStep("policies", {
await startRecordedStep("policies", {
sandboxName,
provider,
model,
Expand All @@ -10035,7 +10027,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
});
},
});
onboardSession.markStepComplete(
await recordStepComplete(
"policies",
toSessionUpdates({ sandboxName, provider, model, policyPresets: appliedPolicyPresets }),
);
Expand All @@ -10045,7 +10037,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
ensureAgentDashboardForward(sandboxName, agent);
}

onboardSession.completeSession(
await recordSessionComplete(
toSessionUpdates({ sandboxName, provider, model, hermesAuthMethod, hermesToolGateways }),
);
completed = true;
Expand Down Expand Up @@ -10125,6 +10117,7 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
printDashboard(sandboxName, model, provider, nimContainer, agent);
} finally {
releaseOnboardLock();
onboardRuntimeBoundary.clear();
}
}

Expand Down
Loading
Loading