Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions src/lib/onboard/resume-machine-repair.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,54 @@ describe("resume machine repair", () => {
});
});

// Scenario: the session is resumable and in progress, yet its machine snapshot
// still says "complete". Repair is expected to move the machine to
// "provider_selection", stamp it with the supplied time, and bump the revision
// by one.
it("repairs a complete snapshot reopened by rebuild from the last completed step", () => {
  const repairedAt = "2026-06-01T00:01:00.000Z";
  const session = createSession({
    resumable: true,
    status: "in_progress",
    lastCompletedStep: "gateway",
    machine: {
      version: MACHINE_SNAPSHOT_VERSION,
      state: "complete",
      stateEnteredAt: "2026-06-01T00:00:00.000Z",
      revision: 9,
    },
  });
  // Mark every step up to and including the last completed one as done.
  for (const finishedStep of ["preflight", "gateway"] as const) {
    session.steps[finishedStep].status = "complete";
  }

  repairResumeMachineSnapshot(session, repairedAt);

  const { machine } = session;
  expect(machine).toEqual({
    version: MACHINE_SNAPSHOT_VERSION,
    state: "provider_selection",
    stateEnteredAt: repairedAt,
    revision: 10,
  });
});

// Scenario: the session is complete and explicitly non-resumable, so the
// "complete" machine snapshot must come back unchanged (same timestamp, same
// revision).
it("leaves a non-resumable complete snapshot untouched", () => {
  const originallyEnteredAt = "2026-06-01T00:00:00.000Z";
  const session = createSession({
    lastCompletedStep: "policies",
    machine: {
      version: MACHINE_SNAPSHOT_VERSION,
      state: "complete",
      stateEnteredAt: originallyEnteredAt,
      revision: 5,
    },
  });
  session.status = "complete";
  session.resumable = false;

  repairResumeMachineSnapshot(session, "2026-06-01T00:01:00.000Z");

  const { machine } = session;
  expect(machine).toEqual({
    version: MACHINE_SNAPSHOT_VERSION,
    state: "complete",
    stateEnteredAt: originallyEnteredAt,
    revision: 5,
  });
});

it.each([
["preflight", "preflight", null],
["gateway", "gateway", "preflight"],
Expand Down Expand Up @@ -217,4 +265,30 @@ describe("resume machine repair", () => {
});
},
);

// For each listed step: start from an in-progress, resumable session whose
// machine snapshot still reads "complete", run the record-only resume
// sequence, and expect the session to end complete with no failure recorded.
it.each(["gateway", "policies"] as const)(
  "lets record-only resume complete from a reopened complete snapshot after %s",
  async (step) => {
    const reopened = createSession({
      resumable: true,
      status: "in_progress",
      lastCompletedStep: step,
      machine: {
        version: MACHINE_SNAPSHOT_VERSION,
        state: "complete",
        stateEnteredAt: "2026-06-01T00:00:00.000Z",
        revision: 7,
      },
    });
    reopened.steps[step].status = "complete";

    const outcome = await runRecordOnlyResumeSequence(reopened);

    const expectedShape = {
      status: "complete",
      failure: null,
      machine: { state: "complete" },
    };
    expect(outcome).toMatchObject(expectedShape);
  },
);
});
21 changes: 15 additions & 6 deletions src/lib/onboard/resume-machine-repair.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,28 @@ export function resumeMachineState(session: Session): OnboardMachineState {
) ?? "init";
}

/**
 * Decides whether the session's machine snapshot is in a terminal state that
 * has to be repaired before resuming.
 *
 * - A `failed` snapshot always needs repair.
 * - A `complete` snapshot needs repair unless the session itself is also
 *   finished, i.e. `status` is "complete" and `resumable` is exactly `false`.
 * - Any other machine state is left alone.
 *
 * @param session Session whose `machine.state`, `status` and `resumable` are inspected.
 * @returns `true` when the snapshot should be repaired.
 */
function shouldRepairTerminalMachineSnapshot(session: Session): boolean {
  const { state } = session.machine;
  if (state === "failed") return true;
  if (state !== "complete") return false;

  // A complete machine is only trustworthy when the session agrees it is done.
  const sessionIsFinished = session.status === "complete" && session.resumable === false;
  return !sessionIsFinished;
}

/**
* Repairs the legacy failed-session/FSM boundary during --resume.
* Repairs legacy terminal-session/FSM boundaries during --resume.
*
* Source fix constraint: failed -> resume is not a modeled FSM transition yet,
* and legacy step fields still act as the secondary durable source for resume.
* Remove this bridge once failed-session recovery is represented by explicit
* FSM recovery results or step fields stop being used to derive resume state.
* Source fix constraint: terminal -> resume is not a modeled FSM transition
* yet, and legacy step fields still act as the secondary durable source for
* resume. Remove this bridge once terminal-session recovery is represented by
* explicit FSM recovery results or step fields stop being used to derive resume
* state.
*/
export function repairResumeMachineSnapshot(
session: Session,
stateEnteredAt = new Date().toISOString(),
): Session {
if (session.machine.state !== "failed") return session;
if (!shouldRepairTerminalMachineSnapshot(session)) return session;
const state = resumeMachineState(session);
session.machine = {
version: MACHINE_SNAPSHOT_VERSION,
Expand Down
77 changes: 77 additions & 0 deletions src/lib/verify-deployment-messaging.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { describe, expect, it } from "vitest";
import { buildChain } from "../../dist/lib/dashboard/contract.js";
import { verifyDeployment } from "../../dist/lib/verify-deployment.js";

// Delivery chain built once from the dashboard contract and passed to every
// verifyDeployment call in this file.
const chain = buildChain();
// Retry options passed as the last verifyDeployment argument: an empty delay
// list and a no-op sleep. NOTE(review): presumably this disables retry waits so
// the tests run instantly — confirm against verifyDeployment's retry handling.
const NO_RETRY = { retryDelaysMs: [], sleep: async (_ms: number) => {} };

/**
 * Builds the dependency bag handed to verifyDeployment in these tests.
 *
 * The defaults are: sandbox command exits 0 with stdout "200", host port probe
 * answers 200, one running forward entry for "my-sandbox", no messaging
 * channels, and every provider reported as existing.
 *
 * @param overrides Entries that replace (or extend) the defaults by key.
 * @returns A fresh object: the defaults with `overrides` spread on top.
 */
function makeDeps(overrides: Record<string, unknown> = {}) {
  const healthyDefaults = {
    executeSandboxCommand: (_name: string, _script: string) => ({
      status: 0,
      stdout: "200",
      stderr: "",
    }),
    probeHostPort: (_port: number, _path: string) => 200,
    captureForwardList: () => "my-sandbox 127.0.0.1 18789 12345 running",
    getMessagingChannels: (_name: string) => [] as string[],
    providerExistsInGateway: (_name: string) => true,
  };
  return { ...healthyDefaults, ...overrides };
}

describe("verifyDeployment messaging provider checks", () => {
  const SANDBOX = "my-sandbox";

  // Creates a providerExistsInGateway stub that records every name it is asked
  // about and always answers with `exists`.
  function recordProviderLookups(exists: boolean) {
    const lookedUp: string[] = [];
    const providerExistsInGateway = (name: string): boolean => {
      lookedUp.push(name);
      return exists;
    };
    return { lookedUp, providerExistsInGateway };
  }

  // Telegram is expected to map to one sandbox-scoped provider and Slack to
  // two; all three must be queried, in that order.
  it("checks sandbox-scoped provider names for configured messaging channels", async () => {
    const { lookedUp, providerExistsInGateway } = recordProviderLookups(true);
    const deps = makeDeps({
      getMessagingChannels: () => ["telegram", "slack"],
      providerExistsInGateway,
    });

    const { verification } = await verifyDeployment(SANDBOX, chain, deps, NO_RETRY);

    expect(verification.messagingBridgesHealthy).toBe(true);
    expect(lookedUp).toEqual([
      "my-sandbox-telegram-bridge",
      "my-sandbox-slack-bridge",
      "my-sandbox-slack-app",
    ]);
  });

  // Dropping either Slack provider must yield a messaging warning while the
  // overall result stays healthy.
  it.each(["my-sandbox-slack-bridge", "my-sandbox-slack-app"])(
    "warns when Slack provider %s is missing",
    async (absentProvider) => {
      const deps = makeDeps({
        getMessagingChannels: () => ["slack"],
        providerExistsInGateway: (name: string) => name !== absentProvider,
      });

      const { healthy, verification, diagnostics } = await verifyDeployment(
        SANDBOX,
        chain,
        deps,
        NO_RETRY,
      );

      expect(healthy).toBe(true);
      expect(verification.messagingBridgesHealthy).toBe(false);
      const messaging = diagnostics.find((entry) => entry.link === "messaging");
      expect(messaging?.status).toBe("warn");
      expect(messaging?.detail).toContain("slack");
    },
  );

  // WhatsApp must never trigger a provider lookup: the stub would report the
  // provider as missing, yet no lookup and no messaging diagnostic may appear.
  it("does not require a gateway provider for tokenless messaging channels", async () => {
    const { lookedUp, providerExistsInGateway } = recordProviderLookups(false);
    const deps = makeDeps({
      getMessagingChannels: () => ["whatsapp"],
      providerExistsInGateway,
    });

    const { verification, diagnostics } = await verifyDeployment(SANDBOX, chain, deps, NO_RETRY);

    expect(verification.messagingBridgesHealthy).toBe(true);
    expect(lookedUp).toEqual([]);
    expect(diagnostics.find((entry) => entry.link === "messaging")).toBeUndefined();
  });
});
2 changes: 1 addition & 1 deletion src/lib/verify-deployment.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ describe("verifyDeployment", () => {
it("messaging failure is a warning, not a blocker", async () => {
const deps = makeDeps({
getMessagingChannels: () => ["slack", "discord"],
providerExistsInGateway: (name: string) => name !== "discord",
providerExistsInGateway: (name: string) => name !== "my-sandbox-discord-bridge",
});
const result = await verifyDeployment("my-sandbox", chain, deps, NO_RETRY);
expect(result.healthy).toBe(true); // messaging is non-blocking
Expand Down
9 changes: 8 additions & 1 deletion src/lib/verify-deployment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import type { DashboardDeliveryChain } from "./dashboard/contract";
import { compareChannelSets, type RuntimeChannelStatus } from "./channel-runtime-status";
import { getMessagingProviderNamesForChannel } from "./onboard/messaging-reuse";

// ── Types ────────────────────────────────────────────────────────────

Expand Down Expand Up @@ -124,6 +125,7 @@ function defaultSleep(ms: number): Promise<void> {
// HTTP status codes that indicate the gateway process is alive.
// 401 = device auth is enabled but the gateway is running.
const GATEWAY_ALIVE_CODES = new Set([200, 401]);
// Messaging channels that verifyMessagingBridges skips when no gateway
// provider names are mapped for them, so they are never reported as missing.
// NOTE(review): "tokenless" presumably means the channel needs no
// credential-backed gateway provider — confirm against messaging-reuse.
const TOKENLESS_MESSAGING_CHANNELS = new Set(["whatsapp"]);

// Gateway-failure hint: cover both layers the probe could be failing at.
// The probe runs curl inside the sandbox against the in-sandbox OpenClaw
Expand Down Expand Up @@ -317,7 +319,12 @@ function verifyMessagingBridges(
}
const missingProviders: string[] = [];
for (const channel of channels) {
if (!deps.providerExistsInGateway(channel)) {
const providerNames = getMessagingProviderNamesForChannel(sandboxName, channel);
if (providerNames.length === 0 && TOKENLESS_MESSAGING_CHANNELS.has(channel)) {
continue;
}
const expectedProviders = providerNames.length > 0 ? providerNames : [channel];
if (!expectedProviders.every((providerName) => deps.providerExistsInGateway(providerName))) {
missingProviders.push(channel);
}
}
Expand Down