Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2974,7 +2974,7 @@ async function createSandbox(
}
}
const preferredPort =
controlUiPort ?? envPort ?? persistedPort ?? (agent ? agent.forwardPort : CONTROL_UI_PORT);
controlUiPort ?? envPort ?? persistedPort ?? (agent ? agent.forwardPort : DASHBOARD_PORT);
const earlyForwards = runCaptureOpenshell(["forward", "list"], { ignoreError: true });
const effectivePort = findAvailableDashboardPort(sandboxName, preferredPort, earlyForwards);
if (effectivePort !== preferredPort) {
Expand Down Expand Up @@ -6694,8 +6694,6 @@ async function setupPoliciesWithSelection(

// ── Dashboard ────────────────────────────────────────────────────

const CONTROL_UI_PORT = DASHBOARD_PORT;

const {
buildChain,
buildControlUiUrls,
Expand All @@ -6710,6 +6708,7 @@ const {
} = onboardDashboard.createOnboardDashboardHelpers({
runOpenshell,
runCaptureOpenshell,
openshellArgv,
runCapture,
cliName,
agentProductName,
Expand Down
39 changes: 26 additions & 13 deletions src/lib/onboard/dashboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ import {
getOccupiedPorts,
isLiveForwardStatus,
} from "./dashboard-port";
import { OPENSHELL_PROBE_TIMEOUT_MS } from "../adapters/openshell/timeouts";
import {
buildDetachedForwardStartSpawn,
buildForwardStartProgressLogger,
looksLikeForwardPortConflict,
runBackgroundForwardStartWithPortReleaseRetries,
runDetachedForwardStartWithPortReleaseRetries,
} from "./forward-start";
import { bestEffortForwardStop } from "./forward-cleanup";
import { bestEffortForwardStop, bestEffortForwardStopForSandbox } from "./forward-cleanup";

const ANSI_RE = /\x1B(?:\[[0-?]*[ -/]*[@-~]|\][^\x07]*(?:\x07|\x1B\\)|[@-_])/g;
export const CONTROL_UI_PORT = DASHBOARD_PORT;
Expand All @@ -30,6 +33,7 @@ type CommandResult = { status: number | null };
export interface OnboardDashboardDeps {
runOpenshell(args: string[], opts?: Record<string, unknown>): CommandResult;
runCaptureOpenshell(args: string[], opts?: Record<string, unknown>): string | null;
openshellArgv(args: string[]): string[];
runCapture?: typeof defaultRunCapture;
cliName(): string;
agentProductName(): string;
Expand Down Expand Up @@ -215,13 +219,20 @@ export function createOnboardDashboardHelpers(deps: OnboardDashboardDeps): Onboa
): number {
const { rollbackSandboxOnFailure = false } = options;
const preferredPort = Number(getDashboardForwardPort(chatUiUrl));
const stopForwardForSandbox = (port: string | number) =>
bestEffortForwardStopForSandbox(
deps.runOpenshell,
(args, opts) => (deps.runCaptureOpenshell(args, opts) ?? "") as string,
port,
sandboxName,
);
let existingForwards = deps.runCaptureOpenshell(["forward", "list"], { ignoreError: true });
const preferredEntry = findForwardEntry(existingForwards, String(preferredPort));
if (
preferredEntry &&
(preferredEntry.sandboxName === sandboxName || !isLiveForwardStatus(preferredEntry.status))
) {
bestEffortForwardStop(deps.runOpenshell, preferredPort);
stopForwardForSandbox(preferredPort);
existingForwards = deps.runCaptureOpenshell(["forward", "list"], { ignoreError: true });
}
let actualPort: number;
Expand All @@ -248,26 +259,28 @@ export function createOnboardDashboardHelpers(deps: OnboardDashboardDeps): Onboa
const occupied = getOccupiedPorts(existingForwards);
for (const [port, owner] of occupied.entries()) {
if (owner === sandboxName && Number(port) !== actualPort) {
bestEffortForwardStop(deps.runOpenshell, port);
stopForwardForSandbox(port);
}
}

const parsedUrl = new URL(chatUiUrl.includes("://") ? chatUiUrl : `http://${chatUiUrl}`);
parsedUrl.port = String(actualPort);
const actualTarget = getDashboardForwardTarget(parsedUrl.toString());
bestEffortForwardStop(deps.runOpenshell, actualPort);
const { result: fwdResult, diagnostic: fwdDiagnostic } = runBackgroundForwardStartWithPortReleaseRetries(
(stdio, timeout) =>
deps.runOpenshell(
["forward", "start", "--background", actualTarget, sandboxName],
{ ignoreError: true, suppressOutput: true, stdio, timeout },
),
stopForwardForSandbox(actualPort);
const { ok: fwdOk, diagnostic: fwdDiagnostic } = runDetachedForwardStartWithPortReleaseRetries(
buildDetachedForwardStartSpawn(
deps.openshellArgv(["forward", "start", "--background", actualTarget, sandboxName]),
),
() =>
(deps.runCaptureOpenshell(["forward", "list"], { timeout: OPENSHELL_PROBE_TIMEOUT_MS }) ?? "") as string,
{ port: actualPort, sandboxName },
() => {
deps.sleep(1);
bestEffortForwardStop(deps.runOpenshell, actualPort);
stopForwardForSandbox(actualPort);
},
{ onProgress: buildForwardStartProgressLogger(actualPort) },
);
if (fwdResult && fwdResult.status !== 0) {
if (!fwdOk) {
const looksLikePortConflict = looksLikeForwardPortConflict(fwdDiagnostic);
if (rollbackSandboxOnFailure) {
const err = new Error(
Expand Down
115 changes: 115 additions & 0 deletions src/lib/onboard/forward-cleanup.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { describe, expect, it, vi } from "vitest";

import {
bestEffortForwardStop,
bestEffortForwardStopForSandbox,
} from "../../../dist/lib/onboard/forward-cleanup";

function forwardListWith(entries: Array<{ sandbox: string; port: number; status?: string }>): string {
const header = "SANDBOX BIND PORT PID STATUS";
const rows = entries.map(
(e) => `${e.sandbox} 127.0.0.1 ${e.port} 1234 ${e.status ?? "running"}`,
);
return [header, ...rows].join("\n");
}

describe("bestEffortForwardStop", () => {
it("invokes `forward stop` with the port and silently ignores errors", () => {
const run = vi.fn();
bestEffortForwardStop(run, 18789);
expect(run).toHaveBeenCalledWith(
["forward", "stop", "18789"],
{ ignoreError: true, suppressOutput: true },
);
});
});

describe("bestEffortForwardStopForSandbox", () => {
it("returns owned-other and skips stop when the port belongs to a different sandbox", () => {
const run = vi.fn();
const fetch = vi
.fn()
.mockReturnValue(forwardListWith([{ sandbox: "other-sandbox", port: 18789 }]));

const outcome = bestEffortForwardStopForSandbox(run, fetch, 18789, "my-sandbox");

expect(outcome).toBe("owned-other");
expect(run).not.toHaveBeenCalled();
expect(fetch).toHaveBeenCalledWith(
["forward", "list"],
expect.objectContaining({ timeout: 15_000 }),
);
// Caller must NOT pass ignoreError; failures should throw so the catch
// branch returns "list-failed" instead of running a stop with no owner data.
expect(fetch).not.toHaveBeenCalledWith(
["forward", "list"],
expect.objectContaining({ ignoreError: true }),
);
});

it("returns stopped and uses the sandbox-scoped forward stop form when ownership matches", () => {
const run = vi.fn();
const fetch = vi
.fn()
.mockReturnValue(forwardListWith([{ sandbox: "my-sandbox", port: 18789 }]));

const outcome = bestEffortForwardStopForSandbox(run, fetch, 18789, "my-sandbox");

expect(outcome).toBe("stopped");
// Sandbox-scoped stop closes the TOCTOU window between list and stop:
// even if another sandbox bound the port in the meantime, openshell
// forward stop with both args will refuse to kill it.
expect(run).toHaveBeenCalledWith(
["forward", "stop", "18789", "my-sandbox"],
{ ignoreError: true, suppressOutput: true },
);
});

it("returns no-entry and runs a sandbox-scoped stop when no live forward is on that port", () => {
const run = vi.fn();
const fetch = vi.fn().mockReturnValue(forwardListWith([]));

const outcome = bestEffortForwardStopForSandbox(run, fetch, 18789, "my-sandbox");

expect(outcome).toBe("no-entry");
expect(run).toHaveBeenCalledWith(
["forward", "stop", "18789", "my-sandbox"],
{ ignoreError: true, suppressOutput: true },
);
});

it("skips the stop entirely when `forward list` itself throws (owner unknown)", () => {
const run = vi.fn();
const fetch = vi.fn().mockImplementation(() => {
throw new Error("gateway timed out");
});

const outcome = bestEffortForwardStopForSandbox(run, fetch, 18789, "my-sandbox");

expect(outcome).toBe("list-failed");
// Without ownership data, a port-only stop could kill another
// sandbox's forward — better to leave the port alone and let the
// helper's retry / next poll observe the real state.
expect(run).not.toHaveBeenCalled();
});

it("ignores forwards with non-live status when deciding ownership", () => {
// `getOccupiedPorts` filters by `isLiveForwardStatus`, so a "stopped"
// entry on the requested port should be treated as no-entry (not as a
// foreign owner).
const run = vi.fn();
const fetch = vi
.fn()
.mockReturnValue(
forwardListWith([{ sandbox: "other-sandbox", port: 18789, status: "stopped" }]),
);

const outcome = bestEffortForwardStopForSandbox(run, fetch, 18789, "my-sandbox");

expect(outcome).toBe("no-entry");
expect(run).toHaveBeenCalledTimes(1);
});
});
72 changes: 72 additions & 0 deletions src/lib/onboard/forward-cleanup.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { OPENSHELL_PROBE_TIMEOUT_MS } from "../adapters/openshell/timeouts";

import { getOccupiedPorts } from "./dashboard-port";

export type ForwardStopRunner = (
args: string[],
opts: { ignoreError?: boolean; suppressOutput?: boolean },
) => unknown;

export type ForwardListRunner = (
args: string[],
opts: { ignoreError?: boolean; timeout?: number },
) => string;

/**
* `openshell forward stop <port>` — port-scoped, kills whatever forward is
* currently bound to that port. Use only when the caller has no sandbox
* context (or is intentionally targeting any owner — e.g. wholesale
* dashboard-port reclaim during recovery). Sandbox-aware call sites should
* prefer `bestEffortForwardStopForSandbox`, which uses the sandbox-scoped
* `forward stop <port> <sandbox>` form so the kill cannot collateral
* another sandbox's forward in a TOCTOU window.
*/
export function bestEffortForwardStop(
runOpenshell: ForwardStopRunner,
port: string | number,
Expand All @@ -15,3 +33,57 @@ export function bestEffortForwardStop(
suppressOutput: true,
});
}

/**
* Stop the forward on `port` only when `openshell forward list` reports it
* is owned by `sandboxName` (or unowned). When the list query fails the
* stop is skipped — without ownership data we cannot tell whether port
* belongs to a concurrent onboard / connect on a different sandbox, so the
* safe choice is to leave the port alone and let the helper's retry path
* (or the next forward list poll once the gateway is responsive) sort
* itself out.
*
* The stop itself uses the sandbox-scoped `forward stop <port> <sandbox>`
* form so a TOCTOU window between list and stop cannot accidentally kill
* another sandbox's forward that bound to the same port in the meantime.
*
* Returns:
* - "stopped" — entry matched sandboxName and the stop ran.
* - "owned-other" — entry exists for a different sandbox; stop skipped.
* - "no-entry" — no live entry for that port; stop ran defensively
* (sandbox-scoped, so a concurrent racer's forward
* that may have bound the port between list and stop
* is left alone).
* - "list-failed" — could not enumerate forwards; stop SKIPPED. The
* owner is unknown and the port-only `forward stop`
* could kill an unrelated sandbox's forward.
*/
export function bestEffortForwardStopForSandbox(
runOpenshell: ForwardStopRunner,
runCaptureOpenshell: ForwardListRunner,
port: string | number,
sandboxName: string,
): "stopped" | "owned-other" | "no-entry" | "list-failed" {
// Let runCaptureOpenshell throw on failure/timeout so the catch branch
// returns "list-failed". With ignoreError: true the runner would swallow
// the error and return "", which getOccupiedPorts parses as an empty map
// and the "no-entry" branch below would still run the stop — exactly the
// collateral-damage case this helper exists to avoid.
let listOutput = "";
try {
listOutput = runCaptureOpenshell(["forward", "list"], {
timeout: OPENSHELL_PROBE_TIMEOUT_MS,
});
} catch {
return "list-failed";
}
const owner = getOccupiedPorts(listOutput).get(String(port)) ?? null;
if (owner && owner !== sandboxName) {
return "owned-other";
}
runOpenshell(["forward", "stop", String(port), sandboxName], {
ignoreError: true,
suppressOutput: true,
});
return owner === sandboxName ? "stopped" : "no-entry";
}
Loading
Loading