Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 44 additions & 8 deletions src/lib/actions/sandbox/destroy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ type RemoveSandboxRegistryEntryDeps = {
removeSandbox?: typeof registry.removeSandbox;
};

type RunOpenshell = (
args: string[],
opts?: Record<string, unknown>,
) => { status: number | null };

export type CleanupSandboxServicesDeps = {
getSandbox?: typeof registry.getSandbox;
stopAll?: (opts: { sandboxName: string }) => void;
unloadOllamaModels?: () => void;
runOpenshell?: RunOpenshell;
rmSync?: typeof fs.rmSync;
};

const NEMOCLAW_GATEWAY_NAME = "nemoclaw";
const DASHBOARD_FORWARD_PORT = String(DASHBOARD_PORT);

Expand Down Expand Up @@ -80,37 +93,60 @@ function hasNoLiveSandboxes(): boolean {
return parseLiveSandboxNames(liveList.output).size === 0;
}

function cleanupSandboxServices(
export function cleanupSandboxServices(
sandboxName: string,
{ stopHostServices = false }: { stopHostServices?: boolean } = {},
deps: CleanupSandboxServicesDeps = {},
): void {
const getSandbox = deps.getSandbox ?? registry.getSandbox;
const stopAll =
deps.stopAll ??
((opts: { sandboxName: string }) => {
const services = require("../../services") as {
stopAll: (opts: { sandboxName: string }) => void;
};
services.stopAll(opts);
});
const unloadOllamaModels =
deps.unloadOllamaModels ??
(() => {
const { unloadOllamaModels: unload } = require("../../inference/ollama/proxy") as {
unloadOllamaModels: () => void;
};
unload();
});
const runOpenshell =
deps.runOpenshell ??
((args: string[], opts?: Record<string, unknown>) => {
const runtime = require("../../adapters/openshell/runtime") as {
runOpenshell: RunOpenshell;
};
return runtime.runOpenshell(args, opts);
});
const rmSync = deps.rmSync ?? fs.rmSync;

if (stopHostServices) {
// `stopAll()` already runs `unloadOllamaModels()` unconditionally —
// see src/lib/services.ts. Don't double-call here.
const { stopAll } = require("../../services");
stopAll({ sandboxName });
} else {
// No global stop, so `stopAll()` did not run; explicitly free Ollama
// models for this sandbox if its provider used Ollama. Without this
// branch a single-sandbox destroy would leave models loaded on the GPU.
const sb = registry.getSandbox(sandboxName);
const sb = getSandbox(sandboxName);
if (sb?.provider?.includes("ollama")) {
const { unloadOllamaModels } = require("../../inference/ollama/proxy");
unloadOllamaModels();
}
}

try {
fs.rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true });
rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true });
} catch {
// PID directory may not exist — ignore.
}

// Delete messaging providers created during onboard. Suppress stderr so
// "! Provider not found" noise doesn't appear when messaging was never configured.
const { runOpenshell } = require("../../adapters/openshell/runtime") as {
runOpenshell: (args: string[], opts?: Record<string, unknown>) => { status: number | null };
};
for (const suffix of ["telegram-bridge", "discord-bridge", "slack-bridge", "slack-app"]) {
runOpenshell(["provider", "delete", `${sandboxName}-${suffix}`], {
ignoreError: true,
Expand Down
7 changes: 7 additions & 0 deletions src/lib/inference/ollama/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,8 @@ async function prepareOllamaModel(model, installedModels = []) {
* dropped by Node's event loop on fast CLI exit (e.g. `nemoclaw destroy`),
* leaving GPU memory reserved. Reverting to async HTTP would reintroduce
* that race; keep it synchronous.
*
* Keep this logic in sync with `test/ollama-gpu-cleanup.test.ts`.
*/
function unloadOllamaModels() {
try {
Expand All @@ -653,6 +655,11 @@ function unloadOllamaModels() {

for (const entry of models) {
if (!entry?.name) continue;
// `-sS` deliberately swallows HTTP 4xx/5xx; this path is best-effort
// and `--fail` would only surface orphaned-GPU-memory failures into
// unrelated CLI exit codes during destroy. If we ever want explicit
// visibility, add `--fail-with-body` and route the result to a warn
// logger.
spawnSync(
"curl",
[
Expand Down
114 changes: 114 additions & 0 deletions test/destroy-cleanup-sandbox-services.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Regression guard for #2717: cleanupSandboxServices must invoke
// `unloadOllamaModels()` exactly once across both branches of the destroy
// flow — never zero (orphans GPU memory) and never twice (the original
// duplicate-call bug). Mirrors the structural argument captured in the
// inline comments in `src/lib/actions/sandbox/destroy.ts`.

import path from "node:path";
import { describe, expect, it, vi } from "vitest";

import type { CleanupSandboxServicesDeps } from "../dist/lib/actions/sandbox/destroy.js";
import { cleanupSandboxServices } from "../dist/lib/actions/sandbox/destroy.js";

type SandboxLike = { provider?: string | null } | null;

function buildDeps(sandbox: SandboxLike): {
deps: Required<Pick<CleanupSandboxServicesDeps, "getSandbox" | "stopAll" | "unloadOllamaModels" | "runOpenshell" | "rmSync">>;
stopAllCalls: Array<{ sandboxName: string }>;
unloadCalls: number;
} {
const stopAllCalls: Array<{ sandboxName: string }> = [];
let unloadCalls = 0;
return {
stopAllCalls,
get unloadCalls() {
return unloadCalls;
},
deps: {
getSandbox: vi.fn(() => sandbox as never),
stopAll: vi.fn((opts: { sandboxName: string }) => {
stopAllCalls.push(opts);
}),
unloadOllamaModels: vi.fn(() => {
unloadCalls += 1;
}),
runOpenshell: vi.fn(() => ({ status: 0 })),
rmSync: vi.fn(),
},
};
}

describe("cleanupSandboxServices Ollama unload (#2717)", () => {
it("delegates GPU unload to stopAll() exactly once when stopHostServices=true", () => {
const harness = buildDeps({ provider: "ollama-local" });

cleanupSandboxServices(
"regression-2717",
{ stopHostServices: true },
harness.deps,
);

expect(harness.deps.stopAll).toHaveBeenCalledTimes(1);
expect(harness.stopAllCalls[0]).toEqual({ sandboxName: "regression-2717" });
// stopAll() invokes unloadOllamaModels() internally — see services.ts.
// cleanupSandboxServices itself must not call it again.
expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled();
expect(harness.unloadCalls).toBe(0);
});

it("calls unloadOllamaModels() exactly once for an Ollama sandbox when stopHostServices=false", () => {
const harness = buildDeps({ provider: "ollama-local" });

cleanupSandboxServices(
"regression-2717",
{ stopHostServices: false },
harness.deps,
);

expect(harness.deps.stopAll).not.toHaveBeenCalled();
expect(harness.deps.unloadOllamaModels).toHaveBeenCalledTimes(1);
expect(harness.unloadCalls).toBe(1);
});

it("skips unloadOllamaModels() entirely for non-Ollama providers", () => {
const harness = buildDeps({ provider: "nvidia-prod" });

cleanupSandboxServices(
"regression-2717",
{ stopHostServices: false },
harness.deps,
);

expect(harness.deps.stopAll).not.toHaveBeenCalled();
expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled();
});

it("removes the sandbox PID dir and tears down all messaging providers", () => {
const harness = buildDeps({ provider: "ollama-local" });

cleanupSandboxServices(
"regression-2717",
{ stopHostServices: false },
harness.deps,
);

expect(harness.deps.rmSync).toHaveBeenCalledWith(
path.join("/tmp", "nemoclaw-services-regression-2717"),
{ recursive: true, force: true },
);

const providerDeleteCalls = vi
.mocked(harness.deps.runOpenshell)
.mock.calls.map((args) => args[0])
.filter((argv) => argv[0] === "provider" && argv[1] === "delete");
expect(providerDeleteCalls.map((argv) => argv[2])).toEqual([
"regression-2717-telegram-bridge",
"regression-2717-discord-bridge",
"regression-2717-slack-bridge",
"regression-2717-slack-app",
]);
});
});
Loading