diff --git a/src/lib/actions/sandbox/destroy.ts b/src/lib/actions/sandbox/destroy.ts index a762f178df..c101b3b614 100644 --- a/src/lib/actions/sandbox/destroy.ts +++ b/src/lib/actions/sandbox/destroy.ts @@ -42,6 +42,19 @@ type RemoveSandboxRegistryEntryDeps = { removeSandbox?: typeof registry.removeSandbox; }; +type RunOpenshell = ( + args: string[], + opts?: Record, +) => { status: number | null }; + +export type CleanupSandboxServicesDeps = { + getSandbox?: typeof registry.getSandbox; + stopAll?: (opts: { sandboxName: string }) => void; + unloadOllamaModels?: () => void; + runOpenshell?: RunOpenshell; + rmSync?: typeof fs.rmSync; +}; + const NEMOCLAW_GATEWAY_NAME = "nemoclaw"; const DASHBOARD_FORWARD_PORT = String(DASHBOARD_PORT); @@ -80,37 +93,60 @@ function hasNoLiveSandboxes(): boolean { return parseLiveSandboxNames(liveList.output).size === 0; } -function cleanupSandboxServices( +export function cleanupSandboxServices( sandboxName: string, { stopHostServices = false }: { stopHostServices?: boolean } = {}, + deps: CleanupSandboxServicesDeps = {}, ): void { + const getSandbox = deps.getSandbox ?? registry.getSandbox; + const stopAll = + deps.stopAll ?? + ((opts: { sandboxName: string }) => { + const services = require("../../services") as { + stopAll: (opts: { sandboxName: string }) => void; + }; + services.stopAll(opts); + }); + const unloadOllamaModels = + deps.unloadOllamaModels ?? + (() => { + const { unloadOllamaModels: unload } = require("../../inference/ollama/proxy") as { + unloadOllamaModels: () => void; + }; + unload(); + }); + const runOpenshell = + deps.runOpenshell ?? + ((args: string[], opts?: Record) => { + const runtime = require("../../adapters/openshell/runtime") as { + runOpenshell: RunOpenshell; + }; + return runtime.runOpenshell(args, opts); + }); + const rmSync = deps.rmSync ?? fs.rmSync; + if (stopHostServices) { // `stopAll()` already runs `unloadOllamaModels()` unconditionally — // see src/lib/services.ts. Don't double-call here. - const { stopAll } = require("../../services"); stopAll({ sandboxName }); } else { // No global stop, so `stopAll()` did not run; explicitly free Ollama // models for this sandbox if its provider used Ollama. Without this // branch a single-sandbox destroy would leave models loaded on the GPU. - const sb = registry.getSandbox(sandboxName); + const sb = getSandbox(sandboxName); if (sb?.provider?.includes("ollama")) { - const { unloadOllamaModels } = require("../../inference/ollama/proxy"); unloadOllamaModels(); } } try { - fs.rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true }); + rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true }); } catch { // PID directory may not exist — ignore. } // Delete messaging providers created during onboard. Suppress stderr so // "! Provider not found" noise doesn't appear when messaging was never configured. - const { runOpenshell } = require("../../adapters/openshell/runtime") as { - runOpenshell: (args: string[], opts?: Record) => { status: number | null }; - }; for (const suffix of ["telegram-bridge", "discord-bridge", "slack-bridge", "slack-app"]) { runOpenshell(["provider", "delete", `${sandboxName}-${suffix}`], { ignoreError: true, diff --git a/src/lib/inference/ollama/proxy.ts b/src/lib/inference/ollama/proxy.ts index 12143d871e..00cde4c9aa 100644 --- a/src/lib/inference/ollama/proxy.ts +++ b/src/lib/inference/ollama/proxy.ts @@ -638,6 +638,8 @@ async function prepareOllamaModel(model, installedModels = []) { * dropped by Node's event loop on fast CLI exit (e.g. `nemoclaw destroy`), * leaving GPU memory reserved. Reverting to async HTTP would reintroduce * that race; keep it synchronous. + * + * Keep this logic in sync with `test/ollama-gpu-cleanup.test.ts`. */ function unloadOllamaModels() { try { @@ -653,6 +655,11 @@ function unloadOllamaModels() { for (const entry of models) { if (!entry?.name) continue; + // `-sS` deliberately swallows HTTP 4xx/5xx; this path is best-effort + // and `--fail` would only surface orphaned-GPU-memory failures into + // unrelated CLI exit codes during destroy. If we ever want explicit + // visibility, add `--fail-with-body` and route the result to a warn + // logger. spawnSync( "curl", [ diff --git a/test/destroy-cleanup-sandbox-services.test.ts b/test/destroy-cleanup-sandbox-services.test.ts new file mode 100644 index 0000000000..edd5d56d28 --- /dev/null +++ b/test/destroy-cleanup-sandbox-services.test.ts @@ -0,0 +1,114 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Regression guard for #2717: cleanupSandboxServices must invoke +// `unloadOllamaModels()` exactly once across both branches of the destroy +// flow — never zero (orphans GPU memory) and never twice (the original +// duplicate-call bug). Mirrors the structural argument captured in the +// inline comments in `src/lib/actions/sandbox/destroy.ts`. + +import path from "node:path"; +import { describe, expect, it, vi } from "vitest"; + +import type { CleanupSandboxServicesDeps } from "../dist/lib/actions/sandbox/destroy.js"; +import { cleanupSandboxServices } from "../dist/lib/actions/sandbox/destroy.js"; + +type SandboxLike = { provider?: string | null } | null; + +function buildDeps(sandbox: SandboxLike): { + deps: Required>; + stopAllCalls: Array<{ sandboxName: string }>; + unloadCalls: number; +} { + const stopAllCalls: Array<{ sandboxName: string }> = []; + let unloadCalls = 0; + return { + stopAllCalls, + get unloadCalls() { + return unloadCalls; + }, + deps: { + getSandbox: vi.fn(() => sandbox as never), + stopAll: vi.fn((opts: { sandboxName: string }) => { + stopAllCalls.push(opts); + }), + unloadOllamaModels: vi.fn(() => { + unloadCalls += 1; + }), + runOpenshell: vi.fn(() => ({ status: 0 })), + rmSync: vi.fn(), + }, + }; +} + +describe("cleanupSandboxServices Ollama unload (#2717)", () => { + it("delegates GPU unload to stopAll() exactly once when stopHostServices=true", () => { + const harness = buildDeps({ provider: "ollama-local" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: true }, + harness.deps, + ); + + expect(harness.deps.stopAll).toHaveBeenCalledTimes(1); + expect(harness.stopAllCalls[0]).toEqual({ sandboxName: "regression-2717" }); + // stopAll() invokes unloadOllamaModels() internally — see services.ts. + // cleanupSandboxServices itself must not call it again. + expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled(); + expect(harness.unloadCalls).toBe(0); + }); + + it("calls unloadOllamaModels() exactly once for an Ollama sandbox when stopHostServices=false", () => { + const harness = buildDeps({ provider: "ollama-local" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: false }, + harness.deps, + ); + + expect(harness.deps.stopAll).not.toHaveBeenCalled(); + expect(harness.deps.unloadOllamaModels).toHaveBeenCalledTimes(1); + expect(harness.unloadCalls).toBe(1); + }); + + it("skips unloadOllamaModels() entirely for non-Ollama providers", () => { + const harness = buildDeps({ provider: "nvidia-prod" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: false }, + harness.deps, + ); + + expect(harness.deps.stopAll).not.toHaveBeenCalled(); + expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled(); + }); + + it("removes the sandbox PID dir and tears down all messaging providers", () => { + const harness = buildDeps({ provider: "ollama-local" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: false }, + harness.deps, + ); + + expect(harness.deps.rmSync).toHaveBeenCalledWith( + path.join("/tmp", "nemoclaw-services-regression-2717"), + { recursive: true, force: true }, + ); + + const providerDeleteCalls = vi + .mocked(harness.deps.runOpenshell) + .mock.calls.map((args) => args[0]) + .filter((argv) => argv[0] === "provider" && argv[1] === "delete"); + expect(providerDeleteCalls.map((argv) => argv[2])).toEqual([ + "regression-2717-telegram-bridge", + "regression-2717-discord-bridge", + "regression-2717-slack-bridge", + "regression-2717-slack-app", + ]); + }); +});