From da250a732236e6e318c98e8e65da3749f112c20d Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 8 May 2026 02:55:54 +0000 Subject: [PATCH] test(sandbox): add exactly-once regression for Ollama GPU unload (#2717) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address the deferred suggestions on #3173: - Make `cleanupSandboxServices` injectable and exported with a `CleanupSandboxServicesDeps` shape mirroring the existing destroy.ts patterns (`RemoveSandboxImageDeps`, etc.). Runtime callers keep passing nothing and continue to resolve dependencies via lazy `require()` so the circular import boundary is preserved. - Add `test/destroy-cleanup-sandbox-services.test.ts` covering the three call-count cases that motivated the original dedup fix: - `stopHostServices=true` + Ollama provider — `stopAll()` invoked once, `unloadOllamaModels()` not called by destroy.ts (it runs transitively inside `stopAll()`). - `stopHostServices=false` + Ollama provider — `unloadOllamaModels()` invoked exactly once, `stopAll()` not called. - `stopHostServices=false` + non-Ollama provider — neither called. Plus a check that the PID directory removal and messaging-provider teardown loop run with the expected arguments. - In `unloadOllamaModels`, append a sync pointer to `test/ollama-gpu-cleanup.test.ts` and document why `-sS` (rather than `--fail`) is intentional on the POST to `/api/generate`. Per #3173 review feedback (jyaunches optional regression test, --fail nit, CodeRabbit source/test sync pointer). Signed-off-by: Tinson Lai --- src/lib/actions/sandbox/destroy.ts | 52 ++++++-- src/lib/onboard-ollama-proxy.ts | 7 ++ test/destroy-cleanup-sandbox-services.test.ts | 114 ++++++++++++++++++ 3 files changed, 165 insertions(+), 8 deletions(-) create mode 100644 test/destroy-cleanup-sandbox-services.test.ts diff --git a/src/lib/actions/sandbox/destroy.ts b/src/lib/actions/sandbox/destroy.ts index 125d99abde..fcfa233207 100644 --- a/src/lib/actions/sandbox/destroy.ts +++ b/src/lib/actions/sandbox/destroy.ts @@ -42,6 +42,19 @@ type RemoveSandboxRegistryEntryDeps = { removeSandbox?: typeof registry.removeSandbox; }; +type RunOpenshell = ( + args: string[], + opts?: Record, +) => { status: number | null }; + +export type CleanupSandboxServicesDeps = { + getSandbox?: typeof registry.getSandbox; + stopAll?: (opts: { sandboxName: string }) => void; + unloadOllamaModels?: () => void; + runOpenshell?: RunOpenshell; + rmSync?: typeof fs.rmSync; +}; + const NEMOCLAW_GATEWAY_NAME = "nemoclaw"; const DASHBOARD_FORWARD_PORT = String(DASHBOARD_PORT); @@ -80,37 +93,60 @@ function hasNoLiveSandboxes(): boolean { return parseLiveSandboxNames(liveList.output).size === 0; } -function cleanupSandboxServices( +export function cleanupSandboxServices( sandboxName: string, { stopHostServices = false }: { stopHostServices?: boolean } = {}, + deps: CleanupSandboxServicesDeps = {}, ): void { + const getSandbox = deps.getSandbox ?? registry.getSandbox; + const stopAll = + deps.stopAll ?? + ((opts: { sandboxName: string }) => { + const services = require("../../services") as { + stopAll: (opts: { sandboxName: string }) => void; + }; + services.stopAll(opts); + }); + const unloadOllamaModels = + deps.unloadOllamaModels ?? + (() => { + const { unloadOllamaModels: unload } = require("../../onboard-ollama-proxy") as { + unloadOllamaModels: () => void; + }; + unload(); + }); + const runOpenshell = + deps.runOpenshell ?? + ((args: string[], opts?: Record) => { + const runtime = require("../../adapters/openshell/runtime") as { + runOpenshell: RunOpenshell; + }; + return runtime.runOpenshell(args, opts); + }); + const rmSync = deps.rmSync ?? fs.rmSync; + if (stopHostServices) { // `stopAll()` already runs `unloadOllamaModels()` unconditionally — // see src/lib/services.ts. Don't double-call here. - const { stopAll } = require("../../services"); stopAll({ sandboxName }); } else { // No global stop, so `stopAll()` did not run; explicitly free Ollama // models for this sandbox if its provider used Ollama. Without this // branch a single-sandbox destroy would leave models loaded on the GPU. - const sb = registry.getSandbox(sandboxName); + const sb = getSandbox(sandboxName); if (sb?.provider?.includes("ollama")) { - const { unloadOllamaModels } = require("../../onboard-ollama-proxy"); unloadOllamaModels(); } } try { - fs.rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true }); + rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true }); } catch { // PID directory may not exist — ignore. } // Delete messaging providers created during onboard. Suppress stderr so // "! Provider not found" noise doesn't appear when messaging was never configured. - const { runOpenshell } = require("../../adapters/openshell/runtime") as { - runOpenshell: (args: string[], opts?: Record) => { status: number | null }; - }; for (const suffix of ["telegram-bridge", "discord-bridge", "slack-bridge", "slack-app"]) { runOpenshell(["provider", "delete", `${sandboxName}-${suffix}`], { ignoreError: true, diff --git a/src/lib/onboard-ollama-proxy.ts b/src/lib/onboard-ollama-proxy.ts index bf5ecf2a3a..d86d699359 100644 --- a/src/lib/onboard-ollama-proxy.ts +++ b/src/lib/onboard-ollama-proxy.ts @@ -637,6 +637,8 @@ async function prepareOllamaModel(model, installedModels = []) { * dropped by Node's event loop on fast CLI exit (e.g. `nemoclaw destroy`), * leaving GPU memory reserved. Reverting to async HTTP would reintroduce * that race; keep it synchronous. + * + * Keep this logic in sync with `test/ollama-gpu-cleanup.test.ts`. */ function unloadOllamaModels() { try { @@ -652,6 +654,11 @@ function unloadOllamaModels() { for (const entry of models) { if (!entry?.name) continue; + // `-sS` deliberately swallows HTTP 4xx/5xx; this path is best-effort + // and `--fail` would only surface orphaned-GPU-memory failures into + // unrelated CLI exit codes during destroy. If we ever want explicit + // visibility, add `--fail-with-body` and route the result to a warn + // logger. spawnSync( "curl", [ diff --git a/test/destroy-cleanup-sandbox-services.test.ts b/test/destroy-cleanup-sandbox-services.test.ts new file mode 100644 index 0000000000..edd5d56d28 --- /dev/null +++ b/test/destroy-cleanup-sandbox-services.test.ts @@ -0,0 +1,114 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Regression guard for #2717: cleanupSandboxServices must invoke +// `unloadOllamaModels()` exactly once across both branches of the destroy +// flow — never zero (orphans GPU memory) and never twice (the original +// duplicate-call bug). Mirrors the structural argument captured in the +// inline comments in `src/lib/actions/sandbox/destroy.ts`. + +import path from "node:path"; +import { describe, expect, it, vi } from "vitest"; + +import type { CleanupSandboxServicesDeps } from "../dist/lib/actions/sandbox/destroy.js"; +import { cleanupSandboxServices } from "../dist/lib/actions/sandbox/destroy.js"; + +type SandboxLike = { provider?: string | null } | null; + +function buildDeps(sandbox: SandboxLike): { + deps: Required>; + stopAllCalls: Array<{ sandboxName: string }>; + unloadCalls: number; +} { + const stopAllCalls: Array<{ sandboxName: string }> = []; + let unloadCalls = 0; + return { + stopAllCalls, + get unloadCalls() { + return unloadCalls; + }, + deps: { + getSandbox: vi.fn(() => sandbox as never), + stopAll: vi.fn((opts: { sandboxName: string }) => { + stopAllCalls.push(opts); + }), + unloadOllamaModels: vi.fn(() => { + unloadCalls += 1; + }), + runOpenshell: vi.fn(() => ({ status: 0 })), + rmSync: vi.fn(), + }, + }; +} + +describe("cleanupSandboxServices Ollama unload (#2717)", () => { + it("delegates GPU unload to stopAll() exactly once when stopHostServices=true", () => { + const harness = buildDeps({ provider: "ollama-local" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: true }, + harness.deps, + ); + + expect(harness.deps.stopAll).toHaveBeenCalledTimes(1); + expect(harness.stopAllCalls[0]).toEqual({ sandboxName: "regression-2717" }); + // stopAll() invokes unloadOllamaModels() internally — see services.ts. + // cleanupSandboxServices itself must not call it again. + expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled(); + expect(harness.unloadCalls).toBe(0); + }); + + it("calls unloadOllamaModels() exactly once for an Ollama sandbox when stopHostServices=false", () => { + const harness = buildDeps({ provider: "ollama-local" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: false }, + harness.deps, + ); + + expect(harness.deps.stopAll).not.toHaveBeenCalled(); + expect(harness.deps.unloadOllamaModels).toHaveBeenCalledTimes(1); + expect(harness.unloadCalls).toBe(1); + }); + + it("skips unloadOllamaModels() entirely for non-Ollama providers", () => { + const harness = buildDeps({ provider: "nvidia-prod" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: false }, + harness.deps, + ); + + expect(harness.deps.stopAll).not.toHaveBeenCalled(); + expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled(); + }); + + it("removes the sandbox PID dir and tears down all messaging providers", () => { + const harness = buildDeps({ provider: "ollama-local" }); + + cleanupSandboxServices( + "regression-2717", + { stopHostServices: false }, + harness.deps, + ); + + expect(harness.deps.rmSync).toHaveBeenCalledWith( + path.join("/tmp", "nemoclaw-services-regression-2717"), + { recursive: true, force: true }, + ); + + const providerDeleteCalls = vi + .mocked(harness.deps.runOpenshell) + .mock.calls.map((args) => args[0]) + .filter((argv) => argv[0] === "provider" && argv[1] === "delete"); + expect(providerDeleteCalls.map((argv) => argv[2])).toEqual([ + "regression-2717-telegram-bridge", + "regression-2717-discord-bridge", + "regression-2717-slack-bridge", + "regression-2717-slack-app", + ]); + }); +});