NVIDIA · ericksoa · May 9, 2026 · May 8, 2026 · May 8, 2026
diff --git a/src/lib/actions/sandbox/destroy.ts b/src/lib/actions/sandbox/destroy.ts
@@ -42,6 +42,19 @@ type RemoveSandboxRegistryEntryDeps = {
   removeSandbox?: typeof registry.removeSandbox;
 };
 
+type RunOpenshell = (
+  args: string[],
+  opts?: Record<string, unknown>,
+) => { status: number | null };
+
+export type CleanupSandboxServicesDeps = {
+  getSandbox?: typeof registry.getSandbox;
+  stopAll?: (opts: { sandboxName: string }) => void;
+  unloadOllamaModels?: () => void;
+  runOpenshell?: RunOpenshell;
+  rmSync?: typeof fs.rmSync;
+};
+
 const NEMOCLAW_GATEWAY_NAME = "nemoclaw";
 const DASHBOARD_FORWARD_PORT = String(DASHBOARD_PORT);
 
@@ -80,37 +93,60 @@ function hasNoLiveSandboxes(): boolean {
   return parseLiveSandboxNames(liveList.output).size === 0;
 }
 
-function cleanupSandboxServices(
+export function cleanupSandboxServices(
   sandboxName: string,
   { stopHostServices = false }: { stopHostServices?: boolean } = {},
+  deps: CleanupSandboxServicesDeps = {},
 ): void {
+  const getSandbox = deps.getSandbox ?? registry.getSandbox;
+  const stopAll =
+    deps.stopAll ??
+    ((opts: { sandboxName: string }) => {
+      const services = require("../../services") as {
+        stopAll: (opts: { sandboxName: string }) => void;
+      };
+      services.stopAll(opts);
+    });
+  const unloadOllamaModels =
+    deps.unloadOllamaModels ??
+    (() => {
+      const { unloadOllamaModels: unload } = require("../../inference/ollama/proxy") as {
+        unloadOllamaModels: () => void;
+      };
+      unload();
+    });
+  const runOpenshell =
+    deps.runOpenshell ??
+    ((args: string[], opts?: Record<string, unknown>) => {
+      const runtime = require("../../adapters/openshell/runtime") as {
+        runOpenshell: RunOpenshell;
+      };
+      return runtime.runOpenshell(args, opts);
+    });
+  const rmSync = deps.rmSync ?? fs.rmSync;
+
   if (stopHostServices) {
     // `stopAll()` already runs `unloadOllamaModels()` unconditionally —
     // see src/lib/services.ts. Don't double-call here.
-    const { stopAll } = require("../../services");
     stopAll({ sandboxName });
   } else {
     // No global stop, so `stopAll()` did not run; explicitly free Ollama
     // models for this sandbox if its provider used Ollama. Without this
     // branch a single-sandbox destroy would leave models loaded on the GPU.
-    const sb = registry.getSandbox(sandboxName);
+    const sb = getSandbox(sandboxName);
     if (sb?.provider?.includes("ollama")) {
-      const { unloadOllamaModels } = require("../../inference/ollama/proxy");
       unloadOllamaModels();
     }
   }
 
   try {
-    fs.rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true });
+    rmSync(`/tmp/nemoclaw-services-${sandboxName}`, { recursive: true, force: true });
   } catch {
     // PID directory may not exist — ignore.
   }
 
   // Delete messaging providers created during onboard. Suppress stderr so
   // "! Provider not found" noise doesn't appear when messaging was never configured.
-  const { runOpenshell } = require("../../adapters/openshell/runtime") as {
-    runOpenshell: (args: string[], opts?: Record<string, unknown>) => { status: number | null };
-  };
   for (const suffix of ["telegram-bridge", "discord-bridge", "slack-bridge", "slack-app"]) {
     runOpenshell(["provider", "delete", `${sandboxName}-${suffix}`], {
       ignoreError: true,

diff --git a/src/lib/inference/ollama/proxy.ts b/src/lib/inference/ollama/proxy.ts
@@ -638,6 +638,8 @@ async function prepareOllamaModel(model, installedModels = []) {
  * dropped by Node's event loop on fast CLI exit (e.g. `nemoclaw destroy`),
  * leaving GPU memory reserved. Reverting to async HTTP would reintroduce
  * that race; keep it synchronous.
+ *
+ * Keep this logic in sync with `test/ollama-gpu-cleanup.test.ts`.
  */
 function unloadOllamaModels() {
   try {
@@ -653,6 +655,11 @@ function unloadOllamaModels() {
 
     for (const entry of models) {
       if (!entry?.name) continue;
+      // `-sS` deliberately swallows HTTP 4xx/5xx; this path is best-effort
+      // and `--fail` would only surface orphaned-GPU-memory failures into
+      // unrelated CLI exit codes during destroy. If we ever want explicit
+      // visibility, add `--fail-with-body` and route the result to a warn
+      // logger.
       spawnSync(
         "curl",
         [

diff --git a/test/destroy-cleanup-sandbox-services.test.ts b/test/destroy-cleanup-sandbox-services.test.ts
@@ -0,0 +1,114 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Regression guard for #2717: cleanupSandboxServices must invoke
+// `unloadOllamaModels()` exactly once across both branches of the destroy
+// flow — never zero (orphans GPU memory) and never twice (the original
+// duplicate-call bug). Mirrors the structural argument captured in the
+// inline comments in `src/lib/actions/sandbox/destroy.ts`.
+
+import path from "node:path";
+import { describe, expect, it, vi } from "vitest";
+
+import type { CleanupSandboxServicesDeps } from "../dist/lib/actions/sandbox/destroy.js";
+import { cleanupSandboxServices } from "../dist/lib/actions/sandbox/destroy.js";
+
+type SandboxLike = { provider?: string | null } | null;
+
+function buildDeps(sandbox: SandboxLike): {
+  deps: Required<Pick<CleanupSandboxServicesDeps, "getSandbox" | "stopAll" | "unloadOllamaModels" | "runOpenshell" | "rmSync">>;
+  stopAllCalls: Array<{ sandboxName: string }>;
+  unloadCalls: number;
+} {
+  const stopAllCalls: Array<{ sandboxName: string }> = [];
+  let unloadCalls = 0;
+  return {
+    stopAllCalls,
+    get unloadCalls() {
+      return unloadCalls;
+    },
+    deps: {
+      getSandbox: vi.fn(() => sandbox as never),
+      stopAll: vi.fn((opts: { sandboxName: string }) => {
+        stopAllCalls.push(opts);
+      }),
+      unloadOllamaModels: vi.fn(() => {
+        unloadCalls += 1;
+      }),
+      runOpenshell: vi.fn(() => ({ status: 0 })),
+      rmSync: vi.fn(),
+    },
+  };
+}
+
+describe("cleanupSandboxServices Ollama unload (#2717)", () => {
+  it("delegates GPU unload to stopAll() exactly once when stopHostServices=true", () => {
+    const harness = buildDeps({ provider: "ollama-local" });
+
+    cleanupSandboxServices(
+      "regression-2717",
+      { stopHostServices: true },
+      harness.deps,
+    );
+
+    expect(harness.deps.stopAll).toHaveBeenCalledTimes(1);
+    expect(harness.stopAllCalls[0]).toEqual({ sandboxName: "regression-2717" });
+    // stopAll() invokes unloadOllamaModels() internally — see services.ts.
+    // cleanupSandboxServices itself must not call it again.
+    expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled();
+    expect(harness.unloadCalls).toBe(0);
+  });
+
+  it("calls unloadOllamaModels() exactly once for an Ollama sandbox when stopHostServices=false", () => {
+    const harness = buildDeps({ provider: "ollama-local" });
+
+    cleanupSandboxServices(
+      "regression-2717",
+      { stopHostServices: false },
+      harness.deps,
+    );
+
+    expect(harness.deps.stopAll).not.toHaveBeenCalled();
+    expect(harness.deps.unloadOllamaModels).toHaveBeenCalledTimes(1);
+    expect(harness.unloadCalls).toBe(1);
+  });
+
+  it("skips unloadOllamaModels() entirely for non-Ollama providers", () => {
+    const harness = buildDeps({ provider: "nvidia-prod" });
+
+    cleanupSandboxServices(
+      "regression-2717",
+      { stopHostServices: false },
+      harness.deps,
+    );
+
+    expect(harness.deps.stopAll).not.toHaveBeenCalled();
+    expect(harness.deps.unloadOllamaModels).not.toHaveBeenCalled();
+  });
+
+  it("removes the sandbox PID dir and tears down all messaging providers", () => {
+    const harness = buildDeps({ provider: "ollama-local" });
+
+    cleanupSandboxServices(
+      "regression-2717",
+      { stopHostServices: false },
+      harness.deps,
+    );
+
+    expect(harness.deps.rmSync).toHaveBeenCalledWith(
+      path.join("/tmp", "nemoclaw-services-regression-2717"),
+      { recursive: true, force: true },
+    );
+
+    const providerDeleteCalls = vi
+      .mocked(harness.deps.runOpenshell)
+      .mock.calls.map((args) => args[0])
+      .filter((argv) => argv[0] === "provider" && argv[1] === "delete");
+    expect(providerDeleteCalls.map((argv) => argv[2])).toEqual([
+      "regression-2717-telegram-bridge",
+      "regression-2717-discord-bridge",
+      "regression-2717-slack-bridge",
+      "regression-2717-slack-app",
+    ]);
+  });
+});