diff --git a/src/lib/actions/sandbox/destroy.ts b/src/lib/actions/sandbox/destroy.ts
index a9383f2235..125d99abde 100644
--- a/src/lib/actions/sandbox/destroy.ts
+++ b/src/lib/actions/sandbox/destroy.ts
@@ -85,14 +85,19 @@ function cleanupSandboxServices(
   { stopHostServices = false }: { stopHostServices?: boolean } = {},
 ): void {
   if (stopHostServices) {
+    // `stopAll()` already runs `unloadOllamaModels()` unconditionally —
+    // see src/lib/services.ts. Don't double-call here.
     const { stopAll } = require("../../services");
     stopAll({ sandboxName });
-  }
-
-  const sb = registry.getSandbox(sandboxName);
-  if (sb?.provider?.includes("ollama")) {
-    const { unloadOllamaModels } = require("../../onboard-ollama-proxy");
-    unloadOllamaModels();
+  } else {
+    // No global stop, so `stopAll()` did not run; explicitly free Ollama
+    // models for this sandbox if its provider used Ollama. Without this
+    // branch a single-sandbox destroy would leave models loaded on the GPU.
+    const sb = registry.getSandbox(sandboxName);
+    if (sb?.provider?.includes("ollama")) {
+      const { unloadOllamaModels } = require("../../onboard-ollama-proxy");
+      unloadOllamaModels();
+    }
   }
 
   try {
@@ -235,9 +240,13 @@ export async function destroySandbox(
     nim.stopNimContainer(sandboxName, { silent: true });
   }
 
+  // The Ollama auth proxy is per-sandbox and only spawned when the provider
+  // is Ollama, so this guard scopes only `killStaleProxy()`. GPU unload is
+  // handled separately by `cleanupSandboxServices` above (which routes
+  // through `stopAll()` or directly into `unloadOllamaModels()` based on
+  // whether host services are being torn down).
   if (sb?.provider?.includes("ollama")) {
-    const { unloadOllamaModels, killStaleProxy } = require("../../onboard-ollama-proxy");
-    unloadOllamaModels();
+    const { killStaleProxy } = require("../../onboard-ollama-proxy");
     killStaleProxy();
   }
 
diff --git a/src/lib/onboard-ollama-proxy.ts b/src/lib/onboard-ollama-proxy.ts
index df02749a76..bf5ecf2a3a 100644
--- a/src/lib/onboard-ollama-proxy.ts
+++ b/src/lib/onboard-ollama-proxy.ts
@@ -9,7 +9,6 @@ const fs = require("fs");
 const os = require("os");
 const path = require("path");
 const { spawn, spawnSync } = require("child_process");
-const http = require("http");
 const { ROOT, SCRIPTS, run, runCapture, shellQuote } = require("./runner");
 const { OLLAMA_PORT, OLLAMA_PROXY_PORT } = require("./core/ports");
 const {
@@ -631,56 +630,47 @@ async function prepareOllamaModel(model, installedModels = []) {
 /**
  * Unload all running Ollama models from GPU memory.
  * Best-effort operation: silently ignores errors if Ollama is not running.
+ *
+ * Uses `spawnSync` with `curl --max-time 3` rather than Node's
+ * `http.request`/`http.get` so the unload completes before
+ * `process.exit()`. The previous async version was fire-and-forget and got
+ * dropped by Node's event loop on fast CLI exit (e.g. `nemoclaw destroy`),
+ * leaving GPU memory reserved. Reverting to async HTTP would reintroduce
+ * that race; keep it synchronous.
  */
 function unloadOllamaModels() {
   try {
-    const req = http.get(
-      {
-        hostname: "localhost",
-        port: OLLAMA_PORT,
-        path: "/api/ps",
-        timeout: 3000,
-      },
-      (res) => {
-        let data = "";
-        res.on("data", (chunk) => {
-          data += chunk;
-        });
-        res.on("end", () => {
-          if (res.statusCode !== 200) return;
-          try {
-            const parsed = JSON.parse(data);
-            const models = parsed.models || [];
-            for (const entry of models) {
-              if (!entry.name) continue;
-              const unloadReq = http.request(
-                {
-                  hostname: "localhost",
-                  port: OLLAMA_PORT,
-                  path: "/api/generate",
-                  method: "POST",
-                  timeout: 3000,
-                  headers: { "Content-Type": "application/json" },
-                },
-                () => {
-                  /* ignore response */
-                },
-              );
-              unloadReq.on("error", () => {
-                /* best-effort */
-              });
-              unloadReq.write(JSON.stringify({ model: entry.name, keep_alive: 0 }));
-              unloadReq.end();
-            }
-          } catch {
-            /* best-effort */
-          }
-        });
-      },
+    const psResult = spawnSync(
+      "curl",
+      ["-sS", "--max-time", "3", `http://localhost:${OLLAMA_PORT}/api/ps`],
+      { encoding: "utf8" },
     );
-    req.on("error", () => {
-      /* best-effort */
-    });
+    if (psResult.status !== 0) return;
+
+    const parsed = JSON.parse(psResult.stdout || "{}");
+    const models = Array.isArray(parsed.models) ? parsed.models : [];
+
+    for (const entry of models) {
+      if (!entry?.name) continue;
+      spawnSync(
+        "curl",
+        [
+          "-sS",
+          "-o",
+          "/dev/null",
+          "--max-time",
+          "3",
+          "-X",
+          "POST",
+          "-H",
+          "Content-Type: application/json",
+          "-d",
+          JSON.stringify({ model: entry.name, keep_alive: 0 }),
+          `http://localhost:${OLLAMA_PORT}/api/generate`,
+        ],
+        { encoding: "utf8" },
+      );
+    }
   } catch {
     /* best-effort */
   }
diff --git a/src/lib/services.test.ts b/src/lib/services.test.ts
index c4149014a3..4c37965de0 100644
--- a/src/lib/services.test.ts
+++ b/src/lib/services.test.ts
@@ -2,14 +2,23 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
-import http from "node:http";
+import childProcess, { type SpawnSyncReturns } from "node:child_process";
 import { mkdtempSync, writeFileSync, existsSync, rmSync } from "node:fs";
-import { join } from "node:path";
+import { join, resolve } from "node:path";
 import { tmpdir } from "node:os";
 
 // Import from compiled dist/ so coverage is attributed correctly.
 import { getServiceStatuses, showStatus, stopAll } from "../../dist/lib/services";
 
+const ollamaProxyDistPath = resolve(
+  import.meta.dirname,
+  "..",
+  "..",
+  "dist",
+  "lib",
+  "onboard-ollama-proxy.js",
+);
+
 describe("getServiceStatuses", () => {
   let pidDir: string;
 
@@ -116,16 +125,41 @@ describe("showStatus", () => {
 
 describe("stopAll", () => {
   let pidDir: string;
-  let httpGetSpy: ReturnType<typeof vi.spyOn>;
+  let spawnSyncCalls: Array<{ command: string; args: readonly string[] }>;
+  let originalSpawnSync: typeof childProcess.spawnSync;
 
   beforeEach(() => {
     pidDir = mkdtempSync(join(tmpdir(), "nemoclaw-svc-test-"));
-    const request = { on: vi.fn(() => request) };
-    httpGetSpy = vi.spyOn(http, "get").mockImplementation((() => request) as any);
+    spawnSyncCalls = [];
+    originalSpawnSync = childProcess.spawnSync;
+    // @ts-expect-error — partial mock signature is intentional.
+    childProcess.spawnSync = (command: string, args: readonly string[]) => {
+      spawnSyncCalls.push({ command, args });
+      const reply: SpawnSyncReturns<string> = {
+        pid: 0,
+        output: ["", "", ""],
+        stdout: "",
+        stderr: "",
+        status: 0,
+        signal: null,
+      };
+      // Return an empty model list so the unload's for-loop is a no-op.
+      if (command === "curl" && args.some((a) => a.endsWith("/api/ps"))) {
+        reply.stdout = JSON.stringify({ models: [] });
+        reply.output = ["", reply.stdout, ""];
+      }
+      return reply;
+    };
+    // The dist `onboard-ollama-proxy` module destructures `spawnSync` at
+    // require time, so to make `stopAll` pick up the patched function we
+    // bust its cache. `services.ts` requires the proxy lazily, so the
+    // next call sees the freshly-loaded module.
+    delete require.cache[require.resolve(ollamaProxyDistPath)];
   });
 
   afterEach(() => {
-    httpGetSpy.mockRestore();
+    childProcess.spawnSync = originalSpawnSync;
+    delete require.cache[require.resolve(ollamaProxyDistPath)];
     rmSync(pidDir, { recursive: true, force: true });
   });
 
@@ -159,9 +193,12 @@ describe("stopAll", () => {
     stopAll({ pidDir });
     logSpy.mockRestore();
 
-    expect(httpGetSpy).toHaveBeenCalledWith(
-      expect.objectContaining({ hostname: "localhost", port: 11434, path: "/api/ps" }),
-      expect.any(Function),
+    const psCall = spawnSyncCalls.find(
+      (c) =>
+        c.command === "curl" &&
+        c.args.some((a) => a.endsWith("/api/ps")),
     );
+    expect(psCall).toBeDefined();
+    expect(psCall?.args).toContain("--max-time");
   });
 });
diff --git a/test/ollama-gpu-cleanup.test.ts b/test/ollama-gpu-cleanup.test.ts
index 22671a30ef..a69031df3b 100644
--- a/test/ollama-gpu-cleanup.test.ts
+++ b/test/ollama-gpu-cleanup.test.ts
@@ -1,135 +1,136 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
-import { describe, expect, it, vi } from "vitest";
-import http from "node:http";
-
-import { unloadOllamaModels } from "../dist/lib/onboard-ollama-proxy.js";
+import childProcess, { type SpawnSyncReturns } from "node:child_process";
+import path from "node:path";
+import { describe, expect, it } from "vitest";
+
+const modulePath = path.join(
+  import.meta.dirname,
+  "..",
+  "dist",
+  "lib",
+  "onboard-ollama-proxy.js",
+);
+
+type SpawnCall = { command: string; args: readonly string[] };
+
+function ok(stdout = ""): SpawnSyncReturns<string> {
+  return {
+    pid: 0,
+    output: ["", stdout, ""],
+    stdout,
+    stderr: "",
+    status: 0,
+    signal: null,
+  };
+}
+
+function fail(stderr = "couldn't connect"): SpawnSyncReturns<string> {
+  return {
+    pid: 0,
+    output: ["", "", stderr],
+    stdout: "",
+    stderr,
+    status: 7,
+    signal: null,
+  };
+}
+
+function withMockedSpawnSync<T>(
+  responder: (call: SpawnCall) => SpawnSyncReturns<string>,
+  fn: (calls: SpawnCall[]) => T,
+): T {
+  const calls: SpawnCall[] = [];
+  const original = childProcess.spawnSync;
+  // @ts-expect-error — partial mock signature is intentional.
+  childProcess.spawnSync = (command: string, args: readonly string[]) => {
+    const call = { command, args };
+    calls.push(call);
+    return responder(call);
+  };
+  try {
+    delete require.cache[require.resolve(modulePath)];
+    return fn(calls);
+  } finally {
+    childProcess.spawnSync = original;
+    delete require.cache[require.resolve(modulePath)];
+  }
+}
 
 describe("Ollama GPU cleanup", () => {
-  it("unloads all running Ollama models via the production HTTP implementation", async () => {
-    const mockModels = {
-      models: [{ name: "llama3.1:8b" }, { name: "qwen:7b" }],
-    };
-
-    const mockResponse = {
-      statusCode: 200,
-      on: vi.fn((event, handler) => {
-        if (event === "data") {
-          handler(JSON.stringify(mockModels));
-        } else if (event === "end") {
-          handler();
+  it("calls curl synchronously to unload every running model via /api/generate", () => {
+    withMockedSpawnSync(
+      ({ args }) => {
+        if (args.some((a) => a.endsWith("/api/ps"))) {
+          return ok(
+            JSON.stringify({ models: [{ name: "llama3.1:8b" }, { name: "qwen:7b" }] }),
+          );
         }
-        return mockResponse;
-      }),
-    };
-
-    const mockGetRequest = {
-      on: vi.fn(() => mockGetRequest),
-    };
-
-    const mockUnloadRequests: Array<{
-      on: ReturnType<typeof vi.fn>;
-      write: ReturnType<typeof vi.fn>;
-      end: ReturnType<typeof vi.fn>;
-    }> = [];
-
-    const httpGetSpy = vi.spyOn(http, "get").mockImplementation(((options: any, callback: any) => {
-      expect(options.hostname).toBe("localhost");
-      expect(options.port).toBe(11434);
-      expect(options.path).toBe("/api/ps");
-      callback(mockResponse);
-      return mockGetRequest;
-    }) as any);
-
-    const httpRequestSpy = vi.spyOn(http, "request").mockImplementation(((
-      options: any,
-      callback: any,
-    ) => {
-      expect(options.hostname).toBe("localhost");
-      expect(options.port).toBe(11434);
-      expect(options.path).toBe("/api/generate");
-      expect(options.method).toBe("POST");
-      expect(options.headers["Content-Type"]).toBe("application/json");
-      const req = {
-        on: vi.fn(() => req),
-        write: vi.fn(),
-        end: vi.fn(),
-      };
-      mockUnloadRequests.push(req);
-      callback();
-      return req;
-    }) as any);
-
-    unloadOllamaModels();
-
-    expect(httpGetSpy).toHaveBeenCalledTimes(1);
-
-    await new Promise((resolve) => setTimeout(resolve, 50));
-
-    expect(httpRequestSpy).toHaveBeenCalledTimes(2);
-    expect(mockUnloadRequests.map((req) => req.write.mock.calls[0]?.[0])).toEqual([
-      JSON.stringify({ model: "llama3.1:8b", keep_alive: 0 }),
-      JSON.stringify({ model: "qwen:7b", keep_alive: 0 }),
-    ]);
-    expect(mockUnloadRequests.every((req) => req.end.mock.calls.length === 1)).toBe(true);
+        return ok();
+      },
+      (calls) => {
+        const { unloadOllamaModels } = require(modulePath);
+        unloadOllamaModels();
+
+        expect(calls).toHaveLength(3);
+
+        expect(calls[0].command).toBe("curl");
+        expect(calls[0].args).toContain("--max-time");
+        expect(calls[0].args[calls[0].args.length - 1]).toMatch(/\/api\/ps$/);
+
+        expect(calls[1].command).toBe("curl");
+        expect(calls[1].args).toContain("-X");
+        expect(calls[1].args).toContain("POST");
+        expect(calls[1].args).toContain(JSON.stringify({ model: "llama3.1:8b", keep_alive: 0 }));
+        expect(calls[1].args[calls[1].args.length - 1]).toMatch(/\/api\/generate$/);
+
+        expect(calls[2].args).toContain(JSON.stringify({ model: "qwen:7b", keep_alive: 0 }));
+      },
+    );
+  });
 
-    httpGetSpy.mockRestore();
-    httpRequestSpy.mockRestore();
+  it("returns silently when /api/ps fails (Ollama not running)", () => {
+    withMockedSpawnSync(
+      () => fail(),
+      (calls) => {
+        const { unloadOllamaModels } = require(modulePath);
+        expect(() => unloadOllamaModels()).not.toThrow();
+        expect(calls).toHaveLength(1);
+        expect(calls[0].args[calls[0].args.length - 1]).toMatch(/\/api\/ps$/);
+      },
+    );
   });
 
-  it("handles errors gracefully when Ollama is not running", () => {
-    const mockGetRequest = {
-      on: vi.fn((event, handler) => {
-        if (event === "error") {
-          handler(new Error("Connection refused"));
+  it("does not unload anything when Ollama reports no loaded models", () => {
+    withMockedSpawnSync(
+      ({ args }) => {
+        if (args.some((a) => a.endsWith("/api/ps"))) {
+          return ok(JSON.stringify({ models: [] }));
         }
-        return mockGetRequest;
-      }),
-    };
-
-    const httpGetSpy = vi.spyOn(http, "get").mockImplementation((() => mockGetRequest) as any);
-
-    expect(() => unloadOllamaModels()).not.toThrow();
-    expect(httpGetSpy).toHaveBeenCalledTimes(1);
-
-    httpGetSpy.mockRestore();
+        return ok();
+      },
+      (calls) => {
+        const { unloadOllamaModels } = require(modulePath);
+        unloadOllamaModels();
+        expect(calls).toHaveLength(1);
+      },
+    );
   });
 
-  it("does not unload anything when Ollama reports no loaded models", async () => {
-    const mockModels = { models: [] };
-
-    const mockResponse = {
-      statusCode: 200,
-      on: vi.fn((event, handler) => {
-        if (event === "data") {
-          handler(JSON.stringify(mockModels));
-        } else if (event === "end") {
-          handler();
+  it("ignores malformed JSON from /api/ps without throwing", () => {
+    withMockedSpawnSync(
+      ({ args }) => {
+        if (args.some((a) => a.endsWith("/api/ps"))) {
+          return ok("not-json");
         }
-        return mockResponse;
-      }),
-    };
-
-    const mockGetRequest = {
-      on: vi.fn(() => mockGetRequest),
-    };
-
-    const httpGetSpy = vi.spyOn(http, "get").mockImplementation(((_options: any, callback: any) => {
-      callback(mockResponse);
-      return mockGetRequest;
-    }) as any);
-
-    const httpRequestSpy = vi.spyOn(http, "request");
-
-    unloadOllamaModels();
-
-    await new Promise((resolve) => setTimeout(resolve, 50));
-
-    expect(httpGetSpy).toHaveBeenCalledTimes(1);
-    expect(httpRequestSpy).not.toHaveBeenCalled();
-
-    httpGetSpy.mockRestore();
-    httpRequestSpy.mockRestore();
+        return ok();
+      },
+      (calls) => {
+        const { unloadOllamaModels } = require(modulePath);
+        expect(() => unloadOllamaModels()).not.toThrow();
+        expect(calls).toHaveLength(1);
+      },
+    );
   });
 });