NVIDIA · cv · May 29, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/src/lib/inference/local.test.ts b/src/lib/inference/local.test.ts
@@ -34,6 +34,7 @@ import {
   getOllamaModelOptions,
   getOllamaProbeCommand,
   getOllamaWarmupCommand,
+  isOllamaRunnerCrash,
   parseOllamaList,
   parseOllamaTags,
   probeLocalProviderHealth,
@@ -984,6 +985,46 @@ describe("local inference helpers", () => {
     expect(result.message).toMatch(/did not answer the local probe in time/);
   });
 
+  it("flags runner-crash error payloads as a daemon failure (#4365)", () => {
+    // Issue #4365: a crashed model runner ("model runner has unexpectedly
+    // stopped") must surface daemonFailure so the wizard leaves the Ollama-model
+    // loop. Samples cover stopped, terminated, crashed, exited, died, killed.
+    const crashSamples = [
+      "model runner has unexpectedly stopped, this may be due to resource limitations or an internal error",
+      "llama runner process has terminated: exit status 134",
+      "model runner crashed",
+      "Ollama runner process exited unexpectedly",
+      "runner died: signal 9",
+      "runner killed",
+    ];
+    for (const errText of crashSamples) {
+      expect(isOllamaRunnerCrash(errText)).toBe(true);
+      const payload = JSON.stringify({ error: errText });
+      const captureEx = () => ({ stdout: payload, exitCode: 0, timedOut: false }); // exit code 0: the failure is only in the JSON body
+      const result = validateOllamaModel("nemotron-3-nano:30b", () => payload, undefined, captureEx);
+      expect(result.ok).toBe(false);
+      expect(result.daemonFailure).toBe(true);
+    }
+  });
+
+  it("does not flag model-fit / generic errors as a daemon failure (#4365)", () => {
+    expect(isOllamaRunnerCrash("model requires more system memory")).toBe(false);
+    expect(isOllamaRunnerCrash("model 'foo:latest' not found")).toBe(false);
+    expect(isOllamaRunnerCrash("")).toBe(false); // empty and nullish input never match
+    expect(isOllamaRunnerCrash(null)).toBe(false);
+    expect(isOllamaRunnerCrash(undefined)).toBe(false);
+    const payload = JSON.stringify({ error: "model requires more system memory" });
+    const captureEx = () => ({ stdout: payload, exitCode: 0, timedOut: false });
+    const result = validateOllamaModel(
+      "gabegoodhart/minimax-m2.1:latest",
+      () => payload,
+      () => false,
+      captureEx,
+    );
+    expect(result.ok).toBe(false);
+    expect(result.daemonFailure).toBeUndefined(); // the flag is absent, not set to false
+  });
+
   it("passes when first probe times out then retry returns OOM error but total RAM is sufficient", () => {
     // Composite: mode 2 (first probe timeout) + mode 1 (retry returns OOM error).
     const freeOutput = "               total        used        free\nMem:          131072       120000       1000";

diff --git a/src/lib/inference/local.ts b/src/lib/inference/local.ts
@@ -175,6 +175,26 @@ export interface ValidationResult {
   ok: boolean;
   message?: string;
   diagnostic?: string;
+  /**
+   * Set when the failure points at the Ollama daemon / model runner itself,
+   * not the chosen model. Callers escape the Ollama-model loop instead of
+   * asking for another tag that would hit the same failure. (#4365)
+   */
+  daemonFailure?: boolean;
+}
+
+/**
+ * Reports whether an Ollama probe error says the daemon's model runner crashed,
+ * stopped, or otherwise died (rather than the chosen model being unsuitable).
+ * Matches the word "runner" followed within 80 characters by stopped, terminated,
+ * crashed, exited, died, or killed, case-insensitively; empty input is false. (#4365)
+ */
+export function isOllamaRunnerCrash(errText: string | null | undefined): boolean {
+  const text = String(errText || ""); // null, undefined and "" all become ""
+  if (!text) return false;
+  return /\brunner\b[\s\S]{0,80}\b(?:stopped|terminated|crashed|exited|died|killed)\b/i.test(
+    text,
+  );
 }
 
 export interface LocalProviderHealthStatus {
@@ -994,6 +1014,7 @@ export function validateOllamaModel(
         return {
           ok: false,
           message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
+          ...(isOllamaRunnerCrash(errText) ? { daemonFailure: true } : {}),
         };
       }
     }

diff --git a/src/lib/inference/ollama/proxy.ts b/src/lib/inference/ollama/proxy.ts
@@ -739,7 +739,12 @@ async function checkOllamaModelToolSupport(
 async function prepareOllamaModel(
   model,
   installedModels: string[] = [],
-): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
+): Promise<{
+  ok: boolean;
+  message?: string;
+  allowToolsIncompatible?: boolean;
+  daemonFailure?: boolean;
+}> {
   const alreadyInstalled = installedModels.includes(model);
   if (!alreadyInstalled) {
     console.log(`  Pulling Ollama model: ${model}`);

diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
@@ -526,6 +526,7 @@ import {
   resolveQrSelectedChannels,
 } from "./onboard/messaging-state";
 import { getValidatedMessagingToken, getValidatedMessagingTokenByEnvKey } from "./onboard/messaging-token";
+import { handleOllamaProbeFailure } from "./onboard/ollama-probe-failure";
 import { runOllamaStartupOrGate } from "./onboard/ollama-startup";
 import type {
   DockerDriverBinaryOverrides,
@@ -3935,10 +3936,8 @@ async function selectAndValidateOllamaModel(
     }
     const probe = await prepareOllamaModel(selectedModel, installedModels);
     if (!probe.ok) {
-      console.error(`  ${probe.message}`);
-      if (isNonInteractive()) abortNonInteractive(`Ollama model '${selectedModel}' unavailable.`);
-      console.log("  Choose a different Ollama model or select Other.");
-      console.log("");
+      const action = handleOllamaProbeFailure(probe, selectedModel, isNonInteractive);
+      if (action === "back-to-selection") return { outcome: "back-to-selection" };
       continue;
     }
     const allowToolsIncompatible = probe.allowToolsIncompatible === true;

diff --git a/src/lib/onboard/ollama-probe-failure.test.ts b/src/lib/onboard/ollama-probe-failure.test.ts
@@ -0,0 +1,171 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+// Issue #4365: focused unit tests for the Ollama probe-failure dispatcher.
+// Mirrors the five outcomes handleOllamaProbeFailure picks between: pinned-
+// provider exit, non-interactive daemon abort, interactive daemon escape,
+// non-interactive model abort, and the "choose another model" continue path.
+
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+import { handleOllamaProbeFailure } from "../../../dist/lib/onboard/ollama-probe-failure";
+
+describe("handleOllamaProbeFailure (#4365)", () => {
+  let originalProvider: string | undefined;
+  let originalNonInteractive: string | undefined;
+
+  beforeEach(() => { // snapshot the env so restore() can undo per-test mutations
+    originalProvider = process.env.NEMOCLAW_PROVIDER;
+    originalNonInteractive = process.env.NEMOCLAW_NON_INTERACTIVE; // NOTE(review): no test in this file modifies this variable
+  });
+
+  function restore() { // invoked from each test's finally block
+    if (originalProvider === undefined) delete process.env.NEMOCLAW_PROVIDER;
+    else process.env.NEMOCLAW_PROVIDER = originalProvider;
+    if (originalNonInteractive === undefined) delete process.env.NEMOCLAW_NON_INTERACTIVE;
+    else process.env.NEMOCLAW_NON_INTERACTIVE = originalNonInteractive;
+  }
+
+  it("exits when a pinned Ollama provider hits a daemon failure", () => {
+    process.env.NEMOCLAW_PROVIDER = "ollama"; // presumably what isOllamaProviderPinned() reads — confirm
+    const errSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+    const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+    const exitSpy = vi.spyOn(process, "exit").mockImplementation(((code?: number) => { // throw so the exit is observable and halts the handler
+      throw new Error(`process.exit:${code ?? 0}`);
+    }) as never);
+
+    try {
+      expect(() =>
+        handleOllamaProbeFailure(
+          { ok: false, message: "runner crashed", daemonFailure: true },
+          "nemotron-3-nano:30b",
+          () => false, // isNonInteractive: interactive run
+        ),
+      ).toThrow(/process\.exit:1/);
+      const errLines = errSpy.mock.calls.map((c) => String(c[0]));
+      expect(
+        errLines.some((l) =>
+          l.includes("NEMOCLAW_PROVIDER pins onboarding to Ollama but the Ollama model runner is unhealthy"),
+        ),
+      ).toBe(true);
+    } finally {
+      errSpy.mockRestore();
+      logSpy.mockRestore();
+      exitSpy.mockRestore();
+      restore();
+    }
+  });
+
+  it("aborts non-interactive runs on a daemon failure", () => {
+    delete process.env.NEMOCLAW_PROVIDER;
+    const errSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+    const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+    const exitSpy = vi.spyOn(process, "exit").mockImplementation(((code?: number) => {
+      throw new Error(`process.exit:${code ?? 0}`);
+    }) as never);
+
+    try {
+      expect(() =>
+        handleOllamaProbeFailure(
+          { ok: false, message: "runner died", daemonFailure: true },
+          "nemotron-3-nano:30b",
+          () => true, // isNonInteractive: non-interactive run
+        ),
+      ).toThrow(/process\.exit:1/);
+      const errLines = errSpy.mock.calls.map((c) => String(c[0]));
+      expect(
+        errLines.some((l) => l.includes("Aborting: Ollama daemon is unhealthy")),
+      ).toBe(true);
+    } finally {
+      errSpy.mockRestore();
+      logSpy.mockRestore();
+      exitSpy.mockRestore();
+      restore();
+    }
+  });
+
+  it("returns 'back-to-selection' with an escape hint for interactive non-pinned daemon failures", () => {
+    delete process.env.NEMOCLAW_PROVIDER;
+    const errSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+    const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+
+    try {
+      const action = handleOllamaProbeFailure(
+        { ok: false, message: "model runner has unexpectedly stopped", daemonFailure: true },
+        "qwen2.5:7b",
+        () => false, // isNonInteractive: interactive run
+      );
+      expect(action).toBe("back-to-selection");
+      const logLines = logSpy.mock.calls.map((c) => String(c[0]));
+      expect(
+        logLines.some((l) =>
+          l.includes("Ollama itself appears unavailable"),
+        ),
+      ).toBe(true);
+      expect(
+        logLines.some((l) =>
+          l.includes("Returning to provider selection; choose a non-Ollama provider"),
+        ),
+      ).toBe(true);
+    } finally {
+      errSpy.mockRestore();
+      logSpy.mockRestore();
+      restore();
+    }
+  });
+
+  it("returns 'continue' on a model-level failure (no daemonFailure flag)", () => {
+    delete process.env.NEMOCLAW_PROVIDER;
+    const errSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+    const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+
+    try {
+      const action = handleOllamaProbeFailure(
+        { ok: false, message: "model requires more system memory" },
+        "qwen2.5:7b",
+        () => false, // isNonInteractive: interactive run
+      );
+      expect(action).toBe("continue");
+      const logLines = logSpy.mock.calls.map((c) => String(c[0]));
+      expect(
+        logLines.some((l) => l.includes("Choose a different Ollama model")),
+      ).toBe(true);
+      // Daemon-escape hint MUST NOT appear in the non-daemon path.
+      expect(
+        logLines.some((l) => l.includes("Ollama itself appears unavailable")),
+      ).toBe(false);
+    } finally {
+      errSpy.mockRestore();
+      logSpy.mockRestore();
+      restore();
+    }
+  });
+
+  it("aborts non-interactive model-level failures via the legacy message", () => {
+    delete process.env.NEMOCLAW_PROVIDER;
+    const errSpy = vi.spyOn(console, "error").mockImplementation(() => {});
+    const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
+    const exitSpy = vi.spyOn(process, "exit").mockImplementation(((code?: number) => {
+      throw new Error(`process.exit:${code ?? 0}`);
+    }) as never);
+
+    try {
+      expect(() =>
+        handleOllamaProbeFailure(
+          { ok: false, message: "model requires more system memory" },
+          "qwen2.5:7b",
+          () => true, // isNonInteractive: non-interactive run
+        ),
+      ).toThrow(/process\.exit:1/);
+      const errLines = errSpy.mock.calls.map((c) => String(c[0]));
+      expect(
+        errLines.some((l) => l.includes("Aborting: Ollama model 'qwen2.5:7b' unavailable")),
+      ).toBe(true);
+    } finally {
+      errSpy.mockRestore();
+      logSpy.mockRestore();
+      exitSpy.mockRestore();
+      restore();
+    }
+  });
+});
diff --git a/src/lib/onboard/ollama-probe-failure.ts b/src/lib/onboard/ollama-probe-failure.ts
@@ -0,0 +1,60 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { abortNonInteractive } from "./non-interactive-abort";
+import { isOllamaProviderPinned } from "./ollama-startup";
+
+export interface OllamaProbeFailureInput {
+  ok: boolean; // not inspected by handleOllamaProbeFailure
+  message?: string; // logged via console.error before any branching
+  daemonFailure?: boolean; // truthy routes to the daemon-failure branches
+}
+
+export type OllamaProbeFailureAction = "back-to-selection" | "continue"; // leave Ollama for provider selection vs. pick another Ollama model
+
+/**
+ * Centralizes selectAndValidateOllamaModel's reaction to a failed Ollama
+ * probe: logs probe.message with console.error, then picks a branch. Lives
+ * outside onboard.ts to satisfy the codebase growth guardrail. (#4365)
+ *
+ * - daemonFailure → the Ollama daemon / runner itself is broken. Pinned-
+ *   provider runs call process.exit(1), non-interactive runs abort, and
+ *   interactive runs get "back-to-selection" (another Ollama tag would
+ *   loop on the same failure).
+ * - otherwise → the chosen model is unsuitable. Non-interactive runs
+ *   abort; interactive runs get "continue" so the caller prompts for a
+ *   different Ollama tag (existing behavior).
+ */
+export function handleOllamaProbeFailure(
+  probe: OllamaProbeFailureInput,
+  selectedModel: string,
+  isNonInteractive: () => boolean,
+): OllamaProbeFailureAction {
+  console.error(`  ${probe.message}`); // NOTE(review): message is optional; undefined prints literally as "undefined"
+  if (probe.daemonFailure) {
+    if (isOllamaProviderPinned()) {
+      console.error(
+        "  NEMOCLAW_PROVIDER pins onboarding to Ollama but the Ollama model runner is unhealthy; refusing to loop on Ollama model selection.",
+      );
+      process.exit(1);
+    }
+    if (isNonInteractive()) {
+      abortNonInteractive(
+        `Ollama daemon is unhealthy for model '${selectedModel}'.`,
+        "Pick a non-Ollama provider, restart Ollama, or rerun with NEMOCLAW_PROVIDER set explicitly.",
+      );
+    } // presumably abortNonInteractive() never returns; otherwise this falls through to the interactive path — confirm
+    console.log(
+      "  Ollama itself appears unavailable — selecting a different Ollama model would hit the same failure.",
+    );
+    console.log(
+      "  Returning to provider selection; choose a non-Ollama provider to continue. (#4365)",
+    );
+    console.log("");
+    return "back-to-selection";
+  }
+  if (isNonInteractive()) abortNonInteractive(`Ollama model '${selectedModel}' unavailable.`);
+  console.log("  Choose a different Ollama model or select Other."); // interactive, model-level failure only
+  console.log("");
+  return "continue";
+}