Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/lib/inference/local.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import {
getOllamaModelOptions,
getOllamaProbeCommand,
getOllamaWarmupCommand,
isOllamaRunnerCrash,
parseOllamaList,
parseOllamaTags,
probeLocalProviderHealth,
Expand Down Expand Up @@ -984,6 +985,46 @@ describe("local inference helpers", () => {
expect(result.message).toMatch(/did not answer the local probe in time/);
});

it("flags runner-crash error payloads as a daemon failure (#4365)", () => {
  // Issue #4365: a crashed Ollama model runner ("model runner has
  // unexpectedly stopped") must be reported with daemonFailure so the wizard
  // leaves the Ollama-model inner loop rather than prompting for another tag.
  const runnerCrashMessages = [
    "model runner has unexpectedly stopped, this may be due to resource limitations or an internal error",
    "llama runner process has terminated: exit status 134",
    "model runner crashed",
    "Ollama runner process exited unexpectedly",
    "runner died: signal 9",
    "runner killed",
  ];
  runnerCrashMessages.forEach((message) => {
    // The classifier alone must recognise every sample...
    expect(isOllamaRunnerCrash(message)).toBe(true);

    // ...and the validator must propagate that verdict when the probe
    // response body carries the same error text.
    const body = JSON.stringify({ error: message });
    const outcome = validateOllamaModel(
      "nemotron-3-nano:30b",
      () => body,
      undefined,
      () => ({ stdout: body, exitCode: 0, timedOut: false }),
    );
    expect(outcome.ok).toBe(false);
    expect(outcome.daemonFailure).toBe(true);
  });
});

it("does not flag model-fit / generic errors as a daemon failure (#4365)", () => {
  // Model-level errors and empty / missing input must never be classified as
  // a runner crash.
  const nonCrashInputs = [
    "model requires more system memory",
    "model 'foo:latest' not found",
    "",
    null,
    undefined,
  ];
  for (const input of nonCrashInputs) {
    expect(isOllamaRunnerCrash(input)).toBe(false);
  }

  // The validator still fails on a model-fit error, but leaves the
  // daemonFailure flag off the result entirely.
  const body = JSON.stringify({ error: "model requires more system memory" });
  const probeStub = () => ({ stdout: body, exitCode: 0, timedOut: false });
  const outcome = validateOllamaModel(
    "gabegoodhart/minimax-m2.1:latest",
    () => body,
    () => false,
    probeStub,
  );
  expect(outcome.ok).toBe(false);
  expect(outcome.daemonFailure).toBeUndefined();
});

it("passes when first probe times out then retry returns OOM error but total RAM is sufficient", () => {
// Composite: mode 2 (first probe timeout) + mode 1 (retry returns OOM error).
const freeOutput = " total used free\nMem: 131072 120000 1000";
Expand Down
21 changes: 21 additions & 0 deletions src/lib/inference/local.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,26 @@ export interface ValidationResult {
ok: boolean;
message?: string;
diagnostic?: string;
/**
* Set when the failure points at the Ollama daemon / model runner itself,
* not the chosen model. Callers escape the Ollama-model loop instead of
* asking for another tag that would hit the same failure. (#4365)
*/
daemonFailure?: boolean;
}

/**
 * Tells whether an Ollama probe error describes the daemon's model runner
 * stopping, terminating, crashing, exiting, dying, or being killed — as
 * opposed to the selected model being unsuitable. Choosing another model
 * would hit the same failure, so the wizard uses this to fall back to
 * provider selection. (#4365)
 *
 * @param errText - Error text taken from the probe; `null`, `undefined`, and
 *   the empty string are all treated as "no error".
 * @returns `true` when the word "runner" is followed, within 80 characters,
 *   by one of the crash verbs as a whole word (case-insensitive).
 */
export function isOllamaRunnerCrash(errText: string | null | undefined): boolean {
  const message = errText ? String(errText) : "";
  if (message.length === 0) {
    return false;
  }
  // "runner" and the verb must both be whole words; the gap between them may
  // span newlines but is capped at 80 characters.
  const runnerDeathPattern =
    /\brunner\b[\s\S]{0,80}\b(?:stopped|terminated|crashed|exited|died|killed)\b/i;
  return runnerDeathPattern.test(message);
}

export interface LocalProviderHealthStatus {
Expand Down Expand Up @@ -994,6 +1014,7 @@ export function validateOllamaModel(
return {
ok: false,
message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
...(isOllamaRunnerCrash(errText) ? { daemonFailure: true } : {}),
};
}
}
Expand Down
7 changes: 6 additions & 1 deletion src/lib/inference/ollama/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,12 @@ async function checkOllamaModelToolSupport(
async function prepareOllamaModel(
model,
installedModels: string[] = [],
): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
): Promise<{
ok: boolean;
message?: string;
allowToolsIncompatible?: boolean;
daemonFailure?: boolean;
}> {
const alreadyInstalled = installedModels.includes(model);
if (!alreadyInstalled) {
console.log(` Pulling Ollama model: ${model}`);
Expand Down
7 changes: 3 additions & 4 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ import {
resolveQrSelectedChannels,
} from "./onboard/messaging-state";
import { getValidatedMessagingToken, getValidatedMessagingTokenByEnvKey } from "./onboard/messaging-token";
import { handleOllamaProbeFailure } from "./onboard/ollama-probe-failure";
import { runOllamaStartupOrGate } from "./onboard/ollama-startup";
import type {
DockerDriverBinaryOverrides,
Expand Down Expand Up @@ -3935,10 +3936,8 @@ async function selectAndValidateOllamaModel(
}
const probe = await prepareOllamaModel(selectedModel, installedModels);
if (!probe.ok) {
console.error(` ${probe.message}`);
if (isNonInteractive()) abortNonInteractive(`Ollama model '${selectedModel}' unavailable.`);
console.log(" Choose a different Ollama model or select Other.");
console.log("");
const action = handleOllamaProbeFailure(probe, selectedModel, isNonInteractive);
if (action === "back-to-selection") return { outcome: "back-to-selection" };
continue;
}
const allowToolsIncompatible = probe.allowToolsIncompatible === true;
Expand Down
171 changes: 171 additions & 0 deletions src/lib/onboard/ollama-probe-failure.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

// Issue #4365: focused unit tests for the Ollama probe-failure dispatcher.
// Mirrors the four branches handleOllamaProbeFailure picks between: pinned-
// provider exit, non-interactive abort, interactive daemon escape, and the
// non-daemon "choose another model" continue path.

import { beforeEach, describe, expect, it, vi } from "vitest";

import { handleOllamaProbeFailure } from "../../../dist/lib/onboard/ollama-probe-failure";

describe("handleOllamaProbeFailure (#4365)", () => {
  // Environment variables snapshotted before every test and put back by
  // restoreEnv() in each test's finally block.
  const trackedEnvKeys = ["NEMOCLAW_PROVIDER", "NEMOCLAW_NON_INTERACTIVE"] as const;
  const savedEnv: Record<string, string | undefined> = {};

  beforeEach(() => {
    for (const key of trackedEnvKeys) {
      savedEnv[key] = process.env[key];
    }
  });

  /** Puts the tracked environment variables back to their pre-test values. */
  const restoreEnv = (): void => {
    for (const key of trackedEnvKeys) {
      const previous = savedEnv[key];
      if (previous === undefined) delete process.env[key];
      else process.env[key] = previous;
    }
  };

  /** Mutes console.error / console.log and hands back the spies. */
  const silenceConsole = () => {
    const errSpy = vi.spyOn(console, "error").mockImplementation(() => {});
    const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
    return { errSpy, logSpy };
  };

  /** Replaces process.exit with a throwing stub so the exit code is observable. */
  const trapExit = () =>
    vi.spyOn(process, "exit").mockImplementation(((code?: number) => {
      throw new Error(`process.exit:${code ?? 0}`);
    }) as never);

  /** Stringified first argument of every recorded call on a spy. */
  const firstArgs = (spy: {
    mock: { calls: ReadonlyArray<ReadonlyArray<unknown>> };
  }): string[] => spy.mock.calls.map((call) => String(call[0]));

  /** Restores the given spies in order, then the environment. */
  const cleanUp = (...spies: Array<{ mockRestore: () => void }>): void => {
    for (const spy of spies) spy.mockRestore();
    restoreEnv();
  };

  it("exits when a pinned Ollama provider hits a daemon failure", () => {
    process.env.NEMOCLAW_PROVIDER = "ollama";
    const { errSpy, logSpy } = silenceConsole();
    const exitSpy = trapExit();

    try {
      const invoke = () =>
        handleOllamaProbeFailure(
          { ok: false, message: "runner crashed", daemonFailure: true },
          "nemotron-3-nano:30b",
          () => false,
        );
      expect(invoke).toThrow(/process\.exit:1/);

      const pinnedNotice =
        "NEMOCLAW_PROVIDER pins onboarding to Ollama but the Ollama model runner is unhealthy";
      expect(firstArgs(errSpy).some((line) => line.includes(pinnedNotice))).toBe(true);
    } finally {
      cleanUp(errSpy, logSpy, exitSpy);
    }
  });

  it("aborts non-interactive runs on a daemon failure", () => {
    delete process.env.NEMOCLAW_PROVIDER;
    const { errSpy, logSpy } = silenceConsole();
    const exitSpy = trapExit();

    try {
      const invoke = () =>
        handleOllamaProbeFailure(
          { ok: false, message: "runner died", daemonFailure: true },
          "nemotron-3-nano:30b",
          () => true,
        );
      expect(invoke).toThrow(/process\.exit:1/);

      const abortNotice = "Aborting: Ollama daemon is unhealthy";
      expect(firstArgs(errSpy).some((line) => line.includes(abortNotice))).toBe(true);
    } finally {
      cleanUp(errSpy, logSpy, exitSpy);
    }
  });

  it("returns 'back-to-selection' with an escape hint for interactive non-pinned daemon failures", () => {
    delete process.env.NEMOCLAW_PROVIDER;
    const { errSpy, logSpy } = silenceConsole();

    try {
      const action = handleOllamaProbeFailure(
        { ok: false, message: "model runner has unexpectedly stopped", daemonFailure: true },
        "qwen2.5:7b",
        () => false,
      );
      expect(action).toBe("back-to-selection");

      const logged = firstArgs(logSpy);
      expect(logged.some((line) => line.includes("Ollama itself appears unavailable"))).toBe(true);
      expect(
        logged.some((line) =>
          line.includes("Returning to provider selection; choose a non-Ollama provider"),
        ),
      ).toBe(true);
    } finally {
      cleanUp(errSpy, logSpy);
    }
  });

  it("returns 'continue' on a model-level failure (no daemonFailure flag)", () => {
    delete process.env.NEMOCLAW_PROVIDER;
    const { errSpy, logSpy } = silenceConsole();

    try {
      const action = handleOllamaProbeFailure(
        { ok: false, message: "model requires more system memory" },
        "qwen2.5:7b",
        () => false,
      );
      expect(action).toBe("continue");

      const logged = firstArgs(logSpy);
      expect(logged.some((line) => line.includes("Choose a different Ollama model"))).toBe(true);
      // Daemon-escape hint MUST NOT appear in the non-daemon path.
      expect(logged.some((line) => line.includes("Ollama itself appears unavailable"))).toBe(false);
    } finally {
      cleanUp(errSpy, logSpy);
    }
  });

  it("aborts non-interactive model-level failures via the legacy message", () => {
    delete process.env.NEMOCLAW_PROVIDER;
    const { errSpy, logSpy } = silenceConsole();
    const exitSpy = trapExit();

    try {
      const invoke = () =>
        handleOllamaProbeFailure(
          { ok: false, message: "model requires more system memory" },
          "qwen2.5:7b",
          () => true,
        );
      expect(invoke).toThrow(/process\.exit:1/);

      const legacyNotice = "Aborting: Ollama model 'qwen2.5:7b' unavailable";
      expect(firstArgs(errSpy).some((line) => line.includes(legacyNotice))).toBe(true);
    } finally {
      cleanUp(errSpy, logSpy, exitSpy);
    }
  });
});
60 changes: 60 additions & 0 deletions src/lib/onboard/ollama-probe-failure.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { abortNonInteractive } from "./non-interactive-abort";
import { isOllamaProviderPinned } from "./ollama-startup";

/**
 * Shape of the failed-probe result accepted by handleOllamaProbeFailure.
 * Every field beyond `ok` is optional.
 */
export interface OllamaProbeFailureInput {
/** Probe outcome flag. handleOllamaProbeFailure does not read it itself. */
ok: boolean;
/** Failure text that handleOllamaProbeFailure writes to console.error. */
message?: string;
/**
 * When truthy, handleOllamaProbeFailure takes its daemon-failure path
 * instead of the "choose a different model" path.
 */
daemonFailure?: boolean;
}

/**
 * Result of handleOllamaProbeFailure: "back-to-selection" is returned on the
 * daemon-failure path, "continue" on the model-level failure path.
 */
export type OllamaProbeFailureAction = "back-to-selection" | "continue";

/**
 * Decides how selectAndValidateOllamaModel reacts to a failed Ollama probe.
 * Kept in its own module, outside onboard.ts, so the codebase growth
 * guardrail stays green and the sequence has a focused test surface. (#4365)
 *
 * The probe message is always echoed to stderr first. After that:
 *
 * - Model-level failure (no `daemonFailure`): non-interactive runs call
 *   abortNonInteractive; interactive runs are told to pick another Ollama
 *   model and the function returns "continue" (existing behavior).
 * - Daemon failure (`daemonFailure` set): the Ollama daemon / runner itself
 *   is broken, so another Ollama tag would loop on the same failure. A
 *   pinned provider exits with status 1, a non-interactive run calls
 *   abortNonInteractive, and an interactive run is sent back to provider
 *   selection via "back-to-selection".
 *
 * NOTE(review): abortNonInteractive is presumably terminal (does not return);
 * if it ever returned, execution would fall through to the interactive path.
 *
 * @param probe - Failed probe result; `message` is printed and
 *   `daemonFailure` selects the branch.
 * @param selectedModel - Model tag interpolated into the abort messages.
 * @param isNonInteractive - Callback reporting whether prompts are disabled.
 * @returns The action the caller's selection loop should take.
 */
export function handleOllamaProbeFailure(
  probe: OllamaProbeFailureInput,
  selectedModel: string,
  isNonInteractive: () => boolean,
): OllamaProbeFailureAction {
  console.error(` ${probe.message}`);

  // Model-level failure: the daemon is fine, only this tag is unsuitable.
  if (!probe.daemonFailure) {
    if (isNonInteractive()) {
      abortNonInteractive(`Ollama model '${selectedModel}' unavailable.`);
    }
    console.log(" Choose a different Ollama model or select Other.");
    console.log("");
    return "continue";
  }

  // Daemon failure with a pinned provider: there is nothing else to select.
  if (isOllamaProviderPinned()) {
    console.error(
      " NEMOCLAW_PROVIDER pins onboarding to Ollama but the Ollama model runner is unhealthy; refusing to loop on Ollama model selection.",
    );
    process.exit(1);
  }

  // Daemon failure without a prompt available: abort with a remediation hint.
  if (isNonInteractive()) {
    const reason = `Ollama daemon is unhealthy for model '${selectedModel}'.`;
    const remediation =
      "Pick a non-Ollama provider, restart Ollama, or rerun with NEMOCLAW_PROVIDER set explicitly.";
    abortNonInteractive(reason, remediation);
  }

  // Interactive daemon failure: explain the escape and leave the model loop.
  const escapeNotice = [
    " Ollama itself appears unavailable — selecting a different Ollama model would hit the same failure.",
    " Returning to provider selection; choose a non-Ollama provider to continue. (#4365)",
    "",
  ];
  for (const line of escapeNotice) {
    console.log(line);
  }
  return "back-to-selection";
}
Loading
Loading