Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 60 additions & 27 deletions src/lib/inference/local.ts
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,7 @@ export function validateOllamaModel(
runCaptureImpl?: RunCaptureFn,
isSparkImpl?: () => boolean,
runCaptureExImpl?: RunCaptureExFn,
options: { allowToolsIncompatible?: boolean } = {},
): ValidationResult {
const capture = runCaptureImpl ?? runCapture;
const captureEx = runCaptureExImpl ?? runCaptureEx;
Expand Down Expand Up @@ -959,37 +960,46 @@ export function validateOllamaModel(
if (parsed && typeof parsed.error === "string" && parsed.error.trim()) {
const errText = parsed.error.trim();
if (/does not support tools/i.test(errText)) {
return {
ok: false,
message:
`Selected Ollama model '${model}' does not support tool calling, which ` +
`NemoClaw agents require. Run \`ollama show <model>\` to inspect a ` +
`model's capabilities and pick one whose list includes 'tools'.`,
};
}
// Ollama checks available RAM instead of total; false positive on DGX Spark
// unified-memory hosts where GPU and CPU share the same 128 GB pool. (#3251)
const memMatch = errText.match(
/model requires more system memory \(([0-9.]+)\s*GiB\) than is available \([0-9.]+\s*GiB\)/i,
);
if (memMatch && sparkHost) {
const requiresGiB = parseFloat(memMatch[1]);
const freeOut = capture(["free", "-m"], { ignoreError: true });
if (freeOut) {
const memLine = freeOut.split("\n").find((l: string) => l.includes("Mem:"));
if (memLine) {
const totalMB = parseInt(memLine.trim().split(/\s+/)[1], 10) || 0;
const totalGiB = totalMB / 1024;
if (totalGiB >= requiresGiB) {
return { ok: true };
if (options.allowToolsIncompatible !== true) {
return {
ok: false,
message:
`Selected Ollama model '${model}' does not support tool calling, which ` +
`NemoClaw agents require. Run \`ollama show <model>\` to inspect a ` +
`model's capabilities and pick one whose list includes 'tools'.`,
};
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
// Override accepted — log and fall through to the Spark CPU-only
// runtime check below so it still enforces. (#4241)
console.warn(
` ⚠ Ollama model '${model}' confirmed not to support tools; ` +
`continuing because the no-tools override was accepted.`,
);
} else {
// Ollama checks available RAM instead of total; false positive on DGX Spark
// unified-memory hosts where GPU and CPU share the same 128 GB pool. (#3251)
const memMatch = errText.match(
/model requires more system memory \(([0-9.]+)\s*GiB\) than is available \([0-9.]+\s*GiB\)/i,
);
if (memMatch && sparkHost) {
const requiresGiB = parseFloat(memMatch[1]);
const freeOut = capture(["free", "-m"], { ignoreError: true });
if (freeOut) {
const memLine = freeOut.split("\n").find((l: string) => l.includes("Mem:"));
if (memLine) {
const totalMB = parseInt(memLine.trim().split(/\s+/)[1], 10) || 0;
const totalGiB = totalMB / 1024;
if (totalGiB >= requiresGiB) {
return { ok: true };
}
}
}
}
return {
ok: false,
message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
};
}
return {
ok: false,
message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
};
}
} catch {
/* ignored */
Expand All @@ -1008,6 +1018,29 @@ export function validateOllamaModel(
return { ok: true };
}

// Helpers for threading the user's "use this no-tools Ollama model anyway"
// override (see #4241) through onboard validators so they don't loop the
// wizard back to model selection after the user already accepted.

/**
 * Assemble the endpoint-probe options used when validating an Ollama model
 * during onboarding.
 *
 * @param allowToolsIncompatible - `true` when the caller has accepted a model
 *   that does not support tool calling; this turns off the tool-calling
 *   requirement for the chat-completions probe.
 * @returns An options object in which `skipResponsesProbe` is always `true`,
 *   `requireChatCompletionsToolCalling` is the negation of
 *   `allowToolsIncompatible`, and `allowHostDockerInternal` is `true` only
 *   when the resolved Ollama host equals `OLLAMA_HOST_DOCKER_INTERNAL`.
 */
export function buildOllamaProbeOptions(allowToolsIncompatible: boolean): {
  skipResponsesProbe: true;
  requireChatCompletionsToolCalling: boolean;
  allowHostDockerInternal: boolean;
} {
  // Tool calling stays mandatory unless the override was explicitly accepted.
  const toolCallingRequired = !allowToolsIncompatible;
  const resolvedHostIsDockerInternal = getResolvedOllamaHost() === OLLAMA_HOST_DOCKER_INTERNAL;

  return {
    skipResponsesProbe: true,
    requireChatCompletionsToolCalling: toolCallingRequired,
    allowHostDockerInternal: resolvedHostIsDockerInternal,
  };
}

/**
 * Run `validateOllamaModel` for `model`, forwarding only the no-tools
 * override flag.
 *
 * Callers that only need to pass the override can use this instead of
 * spelling out the positional `undefined` placeholders themselves.
 *
 * @param model - Name of the Ollama model to validate.
 * @param allowToolsIncompatible - Passed through as
 *   `options.allowToolsIncompatible` on the underlying validator.
 * @returns Whatever `validateOllamaModel` returns for this model.
 */
export function validateOllamaModelWithToolsOverride(
  model: string,
  allowToolsIncompatible: boolean,
): ValidationResult {
  const overrideOptions = { allowToolsIncompatible };
  // The three injectable implementation slots are intentionally left
  // undefined; only the trailing options argument is supplied.
  return validateOllamaModel(model, undefined, undefined, undefined, overrideOptions);
}

// ─── Tools-capability probe (issue #2667) ─────────────────────────
//
// Ollama exposes a model's declared capabilities via /api/show. Tool calling
Expand Down
23 changes: 17 additions & 6 deletions src/lib/inference/ollama/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ function printToolsIncompatibleWarning(model: string): void {

async function checkOllamaModelToolSupport(
model: string,
): Promise<{ ok: boolean; message?: string }> {
): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
const caps = probeOllamaModelCapabilities(model);

if (caps.supportsTools === true) {
Expand All @@ -705,19 +705,23 @@ async function checkOllamaModelToolSupport(
}

// supportsTools === false — model is on disk but advertises no tools support.
// Every code path below that returns ok:true must also set
// allowToolsIncompatible:true so downstream validators (validateOllamaModel,
// probeChatCompletionsToolCalling via setupOllama / setupInference) don't
// reject the same model on the same condition — see issue #4241.
printToolsIncompatibleWarning(model);

if (isProxyAutoYes()) {
console.log(" Continuing because --yes was passed.");
return { ok: true };
return { ok: true, allowToolsIncompatible: true };
}

if (isProxyNonInteractive()) {
if (process.env.NEMOCLAW_OLLAMA_REQUIRE_TOOLS === "0") {
console.error(
` NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 set — proceeding with '${model}' despite missing 'tools'.`,
);
return { ok: true };
return { ok: true, allowToolsIncompatible: true };
}
console.error(
" Re-run with NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 to override, or pick a tools-capable model.",
Expand All @@ -729,10 +733,13 @@ async function checkOllamaModelToolSupport(
if (!proceed) {
return { ok: false, message: "Choose a tools-capable model." };
}
return { ok: true };
return { ok: true, allowToolsIncompatible: true };
}

async function prepareOllamaModel(model, installedModels = []) {
async function prepareOllamaModel(
model,
installedModels: string[] = [],
): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
const alreadyInstalled = installedModels.includes(model);
if (!alreadyInstalled) {
console.log(` Pulling Ollama model: ${model}`);
Expand All @@ -753,7 +760,11 @@ async function prepareOllamaModel(model, installedModels = []) {

console.log(` Loading Ollama model: ${model}`);
run(getOllamaWarmupCommand(model), { ignoreError: true });
return validateOllamaModel(model);
const allowToolsIncompatible = capCheck.allowToolsIncompatible === true;
const result = validateOllamaModel(model, undefined, undefined, undefined, {
allowToolsIncompatible,
});
return { ...result, allowToolsIncompatible };
}

/**
Expand Down
24 changes: 12 additions & 12 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4107,7 +4107,7 @@ const { readLiveInference, readRecordedProvider, readRecordedNimContainer, readR
});

type OllamaModelSelectionOutcome =
| { outcome: "selected"; model: string }
| { outcome: "selected"; model: string; allowToolsIncompatible: boolean }
| { outcome: "back-to-selection" };
// Pick an Ollama model, pull it if missing, and validate it via the local
// proxy. Shared by the three Ollama provider branches (running, Windows-host
Expand Down Expand Up @@ -4169,6 +4169,7 @@ async function selectAndValidateOllamaModel(
console.log("");
continue;
}
const allowToolsIncompatible = probe.allowToolsIncompatible === true;
const validationBaseUrl = getLocalProviderValidationBaseUrl(provider);
if (!validationBaseUrl)
abortNonInteractive("Local Ollama validation URL could not be determined.");
Expand All @@ -4179,12 +4180,7 @@ async function selectAndValidateOllamaModel(
null,
"Choose a different Ollama model or select Other.",
null,
{
skipResponsesProbe: true,
requireChatCompletionsToolCalling: process.env.NEMOCLAW_OLLAMA_REQUIRE_TOOLS !== "0",
allowHostDockerInternal:
localInference.getResolvedOllamaHost() === OLLAMA_HOST_DOCKER_INTERNAL,
},
localInference.buildOllamaProbeOptions(allowToolsIncompatible),
);
if (validation.retry === "selection") return { outcome: "back-to-selection" };
if (!validation.ok) {
Expand All @@ -4199,7 +4195,7 @@ async function selectAndValidateOllamaModel(
);
}
localInference.applyOllamaRuntimeContextWindow(selectedModel);
return { outcome: "selected", model: selectedModel };
return { outcome: "selected", model: selectedModel, allowToolsIncompatible };
}
}

Expand All @@ -4216,6 +4212,7 @@ async function setupNim(
hermesToolGateways: string[];
preferredInferenceApi: string | null;
nimContainer: string | null;
allowToolsIncompatible: boolean;
}> {
step(3, 8, "Configuring inference provider");

Expand All @@ -4227,6 +4224,7 @@ async function setupNim(
let hermesAuthMethod: HermesAuthMethod | null = null;
let hermesToolGateways: string[] = [];
let preferredInferenceApi: string | null = null;
let allowToolsIncompatible = false;

// Detect local inference options. Bound curl with --connect-timeout/--max-time
// so a half-open port or stalled listener cannot hang the onboard at step 3
Expand Down Expand Up @@ -5139,7 +5137,7 @@ async function setupNim(
recoveredModel: recoveredFromSandbox ? recoveredModel : null,
});
if (result.outcome === "back-to-selection") continue selectionLoop;
model = result.model;
({ model, allowToolsIncompatible } = result);
preferredInferenceApi = "openai-completions";
}
break;
Expand Down Expand Up @@ -5225,7 +5223,7 @@ async function setupNim(
resetOllamaHostCache();
continue selectionLoop;
}
model = result.model;
({ model, allowToolsIncompatible } = result);
preferredInferenceApi = "openai-completions";
}
break;
Expand Down Expand Up @@ -5267,7 +5265,7 @@ async function setupNim(
recoveredModel: recoveredFromSandbox ? recoveredModel : null,
});
if (result.outcome === "back-to-selection") continue selectionLoop;
model = result.model;
({ model, allowToolsIncompatible } = result);
preferredInferenceApi = "openai-completions";
}
break;
Expand Down Expand Up @@ -5425,6 +5423,7 @@ async function setupNim(
hermesToolGateways,
preferredInferenceApi,
nimContainer,
allowToolsIncompatible,
};
}

Expand All @@ -5438,6 +5437,7 @@ async function setupInference(
credentialEnv: string | null = null,
hermesAuthMethod: HermesAuthMethod | string | null = null,
hermesToolGateways: string[] = [],
options: { allowToolsIncompatible?: boolean } = {},
): Promise<{ ok: true; retry?: undefined } | { retry: "selection" }> {
step(4, 8, "Setting up inference provider");
runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
Expand Down Expand Up @@ -5742,7 +5742,7 @@ async function setupInference(
if (await applyLocalInferenceRoute("ollama-local", model)) return { retry: "selection" };
console.log(` Priming Ollama model: ${model}`);
run(getOllamaWarmupCommand(model), { ignoreError: true });
const probe = validateOllamaModel(model);
const probe = localInference.validateOllamaModelWithToolsOverride(model, options.allowToolsIncompatible === true);
if (!probe.ok) {
console.error(` ${probe.message}`);
process.exit(1);
Expand Down
32 changes: 32 additions & 0 deletions src/lib/onboard/machine/handlers/provider-inference.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ describe("handleProviderInferenceState", () => {
"NVIDIA_API_KEY",
null,
[],
{ allowToolsIncompatible: false },
);
expect(calls.deleteEnv).toHaveBeenCalledWith("NVIDIA_API_KEY");
expect(result).toMatchObject({
Expand Down Expand Up @@ -311,6 +312,7 @@ describe("handleProviderInferenceState", () => {
"COMPATIBLE_API_KEY",
null,
[],
{ allowToolsIncompatible: false },
);
});

Expand Down Expand Up @@ -358,4 +360,34 @@ describe("handleProviderInferenceState", () => {
expect(calls.exit).toHaveBeenCalledWith(0);
expect(calls.setupInference).not.toHaveBeenCalled();
});

// Regression guard for #4241. If provider selection hands back a no-tools
// Ollama model with allowToolsIncompatible set, the handler has to pass that
// same flag on to setupInference; otherwise the later validation pass would
// reject the model for the same reason and send the user back to model
// selection.
it("forwards allowToolsIncompatible from provider selection into setupInference (#4241)", async () => {
  const noToolsModel = "tinyllama:1.1b";
  const ollamaEndpoint = "http://127.0.0.1:11434/v1";

  // Provider selection stub: reports an Ollama model with the override accepted.
  const setupNim = vi.fn(async () => {
    return {
      ...baseSelection,
      provider: "ollama-local",
      model: noToolsModel,
      endpointUrl: ollamaEndpoint,
      credentialEnv: null,
      allowToolsIncompatible: true,
    };
  });
  const harness = createDeps({ setupNim });

  await handleProviderInferenceState(baseOptions(harness.deps));

  // The trailing options argument must carry the override through unchanged.
  expect(harness.calls.setupInference).toHaveBeenCalledWith(
    "my-assistant",
    noToolsModel,
    "ollama-local",
    ollamaEndpoint,
    null,
    null,
    [],
    { allowToolsIncompatible: true },
  );
});
});
6 changes: 6 additions & 0 deletions src/lib/onboard/machine/handlers/provider-inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export interface ProviderSelectionResult {
hermesToolGateways: string[];
preferredInferenceApi: string | null;
nimContainer: string | null;
allowToolsIncompatible?: boolean;
}

export interface ProviderInferenceStateOptions<Gpu, Agent, Host> {
Expand Down Expand Up @@ -54,6 +55,7 @@ export interface ProviderInferenceStateOptions<Gpu, Agent, Host> {
credentialEnv: string | null,
hermesAuthMethod: string | null,
hermesToolGateways: string[],
options?: { allowToolsIncompatible?: boolean },
): Promise<ProviderInferenceRetry>;
startRecordedStep(
stepName: string,
Expand Down Expand Up @@ -166,6 +168,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
let nimContainer = initial.nimContainer;
const webSearchConfig = initial.webSearchConfig;
let forceProviderSelection = initialForceProviderSelection;
let allowToolsIncompatible = false;

while (true) {
let forceInferenceSetup = false;
Expand Down Expand Up @@ -225,6 +228,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
hermesToolGateways = selection.hermesToolGateways;
preferredInferenceApi = selection.preferredInferenceApi;
nimContainer = selection.nimContainer;
allowToolsIncompatible = selection.allowToolsIncompatible === true;
shouldRecordProviderSelection = true;
}

Expand Down Expand Up @@ -277,6 +281,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
credentialEnv,
hermesAuthMethod,
hermesToolGateways,
{ allowToolsIncompatible },
),
);
} finally {
Expand Down Expand Up @@ -360,6 +365,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
credentialEnv,
hermesAuthMethod,
hermesToolGateways,
{ allowToolsIncompatible },
),
);
} finally {
Expand Down
Loading
Loading