From 13fffd9a842300db365b96df1d5366f48b347e7a Mon Sep 17 00:00:00 2001
From: Yimo Jiang <yimoj@nvidia.com>
Date: Wed, 27 May 2026 03:31:11 +0000
Subject: [PATCH 1/4] fix(onboard): honor accepted no-tools Ollama override
 end-to-end (#4241)

When an Ollama model lacks the 'tools' capability, NemoClaw asks "Use this
model anyway?". Pre-fix, answering yes (or setting NEMOCLAW_YES=1 /
NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0) was honored by checkOllamaModelToolSupport
but later validateOllamaModel and validateOpenAiLikeSelection still rejected
the same model on the same condition, looping the wizard back to model
selection.

Thread an explicit allowToolsIncompatible flag from
checkOllamaModelToolSupport through prepareOllamaModel,
selectAndValidateOllamaModel, setupNim, the onboard state-machine handler,
and setupInference so every downstream validator agrees on the override.

- validateOllamaModel downgrades "does not support tools" to a warning when
  the override is set; without the override it still fails early.
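- selectAndValidateOllamaModel skips the strict tool-calling probe in
  validateOpenAiLikeSelection when the override is set; the probe is now
  gated on the threaded flag instead of reading
  NEMOCLAW_OLLAMA_REQUIRE_TOOLS directly.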
- setupInference's second validateOllamaModel pass honors the same flag.

Tests: targeted regression coverage in test/ollama-tools-capability.test.ts
and src/lib/onboard/machine/handlers/provider-inference.test.ts proves that
the accepted-override path completes and that the no-override path still
rejects early.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Yimo Jiang <yimoj@nvidia.com>
---
 src/lib/inference/local.ts                    |  11 +
 src/lib/inference/ollama/proxy.ts             |  23 ++-
 src/lib/onboard.ts                            |  18 +-
 .../handlers/provider-inference.test.ts       |  32 +++
 .../machine/handlers/provider-inference.ts    |   6 +
 test/ollama-tools-capability.test.ts          | 194 +++++++++++++++++-
 6 files changed, 268 insertions(+), 16 deletions(-)

diff --git a/src/lib/inference/local.ts b/src/lib/inference/local.ts
index 2c9f0a95f0..ebc76bed08 100644
--- a/src/lib/inference/local.ts
+++ b/src/lib/inference/local.ts
@@ -903,6 +903,7 @@ export function validateOllamaModel(
   runCaptureImpl?: RunCaptureFn,
   isSparkImpl?: () => boolean,
   runCaptureExImpl?: RunCaptureExFn,
+  options: { allowToolsIncompatible?: boolean } = {},
 ): ValidationResult {
   const capture = runCaptureImpl ?? runCapture;
   const captureEx = runCaptureExImpl ?? runCaptureEx;
@@ -932,6 +933,16 @@ export function validateOllamaModel(
     if (parsed && typeof parsed.error === "string" && parsed.error.trim()) {
       const errText = parsed.error.trim();
       if (/does not support tools/i.test(errText)) {
+        if (options.allowToolsIncompatible === true) {
+          // The user (or NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 / --yes) already
+          // accepted that this model lacks tool calling. Don't loop back to
+          // model selection for the same condition — see issue #4241.
+          console.warn(
+            `  ⚠ Ollama model '${model}' confirmed not to support tools; ` +
+              `continuing because the no-tools override was accepted.`,
+          );
+          return { ok: true };
+        }
         return {
           ok: false,
           message:
diff --git a/src/lib/inference/ollama/proxy.ts b/src/lib/inference/ollama/proxy.ts
index 0ef15457ff..50859142e1 100644
--- a/src/lib/inference/ollama/proxy.ts
+++ b/src/lib/inference/ollama/proxy.ts
@@ -688,7 +688,7 @@ function printToolsIncompatibleWarning(model: string): void {
 
 async function checkOllamaModelToolSupport(
   model: string,
-): Promise<{ ok: boolean; message?: string }> {
+): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
   const caps = probeOllamaModelCapabilities(model);
 
   if (caps.supportsTools === true) {
@@ -705,11 +705,15 @@ async function checkOllamaModelToolSupport(
   }
 
   // supportsTools === false — model is on disk but advertises no tools support.
+  // Every code path below that returns ok:true must also set
+  // allowToolsIncompatible:true so downstream validators (validateOllamaModel,
+  // probeChatCompletionsToolCalling via setupOllama / setupInference) don't
+  // reject the same model on the same condition — see issue #4241.
   printToolsIncompatibleWarning(model);
 
   if (isProxyAutoYes()) {
     console.log("  Continuing because --yes was passed.");
-    return { ok: true };
+    return { ok: true, allowToolsIncompatible: true };
   }
 
   if (isProxyNonInteractive()) {
@@ -717,7 +721,7 @@ async function checkOllamaModelToolSupport(
       console.error(
         `  NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 set — proceeding with '${model}' despite missing 'tools'.`,
       );
-      return { ok: true };
+      return { ok: true, allowToolsIncompatible: true };
     }
     console.error(
       "  Re-run with NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 to override, or pick a tools-capable model.",
@@ -729,10 +733,13 @@ async function checkOllamaModelToolSupport(
   if (!proceed) {
     return { ok: false, message: "Choose a tools-capable model." };
   }
-  return { ok: true };
+  return { ok: true, allowToolsIncompatible: true };
 }
 
-async function prepareOllamaModel(model, installedModels = []) {
+async function prepareOllamaModel(
+  model,
+  installedModels: string[] = [],
+): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
   const alreadyInstalled = installedModels.includes(model);
   if (!alreadyInstalled) {
     console.log(`  Pulling Ollama model: ${model}`);
@@ -753,7 +760,11 @@ async function prepareOllamaModel(model, installedModels = []) {
 
   console.log(`  Loading Ollama model: ${model}`);
   run(getOllamaWarmupCommand(model), { ignoreError: true });
-  return validateOllamaModel(model);
+  const allowToolsIncompatible = capCheck.allowToolsIncompatible === true;
+  const result = validateOllamaModel(model, undefined, undefined, undefined, {
+    allowToolsIncompatible,
+  });
+  return { ...result, allowToolsIncompatible };
 }
 
 /**
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index 5d695cd7e9..688b80677e 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -4108,7 +4108,7 @@ const { readLiveInference, readRecordedProvider, readRecordedNimContainer, readR
   });
 
 type OllamaModelSelectionOutcome =
-  | { outcome: "selected"; model: string }
+  | { outcome: "selected"; model: string; allowToolsIncompatible?: boolean }
   | { outcome: "back-to-selection" };
 
 // Pick an Ollama model, pull it if missing, and validate it via the local
@@ -4171,6 +4171,7 @@ async function selectAndValidateOllamaModel(
       console.log("");
       continue;
     }
+    const allowToolsIncompatible = probe.allowToolsIncompatible === true;
     const validationBaseUrl = getLocalProviderValidationBaseUrl(provider);
     if (!validationBaseUrl)
       abortNonInteractive("Local Ollama validation URL could not be determined.");
@@ -4183,7 +4184,7 @@ async function selectAndValidateOllamaModel(
       null,
       {
         skipResponsesProbe: true,
-        requireChatCompletionsToolCalling: process.env.NEMOCLAW_OLLAMA_REQUIRE_TOOLS !== "0",
+        requireChatCompletionsToolCalling: !allowToolsIncompatible,
         allowHostDockerInternal:
           localInference.getResolvedOllamaHost() === OLLAMA_HOST_DOCKER_INTERNAL,
       },
@@ -4200,7 +4201,7 @@ async function selectAndValidateOllamaModel(
         "  ℹ Using chat completions API (Ollama tool calls require /v1/chat/completions)",
       );
     }
-    return { outcome: "selected", model: selectedModel };
+    return { outcome: "selected", model: selectedModel, allowToolsIncompatible };
   }
 }
 
@@ -4217,6 +4218,7 @@ async function setupNim(
   hermesToolGateways: string[];
   preferredInferenceApi: string | null;
   nimContainer: string | null;
+  allowToolsIncompatible: boolean;
 }> {
   step(3, 8, "Configuring inference provider");
 
@@ -4228,6 +4230,7 @@ async function setupNim(
   let hermesAuthMethod: HermesAuthMethod | null = null;
   let hermesToolGateways: string[] = [];
   let preferredInferenceApi: string | null = null;
+  let allowToolsIncompatible = false;
 
   // Detect local inference options. Bound curl with --connect-timeout/--max-time
   // so a half-open port or stalled listener cannot hang the onboard at step 3
@@ -5143,6 +5146,7 @@ async function setupNim(
           });
           if (result.outcome === "back-to-selection") continue selectionLoop;
           model = result.model;
+          allowToolsIncompatible = result.allowToolsIncompatible === true;
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5229,6 +5233,7 @@ async function setupNim(
             continue selectionLoop;
           }
           model = result.model;
+          allowToolsIncompatible = result.allowToolsIncompatible === true;
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5271,6 +5276,7 @@ async function setupNim(
           });
           if (result.outcome === "back-to-selection") continue selectionLoop;
           model = result.model;
+          allowToolsIncompatible = result.allowToolsIncompatible === true;
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5428,6 +5434,7 @@ async function setupNim(
     hermesToolGateways,
     preferredInferenceApi,
     nimContainer,
+    allowToolsIncompatible,
   };
 }
 
@@ -5441,6 +5448,7 @@ async function setupInference(
   credentialEnv: string | null = null,
   hermesAuthMethod: HermesAuthMethod | string | null = null,
   hermesToolGateways: string[] = [],
+  options: { allowToolsIncompatible?: boolean } = {},
 ): Promise<{ ok: true; retry?: undefined } | { retry: "selection" }> {
   step(4, 8, "Setting up inference provider");
   runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
@@ -5765,7 +5773,9 @@ async function setupInference(
     ]);
     console.log(`  Priming Ollama model: ${model}`);
     run(getOllamaWarmupCommand(model), { ignoreError: true });
-    const probe = validateOllamaModel(model);
+    const probe = validateOllamaModel(model, undefined, undefined, undefined, {
+      allowToolsIncompatible: options.allowToolsIncompatible === true,
+    });
     if (!probe.ok) {
       console.error(`  ${probe.message}`);
       process.exit(1);
diff --git a/src/lib/onboard/machine/handlers/provider-inference.test.ts b/src/lib/onboard/machine/handlers/provider-inference.test.ts
index 5fd788f6e0..5414e898a5 100644
--- a/src/lib/onboard/machine/handlers/provider-inference.test.ts
+++ b/src/lib/onboard/machine/handlers/provider-inference.test.ts
@@ -148,6 +148,7 @@ describe("handleProviderInferenceState", () => {
       "NVIDIA_API_KEY",
       null,
       [],
+      { allowToolsIncompatible: false },
     );
     expect(calls.deleteEnv).toHaveBeenCalledWith("NVIDIA_API_KEY");
     expect(result).toMatchObject({
@@ -311,6 +312,7 @@ describe("handleProviderInferenceState", () => {
       "COMPATIBLE_API_KEY",
       null,
       [],
+      { allowToolsIncompatible: false },
     );
   });
 
@@ -358,4 +360,34 @@ describe("handleProviderInferenceState", () => {
     expect(calls.exit).toHaveBeenCalledWith(0);
     expect(calls.setupInference).not.toHaveBeenCalled();
   });
+
+  // Regression: #4241. When the provider selection step accepted a no-tools
+  // Ollama model (the user answered "yes" to the override prompt or
+  // NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 was set), the same flag must reach
+  // setupInference so the second validateOllamaModel pass does not reject the
+  // model on the same condition and bounce the user back to model selection.
+  it("forwards allowToolsIncompatible from provider selection into setupInference (#4241)", async () => {
+    const setupNim = vi.fn(async () => ({
+      ...baseSelection,
+      provider: "ollama-local",
+      model: "tinyllama:1.1b",
+      endpointUrl: "http://127.0.0.1:11434/v1",
+      credentialEnv: null,
+      allowToolsIncompatible: true,
+    }));
+    const { deps, calls } = createDeps({ setupNim });
+
+    await handleProviderInferenceState(baseOptions(deps));
+
+    expect(calls.setupInference).toHaveBeenCalledWith(
+      "my-assistant",
+      "tinyllama:1.1b",
+      "ollama-local",
+      "http://127.0.0.1:11434/v1",
+      null,
+      null,
+      [],
+      { allowToolsIncompatible: true },
+    );
+  });
 });
diff --git a/src/lib/onboard/machine/handlers/provider-inference.ts b/src/lib/onboard/machine/handlers/provider-inference.ts
index 139b643cf2..782ab9144a 100644
--- a/src/lib/onboard/machine/handlers/provider-inference.ts
+++ b/src/lib/onboard/machine/handlers/provider-inference.ts
@@ -15,6 +15,7 @@ export interface ProviderSelectionResult {
   hermesToolGateways: string[];
   preferredInferenceApi: string | null;
   nimContainer: string | null;
+  allowToolsIncompatible?: boolean;
 }
 
 export interface ProviderInferenceStateOptions<Gpu, Agent, Host> {
@@ -53,6 +54,7 @@ export interface ProviderInferenceStateOptions<Gpu, Agent, Host> {
       credentialEnv: string | null,
       hermesAuthMethod: string | null,
       hermesToolGateways: string[],
+      options?: { allowToolsIncompatible?: boolean },
     ): Promise<ProviderInferenceRetry>;
     startRecordedStep(
       stepName: string,
@@ -165,6 +167,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
   let nimContainer = initial.nimContainer;
   const webSearchConfig = initial.webSearchConfig;
   let forceProviderSelection = initialForceProviderSelection;
+  let allowToolsIncompatible = false;
 
   while (true) {
     let forceInferenceSetup = false;
@@ -220,6 +223,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
       hermesToolGateways = selection.hermesToolGateways;
       preferredInferenceApi = selection.preferredInferenceApi;
       nimContainer = selection.nimContainer;
+      allowToolsIncompatible = selection.allowToolsIncompatible === true;
       shouldRecordProviderSelection = true;
     }
 
@@ -263,6 +267,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
             credentialEnv,
             hermesAuthMethod,
             hermesToolGateways,
+            { allowToolsIncompatible },
           );
         } finally {
           clearStagedCredentialEnv(deps, credentialEnv);
@@ -338,6 +343,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
         credentialEnv,
         hermesAuthMethod,
         hermesToolGateways,
+        { allowToolsIncompatible },
       );
     } finally {
       clearStagedCredentialEnv(deps, credentialEnv);
diff --git a/test/ollama-tools-capability.test.ts b/test/ollama-tools-capability.test.ts
index 6a9fe91faf..05caa19067 100644
--- a/test/ollama-tools-capability.test.ts
+++ b/test/ollama-tools-capability.test.ts
@@ -31,6 +31,7 @@ interface LocalInferenceModule {
     capture?: CaptureFn,
     isSparkImpl?: () => boolean,
     captureExImpl?: (cmd: string[]) => { stdout: string; exitCode: number | null; timedOut: boolean },
+    options?: { allowToolsIncompatible?: boolean },
   ) => { ok: boolean; message?: string };
   setResolvedOllamaHost: (host: string) => void;
   resetOllamaHostCache: () => void;
@@ -40,7 +41,7 @@ interface LocalInferenceModule {
 interface OnboardOllamaProxyModule {
   checkOllamaModelToolSupport: (
     model: string,
-  ) => Promise<{ ok: boolean; message?: string }>;
+  ) => Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }>;
 }
 
 function loadLocalInference(): LocalInferenceModule {
@@ -324,7 +325,7 @@ describe("checkOllamaModelToolSupport", () => {
     }
   });
 
-  it("interactive yes → {ok:true}", async () => {
+  it("interactive yes → {ok:true, allowToolsIncompatible:true}", async () => {
     const h = loadProxyWithStubs();
     h.setProbeResult({
       source: "api",
@@ -333,7 +334,9 @@ describe("checkOllamaModelToolSupport", () => {
     });
     h.setPromptReply("y");
     const out = await h.proxy.checkOllamaModelToolSupport("phi4");
-    expect(out).toEqual({ ok: true });
+    // The override flag is what downstream validators consume to skip the
+    // strict tools probe — without it onboard would loop back. See #4241.
+    expect(out).toEqual({ ok: true, allowToolsIncompatible: true });
     // Warning banner was printed.
     expect(h.logs.some((l) => l.includes("does not advertise the 'tools' capability"))).toBe(true);
     // Prompt was actually shown.
@@ -367,7 +370,7 @@ describe("checkOllamaModelToolSupport", () => {
     expect(h.errors.some((e) => e.includes("NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0"))).toBe(true);
   });
 
-  it("non-interactive + NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 → {ok:true} after stderr warning", async () => {
+  it("non-interactive + NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 → {ok:true, allowToolsIncompatible:true} after stderr warning", async () => {
     process.env.NEMOCLAW_NON_INTERACTIVE = "1";
     process.env.NEMOCLAW_OLLAMA_REQUIRE_TOOLS = "0";
     const h = loadProxyWithStubs();
@@ -378,6 +381,7 @@ describe("checkOllamaModelToolSupport", () => {
     });
     const out = await h.proxy.checkOllamaModelToolSupport("phi4");
     expect(out.ok).toBe(true);
+    expect(out.allowToolsIncompatible).toBe(true);
     // Stderr warning mentions the env-var override + model name.
     const matched = h.errors.some(
       (e) => e.includes("NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0") && e.includes("phi4"),
@@ -385,7 +389,7 @@ describe("checkOllamaModelToolSupport", () => {
     expect(matched).toBe(true);
   });
 
-  it("NEMOCLAW_YES=1 → {ok:true} after note", async () => {
+  it("NEMOCLAW_YES=1 → {ok:true, allowToolsIncompatible:true} after note", async () => {
     process.env.NEMOCLAW_YES = "1";
     const h = loadProxyWithStubs();
     h.setProbeResult({
@@ -394,7 +398,7 @@ describe("checkOllamaModelToolSupport", () => {
       supportsTools: false,
     });
     const out = await h.proxy.checkOllamaModelToolSupport("phi4");
-    expect(out).toEqual({ ok: true });
+    expect(out).toEqual({ ok: true, allowToolsIncompatible: true });
     // Note about --yes is printed.
     expect(h.logs.some((l) => l.toLowerCase().includes("--yes"))).toBe(true);
     // Prompt should NOT have been shown.
@@ -410,9 +414,187 @@ describe("checkOllamaModelToolSupport", () => {
       rawError: "connection refused",
     });
     const out = await h.proxy.checkOllamaModelToolSupport("phi4");
+    // Probe could not determine capabilities — no override needed because we
+    // don't know the model is incompatible.
     expect(out).toEqual({ ok: true });
     // Informational note printed; no warning banner.
     expect(h.logs.some((l) => l.includes("Could not verify 'tools' capability"))).toBe(true);
     expect(h.logs.some((l) => l.includes("does not advertise the 'tools' capability"))).toBe(false);
   });
 });
+
+// ─────────────────────────────────────────────────────────────────
+// Regression coverage for #4241: the no-tools override must propagate
+// through `validateOllamaModel`, so a model that the user explicitly
+// accepted does not get rejected again on the same "does not support
+// tools" error during the second validation pass in setupInference.
+// Without the override (`allowToolsIncompatible: false`/unset), the
+// validator must still fail early so the wizard does not silently
+// strand the user on an unusable model.
+// ─────────────────────────────────────────────────────────────────
+
+describe("validateOllamaModel — no-tools override propagation (#4241)", () => {
+  let localInference: LocalInferenceModule;
+
+  beforeEach(() => {
+    localInference = loadLocalInference();
+  });
+
+  function captureExReturning(errText: string) {
+    const payload = JSON.stringify({ error: errText });
+    return () => ({ stdout: payload, exitCode: 0, timedOut: false });
+  }
+
+  it("rejects a no-tools model when no override is set (back-to-selection path)", () => {
+    const { capture } = makeCapture([]);
+    const captureEx = captureExReturning(
+      "registry.ollama.ai/library/tinyllama:1.1b does not support tools",
+    );
+    const result = localInference.validateOllamaModel(
+      "tinyllama:1.1b",
+      capture,
+      () => false,
+      captureEx,
+    );
+    expect(result.ok).toBe(false);
+    expect(result.message!).toContain("tinyllama");
+    expect(result.message!.toLowerCase()).toContain("tools");
+  });
+
+  it("rejects a no-tools model when allowToolsIncompatible is false (explicit no override)", () => {
+    const { capture } = makeCapture([]);
+    const captureEx = captureExReturning(
+      "registry.ollama.ai/library/tinyllama:1.1b does not support tools",
+    );
+    const result = localInference.validateOllamaModel(
+      "tinyllama:1.1b",
+      capture,
+      () => false,
+      captureEx,
+      { allowToolsIncompatible: false },
+    );
+    expect(result.ok).toBe(false);
+    expect(result.message!.toLowerCase()).toContain("tools");
+  });
+
+  it("accepts a no-tools model when allowToolsIncompatible is true (override accepted upstream)", () => {
+    const { capture } = makeCapture([]);
+    const captureEx = captureExReturning(
+      "registry.ollama.ai/library/tinyllama:1.1b does not support tools",
+    );
+    const warned: string[] = [];
+    const warnSpy = vi.spyOn(console, "warn").mockImplementation((...args) => {
+      warned.push(args.map((a) => String(a)).join(" "));
+    });
+    try {
+      const result = localInference.validateOllamaModel(
+        "tinyllama:1.1b",
+        capture,
+        () => false,
+        captureEx,
+        { allowToolsIncompatible: true },
+      );
+      expect(result.ok).toBe(true);
+      // The earlier accept-the-override warning is restated rather than
+      // reverted into a hard validation failure that would loop selection.
+      expect(warned.some((l) => l.includes("no-tools override was accepted"))).toBe(true);
+    } finally {
+      warnSpy.mockRestore();
+    }
+  });
+
+  it("still surfaces non-tools errors (e.g. memory) regardless of override", () => {
+    const { capture } = makeCapture([]);
+    const captureEx = captureExReturning(
+      "model requires more system memory (50 GiB) than is available (8 GiB)",
+    );
+    const result = localInference.validateOllamaModel(
+      "tinyllama:1.1b",
+      capture,
+      () => false,
+      captureEx,
+      { allowToolsIncompatible: true },
+    );
+    expect(result.ok).toBe(false);
+    expect(result.message!).toContain("more system memory");
+  });
+});
+
+// ─────────────────────────────────────────────────────────────────
+// End-to-end propagation: when checkOllamaModelToolSupport accepts the
+// override, the same model must clear the second validateOllamaModel
+// call without looping back to model selection (the user-visible
+// #4241 regression). Pre-fix this assertion fails because
+// validateOllamaModel does not know about the override and re-rejects.
+// ─────────────────────────────────────────────────────────────────
+
+describe("override propagation across checkOllamaModelToolSupport → validateOllamaModel (#4241)", () => {
+  it("user accepts override → later validateOllamaModel does NOT reject for the same tools-incompatible error", async () => {
+    const h = loadProxyWithStubs();
+    h.setProbeResult({
+      source: "api",
+      capabilities: ["completion"],
+      supportsTools: false,
+    });
+    h.setPromptReply("y");
+
+    // (1) The capability check is what an onboarding session would run first.
+    const cap = await h.proxy.checkOllamaModelToolSupport("tinyllama:1.1b");
+    expect(cap.ok).toBe(true);
+    expect(cap.allowToolsIncompatible).toBe(true);
+
+    // (2) The same setupInference path runs validateOllamaModel(model) after
+    //     `openshell inference set`. Pre-fix it returned ok:false on the same
+    //     "does not support tools" condition and onboarding called
+    //     process.exit(1). Threading `allowToolsIncompatible` keeps it ok:true.
+    const localInference = loadLocalInference();
+    const captureEx = () => ({
+      stdout: JSON.stringify({
+        error: "registry.ollama.ai/library/tinyllama:1.1b does not support tools",
+      }),
+      exitCode: 0,
+      timedOut: false,
+    });
+    const result = localInference.validateOllamaModel(
+      "tinyllama:1.1b",
+      () => "",
+      () => false,
+      captureEx,
+      { allowToolsIncompatible: cap.allowToolsIncompatible === true },
+    );
+    expect(result.ok).toBe(true);
+  });
+
+  it("user declines override → both stages refuse and no later validation runs", async () => {
+    const h = loadProxyWithStubs();
+    h.setProbeResult({
+      source: "api",
+      capabilities: ["completion"],
+      supportsTools: false,
+    });
+    h.setPromptReply("n");
+
+    const cap = await h.proxy.checkOllamaModelToolSupport("tinyllama:1.1b");
+    expect(cap.ok).toBe(false);
+    // Override was never granted, so downstream validators must keep rejecting
+    // the same model.
+    expect(cap.allowToolsIncompatible).toBeUndefined();
+
+    const localInference = loadLocalInference();
+    const captureEx = () => ({
+      stdout: JSON.stringify({
+        error: "registry.ollama.ai/library/tinyllama:1.1b does not support tools",
+      }),
+      exitCode: 0,
+      timedOut: false,
+    });
+    const result = localInference.validateOllamaModel(
+      "tinyllama:1.1b",
+      () => "",
+      () => false,
+      captureEx,
+      { allowToolsIncompatible: cap.allowToolsIncompatible === true },
+    );
+    expect(result.ok).toBe(false);
+  });
+});

From aa5305476c843feec9de4be7faa83007df7726cf Mon Sep 17 00:00:00 2001
From: Yimo Jiang <yimoj@nvidia.com>
Date: Wed, 27 May 2026 03:53:00 +0000
Subject: [PATCH 2/4] refactor(onboard): extract Ollama no-tools probe helpers
 to inference/local
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Hoist the validateOpenAiLikeSelection options builder and the
validateOllamaModel-with-override wrapper into src/lib/inference/local.ts
so onboard.ts stays net-neutral under the onboard-entrypoint-budget gate.
Call sites use the existing `localInference` namespace import — no new
top-level requires.

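No behavioral change. The only type-level change is that
OllamaModelSelectionOutcome's allowToolsIncompatible becomes required, so
the setupNim call sites can destructure it directly. Existing regression
tests (test/ollama-tools-capability.test.ts and
src/lib/onboard/machine/handlers/provider-inference.test.ts) still pass.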

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Yimo Jiang <yimoj@nvidia.com>
---
 src/lib/inference/local.ts | 23 +++++++++++++++++++++++
 src/lib/onboard.ts         | 22 ++++++----------------
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/src/lib/inference/local.ts b/src/lib/inference/local.ts
index ebc76bed08..f31c4bdab0 100644
--- a/src/lib/inference/local.ts
+++ b/src/lib/inference/local.ts
@@ -992,6 +992,29 @@ export function validateOllamaModel(
   return { ok: true };
 }
 
+// Helpers for threading the user's "use this no-tools Ollama model anyway"
+// override (see #4241) through onboard validators so they don't loop the
+// wizard back to model selection after the user already accepted.
+
+export function buildOllamaProbeOptions(allowToolsIncompatible: boolean): {
+  skipResponsesProbe: true;
+  requireChatCompletionsToolCalling: boolean;
+  allowHostDockerInternal: boolean;
+} {
+  return {
+    skipResponsesProbe: true,
+    requireChatCompletionsToolCalling: !allowToolsIncompatible,
+    allowHostDockerInternal: getResolvedOllamaHost() === OLLAMA_HOST_DOCKER_INTERNAL,
+  };
+}
+
+export function validateOllamaModelWithToolsOverride(
+  model: string,
+  allowToolsIncompatible: boolean,
+): ValidationResult {
+  return validateOllamaModel(model, undefined, undefined, undefined, { allowToolsIncompatible });
+}
+
 // ─── Tools-capability probe (issue #2667) ─────────────────────────
 //
 // Ollama exposes a model's declared capabilities via /api/show. Tool calling
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index 688b80677e..a9b7c17654 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -4108,7 +4108,7 @@ const { readLiveInference, readRecordedProvider, readRecordedNimContainer, readR
   });
 
 type OllamaModelSelectionOutcome =
-  | { outcome: "selected"; model: string; allowToolsIncompatible?: boolean }
+  | { outcome: "selected"; model: string; allowToolsIncompatible: boolean }
   | { outcome: "back-to-selection" };
 
 // Pick an Ollama model, pull it if missing, and validate it via the local
@@ -4182,12 +4182,7 @@ async function selectAndValidateOllamaModel(
       null,
       "Choose a different Ollama model or select Other.",
       null,
-      {
-        skipResponsesProbe: true,
-        requireChatCompletionsToolCalling: !allowToolsIncompatible,
-        allowHostDockerInternal:
-          localInference.getResolvedOllamaHost() === OLLAMA_HOST_DOCKER_INTERNAL,
-      },
+      localInference.buildOllamaProbeOptions(allowToolsIncompatible),
     );
     if (validation.retry === "selection") return { outcome: "back-to-selection" };
     if (!validation.ok) {
@@ -5145,8 +5140,7 @@ async function setupNim(
             recoveredModel: recoveredFromSandbox ? recoveredModel : null,
           });
           if (result.outcome === "back-to-selection") continue selectionLoop;
-          model = result.model;
-          allowToolsIncompatible = result.allowToolsIncompatible === true;
+          ({ model, allowToolsIncompatible } = result);
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5232,8 +5226,7 @@ async function setupNim(
             resetOllamaHostCache();
             continue selectionLoop;
           }
-          model = result.model;
-          allowToolsIncompatible = result.allowToolsIncompatible === true;
+          ({ model, allowToolsIncompatible } = result);
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5275,8 +5268,7 @@ async function setupNim(
             recoveredModel: recoveredFromSandbox ? recoveredModel : null,
           });
           if (result.outcome === "back-to-selection") continue selectionLoop;
-          model = result.model;
-          allowToolsIncompatible = result.allowToolsIncompatible === true;
+          ({ model, allowToolsIncompatible } = result);
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5773,9 +5765,7 @@ async function setupInference(
     ]);
     console.log(`  Priming Ollama model: ${model}`);
     run(getOllamaWarmupCommand(model), { ignoreError: true });
-    const probe = validateOllamaModel(model, undefined, undefined, undefined, {
-      allowToolsIncompatible: options.allowToolsIncompatible === true,
-    });
+    const probe = localInference.validateOllamaModelWithToolsOverride(model, options.allowToolsIncompatible === true);
     if (!probe.ok) {
       console.error(`  ${probe.message}`);
       process.exit(1);

From ac2d0490d1b59bde610a2def14d476edd6b1e657 Mon Sep 17 00:00:00 2001
From: Yimo Jiang <yimoj@nvidia.com>
Date: Wed, 27 May 2026 04:06:31 +0000
Subject: [PATCH 3/4] fix(onboard): preserve Spark CPU-only probe under
 no-tools override

CodeRabbit found the override branch in validateOllamaModel was returning
ok:true immediately on `does not support tools`, bypassing the later
Spark CPU-only runtime check. Restructure so the override falls through
to the existing probeOllamaRuntimeModelStatus() check; other parse-time
errors (memory mismatch, generic) still short-circuit as before.

Also stub localInference.validateOllamaModelWithToolsOverride in
test/onboard.test.ts so the WSL Ollama auth-proxy recovery test continues
to pass after setupInference switched to calling the wrapper (the test's
existing validateOllamaModel stub does not propagate inside local.ts).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Yimo Jiang <yimoj@nvidia.com>
---
 src/lib/inference/local.ts | 71 +++++++++++++++++++-------------------
 test/onboard.test.ts       |  1 +
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/lib/inference/local.ts b/src/lib/inference/local.ts
index f31c4bdab0..a12a3fa436 100644
--- a/src/lib/inference/local.ts
+++ b/src/lib/inference/local.ts
@@ -933,47 +933,46 @@ export function validateOllamaModel(
     if (parsed && typeof parsed.error === "string" && parsed.error.trim()) {
       const errText = parsed.error.trim();
       if (/does not support tools/i.test(errText)) {
-        if (options.allowToolsIncompatible === true) {
-          // The user (or NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 / --yes) already
-          // accepted that this model lacks tool calling. Don't loop back to
-          // model selection for the same condition — see issue #4241.
-          console.warn(
-            `  ⚠ Ollama model '${model}' confirmed not to support tools; ` +
-              `continuing because the no-tools override was accepted.`,
-          );
-          return { ok: true };
+        if (options.allowToolsIncompatible !== true) {
+          return {
+            ok: false,
+            message:
+              `Selected Ollama model '${model}' does not support tool calling, which ` +
+              `NemoClaw agents require. Run \`ollama show <model>\` to inspect a ` +
+              `model's capabilities and pick one whose list includes 'tools'.`,
+          };
         }
-        return {
-          ok: false,
-          message:
-            `Selected Ollama model '${model}' does not support tool calling, which ` +
-            `NemoClaw agents require. Run \`ollama show <model>\` to inspect a ` +
-            `model's capabilities and pick one whose list includes 'tools'.`,
-        };
-      }
-      // Ollama checks available RAM instead of total; false positive on DGX Spark
-      // unified-memory hosts where GPU and CPU share the same 128 GB pool. (#3251)
-      const memMatch = errText.match(
-        /model requires more system memory \(([0-9.]+)\s*GiB\) than is available \([0-9.]+\s*GiB\)/i,
-      );
-      if (memMatch && sparkHost) {
-        const requiresGiB = parseFloat(memMatch[1]);
-        const freeOut = capture(["free", "-m"], { ignoreError: true });
-        if (freeOut) {
-          const memLine = freeOut.split("\n").find((l: string) => l.includes("Mem:"));
-          if (memLine) {
-            const totalMB = parseInt(memLine.trim().split(/\s+/)[1], 10) || 0;
-            const totalGiB = totalMB / 1024;
-            if (totalGiB >= requiresGiB) {
-              return { ok: true };
+        // Override accepted — log a warning and fall through to the Spark
+        // CPU-only runtime check below so that check is still enforced. (#4241)
+        console.warn(
+          `  ⚠ Ollama model '${model}' confirmed not to support tools; ` +
+            `continuing because the no-tools override was accepted.`,
+        );
+      } else {
+        // Ollama checks available RAM instead of total; false positive on DGX Spark
+        // unified-memory hosts where GPU and CPU share the same 128 GB pool. (#3251)
+        const memMatch = errText.match(
+          /model requires more system memory \(([0-9.]+)\s*GiB\) than is available \([0-9.]+\s*GiB\)/i,
+        );
+        if (memMatch && sparkHost) {
+          const requiresGiB = parseFloat(memMatch[1]);
+          const freeOut = capture(["free", "-m"], { ignoreError: true });
+          if (freeOut) {
+            const memLine = freeOut.split("\n").find((l: string) => l.includes("Mem:"));
+            if (memLine) {
+              const totalMB = parseInt(memLine.trim().split(/\s+/)[1], 10) || 0;
+              const totalGiB = totalMB / 1024;
+              if (totalGiB >= requiresGiB) {
+                return { ok: true };
+              }
             }
           }
         }
+        return {
+          ok: false,
+          message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
+        };
       }
-      return {
-        ok: false,
-        message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
-      };
     }
   } catch {
     /* ignored */
diff --git a/test/onboard.test.ts b/test/onboard.test.ts
index 4bce1ed84b..28d7ffc5ce 100644
--- a/test/onboard.test.ts
+++ b/test/onboard.test.ts
@@ -1306,6 +1306,7 @@ localInference.validateLocalProvider = () => ({
 localInference.getLocalProviderBaseUrl = () => "http://host.openshell.internal:11435/v1";
 localInference.getOllamaWarmupCommand = () => ["true"];
 localInference.validateOllamaModel = () => ({ ok: true });
+localInference.validateOllamaModelWithToolsOverride = () => ({ ok: true });
 proxy.ensureOllamaAuthProxy = () => {
   proxyCalls.push("ensure");
 };

From 1e571cadb80c9860fff93dbd55c5f3a0e04de450 Mon Sep 17 00:00:00 2001
From: Yimo Jiang <yimoj@nvidia.com>
Date: Wed, 27 May 2026 04:17:29 +0000
Subject: [PATCH 4/4] test(ollama): cover Spark CPU-only fall-through under
 no-tools override

After ac2d0490d restructured validateOllamaModel so the override path
falls through to probeOllamaRuntimeModelStatus(), add a regression test
that exercises this exact path: a no-tools error plus override accepted
plus a Spark host running the model on 100% CPU must surface the
CPU-only diagnostic instead of silently passing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Yimo Jiang <yimoj@nvidia.com>
---
 test/ollama-tools-capability.test.ts | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/test/ollama-tools-capability.test.ts b/test/ollama-tools-capability.test.ts
index 05caa19067..b9f1ecb588 100644
--- a/test/ollama-tools-capability.test.ts
+++ b/test/ollama-tools-capability.test.ts
@@ -518,6 +518,34 @@ describe("validateOllamaModel — no-tools override propagation (#4241)", () =>
     expect(result.ok).toBe(false);
     expect(result.message!).toContain("more system memory");
   });
+
+  // The override only suppresses the tools-capability rejection. The Spark
+  // CPU-only runtime check (probeOllamaRuntimeModelStatus) must still run
+  // after the warning fires and surface its own diagnostic — otherwise an
+  // accepted no-tools model on Spark could silently land on CPU.
+  it("Spark CPU-only check still rejects after override is accepted", () => {
+    const cpuOnlyApiPs = JSON.stringify({
+      models: [{ name: "tinyllama:1.1b", model: "tinyllama:1.1b", size_vram: 0, processor: "100% CPU" }],
+    });
+    const { capture } = makeCapture([{ match: /\/api\/ps/, output: cpuOnlyApiPs }]);
+    const captureEx = captureExReturning(
+      "registry.ollama.ai/library/tinyllama:1.1b does not support tools",
+    );
+    const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+    try {
+      const result = localInference.validateOllamaModel(
+        "tinyllama:1.1b",
+        capture,
+        () => true,
+        captureEx,
+        { allowToolsIncompatible: true },
+      );
+      expect(result.ok).toBe(false);
+      expect(result.message!.toLowerCase()).toContain("cpu");
+    } finally {
+      warnSpy.mockRestore();
+    }
+  });
 });
 
 // ─────────────────────────────────────────────────────────────────