NVIDIA · cv · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/src/lib/inference/local.ts b/src/lib/inference/local.ts
@@ -930,6 +930,7 @@ export function validateOllamaModel(
   runCaptureImpl?: RunCaptureFn,
   isSparkImpl?: () => boolean,
   runCaptureExImpl?: RunCaptureExFn,
+  options: { allowToolsIncompatible?: boolean } = {},
 ): ValidationResult {
   const capture = runCaptureImpl ?? runCapture;
   const captureEx = runCaptureExImpl ?? runCaptureEx;
@@ -959,37 +960,46 @@ export function validateOllamaModel(
     if (parsed && typeof parsed.error === "string" && parsed.error.trim()) {
       const errText = parsed.error.trim();
       if (/does not support tools/i.test(errText)) {
-        return {
-          ok: false,
-          message:
-            `Selected Ollama model '${model}' does not support tool calling, which ` +
-            `NemoClaw agents require. Run \`ollama show <model>\` to inspect a ` +
-            `model's capabilities and pick one whose list includes 'tools'.`,
-        };
-      }
-      // Ollama checks available RAM instead of total; false positive on DGX Spark
-      // unified-memory hosts where GPU and CPU share the same 128 GB pool. (#3251)
-      const memMatch = errText.match(
-        /model requires more system memory \(([0-9.]+)\s*GiB\) than is available \([0-9.]+\s*GiB\)/i,
-      );
-      if (memMatch && sparkHost) {
-        const requiresGiB = parseFloat(memMatch[1]);
-        const freeOut = capture(["free", "-m"], { ignoreError: true });
-        if (freeOut) {
-          const memLine = freeOut.split("\n").find((l: string) => l.includes("Mem:"));
-          if (memLine) {
-            const totalMB = parseInt(memLine.trim().split(/\s+/)[1], 10) || 0;
-            const totalGiB = totalMB / 1024;
-            if (totalGiB >= requiresGiB) {
-              return { ok: true };
+        if (options.allowToolsIncompatible !== true) {
+          return {
+            ok: false,
+            message:
+              `Selected Ollama model '${model}' does not support tool calling, which ` +
+              `NemoClaw agents require. Run \`ollama show <model>\` to inspect a ` +
+              `model's capabilities and pick one whose list includes 'tools'.`,
+          };
+        }
+        // Override accepted — log and fall through to the Spark CPU-only
+        // runtime check below so it still enforces. (#4241)
+        console.warn(
+          `  ⚠ Ollama model '${model}' confirmed not to support tools; ` +
+            `continuing because the no-tools override was accepted.`,
+        );
+      } else {
+        // Ollama checks available RAM instead of total; false positive on DGX Spark
+        // unified-memory hosts where GPU and CPU share the same 128 GB pool. (#3251)
+        const memMatch = errText.match(
+          /model requires more system memory \(([0-9.]+)\s*GiB\) than is available \([0-9.]+\s*GiB\)/i,
+        );
+        if (memMatch && sparkHost) {
+          const requiresGiB = parseFloat(memMatch[1]);
+          const freeOut = capture(["free", "-m"], { ignoreError: true });
+          if (freeOut) {
+            const memLine = freeOut.split("\n").find((l: string) => l.includes("Mem:"));
+            if (memLine) {
+              const totalMB = parseInt(memLine.trim().split(/\s+/)[1], 10) || 0;
+              const totalGiB = totalMB / 1024;
+              if (totalGiB >= requiresGiB) {
+                return { ok: true };
+              }
             }
           }
         }
+        return {
+          ok: false,
+          message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
+        };
       }
-      return {
-        ok: false,
-        message: `Selected Ollama model '${model}' failed the local probe: ${errText}`,
-      };
     }
   } catch {
     /* ignored */
@@ -1008,6 +1018,29 @@ export function validateOllamaModel(
   return { ok: true };
 }
 
+// Helpers threading the accepted "use this no-tools Ollama model anyway" override
+// (see #4241) through onboard validators so they don't loop the wizard back to
+// model selection. NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 still waives tool calling.
+export function buildOllamaProbeOptions(allowToolsIncompatible: boolean): {
+  skipResponsesProbe: true;
+  requireChatCompletionsToolCalling: boolean;
+  allowHostDockerInternal: boolean;
+} {
+  const envWaivesTools = process.env.NEMOCLAW_OLLAMA_REQUIRE_TOOLS === "0";
+  return {
+    skipResponsesProbe: true,
+    requireChatCompletionsToolCalling: !allowToolsIncompatible && !envWaivesTools,
+    allowHostDockerInternal: getResolvedOllamaHost() === OLLAMA_HOST_DOCKER_INTERNAL,
+  };
+}
+/** Runs validateOllamaModel without injected impls, forwarding the no-tools override flag. */
+export function validateOllamaModelWithToolsOverride(
+  model: string,
+  allowToolsIncompatible: boolean,
+): ValidationResult {
+  return validateOllamaModel(model, undefined, undefined, undefined, { allowToolsIncompatible });
+}
+
 // ─── Tools-capability probe (issue #2667) ─────────────────────────
 //
 // Ollama exposes a model's declared capabilities via /api/show. Tool calling

diff --git a/src/lib/inference/ollama/proxy.ts b/src/lib/inference/ollama/proxy.ts
@@ -688,7 +688,7 @@ function printToolsIncompatibleWarning(model: string): void {
 
 async function checkOllamaModelToolSupport(
   model: string,
-): Promise<{ ok: boolean; message?: string }> {
+): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
   const caps = probeOllamaModelCapabilities(model);
 
   if (caps.supportsTools === true) {
@@ -705,19 +705,23 @@ async function checkOllamaModelToolSupport(
   }
 
   // supportsTools === false — model is on disk but advertises no tools support.
+  // Every code path below that returns ok:true must also set
+  // allowToolsIncompatible:true so downstream validators (validateOllamaModel,
+  // probeChatCompletionsToolCalling via setupOllama / setupInference) don't
+  // reject the same model on the same condition — see issue #4241.
   printToolsIncompatibleWarning(model);
 
   if (isProxyAutoYes()) {
     console.log("  Continuing because --yes was passed.");
-    return { ok: true };
+    return { ok: true, allowToolsIncompatible: true };
   }
 
   if (isProxyNonInteractive()) {
     if (process.env.NEMOCLAW_OLLAMA_REQUIRE_TOOLS === "0") {
       console.error(
         `  NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 set — proceeding with '${model}' despite missing 'tools'.`,
       );
-      return { ok: true };
+      return { ok: true, allowToolsIncompatible: true };
     }
     console.error(
       "  Re-run with NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 to override, or pick a tools-capable model.",
@@ -729,10 +733,13 @@ async function checkOllamaModelToolSupport(
   if (!proceed) {
     return { ok: false, message: "Choose a tools-capable model." };
   }
-  return { ok: true };
+  return { ok: true, allowToolsIncompatible: true };
 }
 
-async function prepareOllamaModel(model, installedModels = []) {
+async function prepareOllamaModel(
+  model,
+  installedModels: string[] = [],
+): Promise<{ ok: boolean; message?: string; allowToolsIncompatible?: boolean }> {
   const alreadyInstalled = installedModels.includes(model);
   if (!alreadyInstalled) {
     console.log(`  Pulling Ollama model: ${model}`);
@@ -753,7 +760,11 @@ async function prepareOllamaModel(model, installedModels = []) {
 
   console.log(`  Loading Ollama model: ${model}`);
   run(getOllamaWarmupCommand(model), { ignoreError: true });
-  return validateOllamaModel(model);
+  const allowToolsIncompatible = capCheck.allowToolsIncompatible === true;
+  const result = validateOllamaModel(model, undefined, undefined, undefined, {
+    allowToolsIncompatible,
+  });
+  return { ...result, allowToolsIncompatible };
 }
 
 /**

diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
@@ -4107,7 +4107,7 @@ const { readLiveInference, readRecordedProvider, readRecordedNimContainer, readR
   });
 
 type OllamaModelSelectionOutcome =
-  | { outcome: "selected"; model: string }
+  | { outcome: "selected"; model: string; allowToolsIncompatible: boolean }
   | { outcome: "back-to-selection" };
 // Pick an Ollama model, pull it if missing, and validate it via the local
 // proxy. Shared by the three Ollama provider branches (running, Windows-host
@@ -4169,6 +4169,7 @@ async function selectAndValidateOllamaModel(
       console.log("");
       continue;
     }
+    const allowToolsIncompatible = probe.allowToolsIncompatible === true;
     const validationBaseUrl = getLocalProviderValidationBaseUrl(provider);
     if (!validationBaseUrl)
       abortNonInteractive("Local Ollama validation URL could not be determined.");
@@ -4179,12 +4180,7 @@ async function selectAndValidateOllamaModel(
       null,
       "Choose a different Ollama model or select Other.",
       null,
-      {
-        skipResponsesProbe: true,
-        requireChatCompletionsToolCalling: process.env.NEMOCLAW_OLLAMA_REQUIRE_TOOLS !== "0",
-        allowHostDockerInternal:
-          localInference.getResolvedOllamaHost() === OLLAMA_HOST_DOCKER_INTERNAL,
-      },
+      localInference.buildOllamaProbeOptions(allowToolsIncompatible),
     );
     if (validation.retry === "selection") return { outcome: "back-to-selection" };
     if (!validation.ok) {
@@ -4199,7 +4195,7 @@ async function selectAndValidateOllamaModel(
       );
     }
     localInference.applyOllamaRuntimeContextWindow(selectedModel);
-    return { outcome: "selected", model: selectedModel };
+    return { outcome: "selected", model: selectedModel, allowToolsIncompatible };
   }
 }
 
@@ -4216,6 +4212,7 @@ async function setupNim(
   hermesToolGateways: string[];
   preferredInferenceApi: string | null;
   nimContainer: string | null;
+  allowToolsIncompatible: boolean;
 }> {
   step(3, 8, "Configuring inference provider");
 
@@ -4227,6 +4224,7 @@ async function setupNim(
   let hermesAuthMethod: HermesAuthMethod | null = null;
   let hermesToolGateways: string[] = [];
   let preferredInferenceApi: string | null = null;
+  let allowToolsIncompatible = false;
 
   // Detect local inference options. Bound curl with --connect-timeout/--max-time
   // so a half-open port or stalled listener cannot hang the onboard at step 3
@@ -5139,7 +5137,7 @@ async function setupNim(
             recoveredModel: recoveredFromSandbox ? recoveredModel : null,
           });
           if (result.outcome === "back-to-selection") continue selectionLoop;
-          model = result.model;
+          ({ model, allowToolsIncompatible } = result);
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5225,7 +5223,7 @@ async function setupNim(
             resetOllamaHostCache();
             continue selectionLoop;
           }
-          model = result.model;
+          ({ model, allowToolsIncompatible } = result);
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5267,7 +5265,7 @@ async function setupNim(
             recoveredModel: recoveredFromSandbox ? recoveredModel : null,
           });
           if (result.outcome === "back-to-selection") continue selectionLoop;
-          model = result.model;
+          ({ model, allowToolsIncompatible } = result);
           preferredInferenceApi = "openai-completions";
         }
         break;
@@ -5425,6 +5423,7 @@ async function setupNim(
     hermesToolGateways,
     preferredInferenceApi,
     nimContainer,
+    allowToolsIncompatible,
   };
 }
 
@@ -5438,6 +5437,7 @@ async function setupInference(
   credentialEnv: string | null = null,
   hermesAuthMethod: HermesAuthMethod | string | null = null,
   hermesToolGateways: string[] = [],
+  options: { allowToolsIncompatible?: boolean } = {},
 ): Promise<{ ok: true; retry?: undefined } | { retry: "selection" }> {
   step(4, 8, "Setting up inference provider");
   runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
@@ -5742,7 +5742,7 @@ async function setupInference(
     if (await applyLocalInferenceRoute("ollama-local", model)) return { retry: "selection" };
     console.log(`  Priming Ollama model: ${model}`);
     run(getOllamaWarmupCommand(model), { ignoreError: true });
-    const probe = validateOllamaModel(model);
+    const probe = localInference.validateOllamaModelWithToolsOverride(model, options.allowToolsIncompatible === true);
     if (!probe.ok) {
       console.error(`  ${probe.message}`);
       process.exit(1);

diff --git a/src/lib/onboard/machine/handlers/provider-inference.test.ts b/src/lib/onboard/machine/handlers/provider-inference.test.ts
@@ -148,6 +148,7 @@ describe("handleProviderInferenceState", () => {
       "NVIDIA_API_KEY",
       null,
       [],
+      { allowToolsIncompatible: false },
     );
     expect(calls.deleteEnv).toHaveBeenCalledWith("NVIDIA_API_KEY");
     expect(result).toMatchObject({
@@ -311,6 +312,7 @@ describe("handleProviderInferenceState", () => {
       "COMPATIBLE_API_KEY",
       null,
       [],
+      { allowToolsIncompatible: false },
     );
   });
 
@@ -358,4 +360,34 @@ describe("handleProviderInferenceState", () => {
     expect(calls.exit).toHaveBeenCalledWith(0);
     expect(calls.setupInference).not.toHaveBeenCalled();
   });
+
+  // Regression: #4241. When the provider selection step accepted a no-tools
+  // Ollama model (the user answered "yes" to the override prompt or
+  // NEMOCLAW_OLLAMA_REQUIRE_TOOLS=0 was set), the same flag must reach
+  // setupInference as its trailing options argument so the validateOllamaModel
+  // pass there does not reject the model for the condition already overridden.
+  it("forwards allowToolsIncompatible from provider selection into setupInference (#4241)", async () => {
+    const setupNim = vi.fn(async () => ({
+      ...baseSelection,
+      provider: "ollama-local",
+      model: "tinyllama:1.1b",
+      endpointUrl: "http://127.0.0.1:11434/v1",
+      credentialEnv: null,
+      allowToolsIncompatible: true,
+    }));
+    const { deps, calls } = createDeps({ setupNim });
+
+    await handleProviderInferenceState(baseOptions(deps));
+
+    expect(calls.setupInference).toHaveBeenCalledWith(
+      "my-assistant",
+      "tinyllama:1.1b",
+      "ollama-local",
+      "http://127.0.0.1:11434/v1",
+      null,
+      null,
+      [],
+      { allowToolsIncompatible: true },
+    );
+  });
 });
diff --git a/src/lib/onboard/machine/handlers/provider-inference.ts b/src/lib/onboard/machine/handlers/provider-inference.ts
@@ -16,6 +16,7 @@ export interface ProviderSelectionResult {
   hermesToolGateways: string[];
   preferredInferenceApi: string | null;
   nimContainer: string | null;
+  allowToolsIncompatible?: boolean;
 }
 
 export interface ProviderInferenceStateOptions<Gpu, Agent, Host> {
@@ -54,6 +55,7 @@ export interface ProviderInferenceStateOptions<Gpu, Agent, Host> {
       credentialEnv: string | null,
       hermesAuthMethod: string | null,
       hermesToolGateways: string[],
+      options?: { allowToolsIncompatible?: boolean },
     ): Promise<ProviderInferenceRetry>;
     startRecordedStep(
       stepName: string,
@@ -166,6 +168,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
   let nimContainer = initial.nimContainer;
   const webSearchConfig = initial.webSearchConfig;
   let forceProviderSelection = initialForceProviderSelection;
+  let allowToolsIncompatible = false;
 
   while (true) {
     let forceInferenceSetup = false;
@@ -225,6 +228,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
       hermesToolGateways = selection.hermesToolGateways;
       preferredInferenceApi = selection.preferredInferenceApi;
       nimContainer = selection.nimContainer;
+      allowToolsIncompatible = selection.allowToolsIncompatible === true;
       shouldRecordProviderSelection = true;
     }
 
@@ -277,6 +281,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
                 credentialEnv,
                 hermesAuthMethod,
                 hermesToolGateways,
+                { allowToolsIncompatible },
               ),
           );
         } finally {
@@ -360,6 +365,7 @@ export async function handleProviderInferenceState<Gpu, Agent, Host>({
             credentialEnv,
             hermesAuthMethod,
             hermesToolGateways,
+            { allowToolsIncompatible },
           ),
       );
     } finally {