From 280df684e97f13210e4cf427c042041594f418c2 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 12:06:48 -0700 Subject: [PATCH 1/4] refactor(onboard): extract provider menu builder --- src/lib/onboard.ts | 87 ++++++----------- src/lib/onboard/provider-menu.test.ts | 135 ++++++++++++++++++++++++++ src/lib/onboard/provider-menu.ts | 127 ++++++++++++++++++++++++ 3 files changed, 291 insertions(+), 58 deletions(-) create mode 100644 src/lib/onboard/provider-menu.test.ts create mode 100644 src/lib/onboard/provider-menu.ts diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index dc8a62eb04..a718421fdb 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -216,9 +216,11 @@ const { const { checkOllamaPortsOrWarn, resolveOllamaInstallMenuEntry, - resolveRunningOllamaMenuEntry, assertOllamaUpgradeApplied, } = require("./onboard/ollama-install-menu"); +const { + buildInferenceProviderMenu, +}: typeof import("./onboard/provider-menu") = require("./onboard/provider-menu"); const { ensureOllamaAuthProxy, getOllamaProxyToken, @@ -3944,7 +3946,7 @@ async function createSandbox( // ── Step 3: Inference selection ────────────────────────────────── -type ProviderChoice = { key: string; label: string }; +type ProviderChoice = import("./onboard/provider-menu").ProviderMenuChoice; const { readRecordedProvider, readRecordedNimContainer, readRecordedModel } = providerRecovery.createProviderRecoveryHelpers({ @@ -4130,77 +4132,46 @@ async function setupNim( ? 
getNonInteractiveModel(requestedProvider || "build") : null; const agentProviderOptions = getAgentInferenceProviderOptions(agent); - const hermesProviderAvailable = agentProviderOptions.includes("hermesProvider"); - const options: Array<{ key: string; label: string }> = []; - options.push({ key: "build", label: "NVIDIA Endpoints" }); - options.push({ key: "openai", label: "OpenAI" }); - options.push({ key: "custom", label: "Other OpenAI-compatible endpoint" }); - options.push({ key: "anthropic", label: "Anthropic" }); - options.push({ key: "anthropicCompatible", label: "Other Anthropic-compatible endpoint" }); - options.push({ key: "gemini", label: "Google Gemini" }); - const runningOllamaMenu = resolveRunningOllamaMenuEntry({ + const ollamaInstallMenu = resolveOllamaInstallMenuEntry({ hasOllama, ollamaRunning, + hasWindowsOllama, ollamaHost, + platform: process.platform, isWsl: isWsl(), + }); + + // Model Router: complexity-based routing via blueprint config. + const blueprintRouterCfg = loadBlueprintProfile("routed"); + const { options, hermesProviderAvailable } = buildInferenceProviderMenu({ + remoteProviderConfig: REMOTE_PROVIDER_CONFIG, + agentProviderOptions, + experimental: EXPERIMENTAL, + gpuNimCapable: Boolean(gpu && gpu.nimCapable), + hasOllama, + ollamaRunning, + ollamaHost, ollamaPort: OLLAMA_PORT, + isWsl: isWsl(), + hasWindowsOllama, + isWindowsHostOllama, windowsHostLabelSuffix: windowsHostOllamaDockerRequirement.supported ? 
"" : windowsHostOllamaDockerRequirement.labelSuffix, - }); - if (runningOllamaMenu) options.push(runningOllamaMenu); - if (EXPERIMENTAL && gpu && gpu.nimCapable) { - options.push({ key: "nim-local", label: "Local NVIDIA NIM [experimental]" }); - } - options.push( - ...buildVllmMenuEntries({ + windowsHostInstallLabel: windowsHostOllamaDockerRequirement.installLabel, + windowsHostStartLabel: windowsHostOllamaDockerRequirement.startLabel, + windowsOllamaReachable, + winOllamaLoopbackOnly, + ollamaInstallEntry: ollamaInstallMenu.entry, + vllmEntries: buildVllmMenuEntries({ vllmRunning, vllmProfile, experimental: EXPERIMENTAL, platform: gpu?.platform, hasVllmImage, }), - ); - // Skipped when Windows-host already won the cache: the running entry - // above already covers that case. - if (hasWindowsOllama && !isWindowsHostOllama) { - options.push({ - key: "start-windows-ollama", - label: windowsHostOllamaDockerRequirement.startLabel({ - reachable: windowsOllamaReachable, - loopbackOnly: winOllamaLoopbackOnly, - }), - }); - } - // On WSL, always offer to install Ollama on the Windows host when not - // already installed, regardless of WSL Ollama state — users may prefer the - // Windows-host instance (GPU access) even with WSL Ollama running. - if (isWsl() && !hasWindowsOllama) { - options.push({ - key: "install-windows-ollama", - label: windowsHostOllamaDockerRequirement.installLabel, - }); - } - const ollamaInstallMenu = resolveOllamaInstallMenuEntry({ - hasOllama, - ollamaRunning, - hasWindowsOllama, - ollamaHost, - platform: process.platform, - isWsl: isWsl(), + routedEnabled: blueprintRouterCfg?.router?.enabled === true, }); - if (ollamaInstallMenu.entry) options.push(ollamaInstallMenu.entry); - - // Model Router: complexity-based routing via blueprint config. 
- const blueprintRouterCfg = loadBlueprintProfile("routed"); - if (blueprintRouterCfg && blueprintRouterCfg.router?.enabled === true) { - options.push({ key: "routed", label: "Model Router (experimental)" }); - } - for (const providerKey of agentProviderOptions) { - const remoteConfig = REMOTE_PROVIDER_CONFIG[providerKey]; - if (!remoteConfig || options.some((option) => option.key === providerKey)) continue; - options.push({ key: providerKey, label: remoteConfig.label }); - } function rejectWindowsHostOllama(providerKey: string, windowsHostSelected: boolean): boolean { return rejectUnsupportedWindowsHostOllama( diff --git a/src/lib/onboard/provider-menu.test.ts b/src/lib/onboard/provider-menu.test.ts new file mode 100644 index 0000000000..fc0744773f --- /dev/null +++ b/src/lib/onboard/provider-menu.test.ts @@ -0,0 +1,135 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { buildInferenceProviderMenu } from "../../../dist/lib/onboard/provider-menu"; + +const REMOTE_PROVIDER_CONFIG = { + build: { label: "NVIDIA Endpoints" }, + openai: { label: "OpenAI" }, + custom: { label: "Other OpenAI-compatible endpoint" }, + anthropic: { label: "Anthropic" }, + anthropicCompatible: { label: "Other Anthropic-compatible endpoint" }, + gemini: { label: "Google Gemini" }, + hermesProvider: { label: "Hermes Provider" }, +}; + +function buildMenu(overrides: Partial[0]> = {}) { + return buildInferenceProviderMenu({ + remoteProviderConfig: REMOTE_PROVIDER_CONFIG, + agentProviderOptions: [], + experimental: false, + gpuNimCapable: false, + hasOllama: false, + ollamaRunning: false, + ollamaHost: null, + ollamaPort: 11434, + isWsl: false, + hasWindowsOllama: false, + isWindowsHostOllama: false, + windowsHostLabelSuffix: "", + windowsHostInstallLabel: "Install Ollama on Windows host (recommended)", + windowsHostStartLabel: () => "Start 
Ollama on Windows host (suggested)", + windowsOllamaReachable: false, + winOllamaLoopbackOnly: false, + ollamaInstallEntry: null, + vllmEntries: [], + routedEnabled: false, + ...overrides, + }); +} + +describe("buildInferenceProviderMenu", () => { + it("returns the base remote providers in the existing prompt order", () => { + const result = buildMenu(); + + expect(result.hermesProviderAvailable).toBe(false); + expect(result.options.map((option) => option.key)).toEqual([ + "build", + "openai", + "custom", + "anthropic", + "anthropicCompatible", + "gemini", + ]); + }); + + it("adds local, routed, and agent-scoped providers after the base remote entries", () => { + const result = buildMenu({ + agentProviderOptions: ["hermesProvider", "build"], + experimental: true, + gpuNimCapable: true, + hasOllama: true, + ollamaRunning: true, + ollamaHost: "127.0.0.1", + isWsl: false, + ollamaInstallEntry: { key: "install-ollama", label: "Install Ollama (Linux)" }, + vllmEntries: [{ key: "install-vllm", label: "Install vLLM (DGX Spark)" }], + routedEnabled: true, + }); + + expect(result.hermesProviderAvailable).toBe(true); + expect(result.options.map((option) => option.key)).toEqual([ + "build", + "openai", + "custom", + "anthropic", + "anthropicCompatible", + "gemini", + "ollama", + "nim-local", + "install-vllm", + "install-ollama", + "routed", + "hermesProvider", + ]); + expect(result.options.find((option) => option.key === "build")?.label).toBe("NVIDIA Endpoints"); + expect(result.options.find((option) => option.key === "hermesProvider")?.label).toBe( + "Hermes Provider", + ); + }); + + it("offers Windows-host Ollama install when WSL has no Windows Ollama", () => { + const result = buildMenu({ + isWsl: true, + hasWindowsOllama: false, + windowsHostInstallLabel: "Install Ollama on Windows host (requires Docker Desktop)", + }); + + expect(result.options.at(-1)).toEqual({ + key: "install-windows-ollama", + label: "Install Ollama on Windows host (requires Docker Desktop)", + }); + 
}); + + it("offers Windows-host Ollama start when detected but not currently selected", () => { + const result = buildMenu({ + isWsl: true, + hasWindowsOllama: true, + isWindowsHostOllama: false, + windowsOllamaReachable: true, + windowsHostStartLabel: ({ reachable }) => + reachable ? "Use Ollama on Windows host - running" : "Start Ollama on Windows host", + }); + + expect(result.options.at(-1)).toEqual({ + key: "start-windows-ollama", + label: "Use Ollama on Windows host - running", + }); + }); + + it("does not add a separate Windows-host start entry when running Ollama already resolves there", () => { + const result = buildMenu({ + isWsl: true, + hasOllama: false, + ollamaRunning: true, + ollamaHost: "host.docker.internal", + hasWindowsOllama: true, + isWindowsHostOllama: true, + }); + + expect(result.options.map((option) => option.key)).toContain("ollama"); + expect(result.options.map((option) => option.key)).not.toContain("start-windows-ollama"); + }); +}); diff --git a/src/lib/onboard/provider-menu.ts b/src/lib/onboard/provider-menu.ts new file mode 100644 index 0000000000..19b390d1f6 --- /dev/null +++ b/src/lib/onboard/provider-menu.ts @@ -0,0 +1,127 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0

import { resolveRunningOllamaMenuEntry } from "./ollama-install-menu";

/** A single selectable row in the inference-provider prompt. */
export interface ProviderMenuChoice {
  /** Identifier the caller dispatches on (for example "build", "ollama", "routed"). */
  key: string;
  /** Text shown to the user for this row. */
  label: string;
}

/** The part of a remote provider's configuration that the menu builder reads. */
interface RemoteProviderMenuConfig {
  label: string;
}

/** Builds the label of the "start-windows-ollama" row from the probed Windows-host state. */
type WindowsHostOllamaStartLabel = (opts: { reachable: boolean; loopbackOnly: boolean }) => string;

/**
 * Everything {@link buildInferenceProviderMenu} needs to assemble the menu.
 *
 * All values are precomputed by the caller; the builder itself performs no
 * probing beyond delegating to `resolveRunningOllamaMenuEntry`.
 */
export interface BuildInferenceProviderMenuInput {
  /** Provider key → configuration. Only `label` is read. */
  remoteProviderConfig: Record<string, RemoteProviderMenuConfig>;
  /** Provider keys offered for the active agent; appended last, without duplicates. */
  agentProviderOptions: readonly string[];
  /** Gates the "nim-local" row together with `gpuNimCapable`. */
  experimental: boolean;
  gpuNimCapable: boolean;
  hasOllama: boolean;
  ollamaRunning: boolean;
  ollamaHost: string | null;
  ollamaPort: number;
  isWsl: boolean;
  hasWindowsOllama: boolean;
  /** True when the running Ollama already resolves to the Windows host. */
  isWindowsHostOllama: boolean;
  windowsHostLabelSuffix: string;
  windowsHostInstallLabel: string;
  windowsHostStartLabel: WindowsHostOllamaStartLabel;
  windowsOllamaReachable: boolean;
  winOllamaLoopbackOnly: boolean;
  /** Pre-resolved "install Ollama" row, or `null` when none should be offered. */
  ollamaInstallEntry: ProviderMenuChoice | null;
  /** Pre-built vLLM rows, inserted as-is. */
  vllmEntries: readonly ProviderMenuChoice[];
  /** When true, the "routed" (Model Router) row is offered. */
  routedEnabled: boolean;
}

/** Result of {@link buildInferenceProviderMenu}. */
export interface InferenceProviderMenu {
  /** Menu rows in display order. */
  options: ProviderMenuChoice[];
  /** True when `agentProviderOptions` contains "hermesProvider". */
  hermesProviderAvailable: boolean;
}

// Always-present remote providers, in prompt order. The labels here are
// fallbacks; a label from `remoteProviderConfig` wins when one is configured.
const BASE_REMOTE_PROVIDER_OPTIONS: readonly ProviderMenuChoice[] = [
  { key: "build", label: "NVIDIA Endpoints" },
  { key: "openai", label: "OpenAI" },
  { key: "custom", label: "Other OpenAI-compatible endpoint" },
  { key: "anthropic", label: "Anthropic" },
  { key: "anthropicCompatible", label: "Other Anthropic-compatible endpoint" },
  { key: "gemini", label: "Google Gemini" },
];

/**
 * Looks up a provider's config by own property only.
 *
 * Plain indexing would resolve keys such as "constructor" or "toString" to
 * inherited `Object.prototype` members, which are truthy but carry no `label`.
 */
function ownRemoteConfig(
  config: Record<string, RemoteProviderMenuConfig>,
  providerKey: string,
): RemoteProviderMenuConfig | undefined {
  return Object.prototype.hasOwnProperty.call(config, providerKey)
    ? config[providerKey]
    : undefined;
}

/** Returns `fallback` with its label replaced by the configured label, when one exists. */
function configuredRemoteOption(
  config: Record<string, RemoteProviderMenuConfig>,
  fallback: ProviderMenuChoice,
): ProviderMenuChoice {
  return {
    key: fallback.key,
    label: ownRemoteConfig(config, fallback.key)?.label ?? fallback.label,
  };
}

/** Appends `providerKey` when it has a config entry and is not already in `options`. */
function pushUniqueRemoteProviderOption(
  options: ProviderMenuChoice[],
  config: Record<string, RemoteProviderMenuConfig>,
  providerKey: string,
): void {
  const remoteConfig = ownRemoteConfig(config, providerKey);
  if (!remoteConfig || options.some((option) => option.key === providerKey)) return;
  options.push({ key: providerKey, label: remoteConfig.label });
}

/**
 * Assembles the inference-provider menu.
 *
 * Row order: the six base remote providers, the running-Ollama row, local NIM,
 * the vLLM rows, the Windows-host Ollama start/install rows, the Ollama install
 * row, the Model Router row, and finally any agent-scoped remote providers that
 * are not already listed.
 *
 * @param input Precomputed host and configuration state.
 * @returns The ordered rows plus whether the agent offers "hermesProvider".
 */
export function buildInferenceProviderMenu(
  input: BuildInferenceProviderMenuInput,
): InferenceProviderMenu {
  const options: ProviderMenuChoice[] = BASE_REMOTE_PROVIDER_OPTIONS.map((option) =>
    configuredRemoteOption(input.remoteProviderConfig, option),
  );

  const runningOllamaMenu = resolveRunningOllamaMenuEntry({
    hasOllama: input.hasOllama,
    ollamaRunning: input.ollamaRunning,
    ollamaHost: input.ollamaHost,
    isWsl: input.isWsl,
    ollamaPort: input.ollamaPort,
    windowsHostLabelSuffix: input.windowsHostLabelSuffix,
  });
  if (runningOllamaMenu) options.push(runningOllamaMenu);

  if (input.experimental && input.gpuNimCapable) {
    options.push({ key: "nim-local", label: "Local NVIDIA NIM [experimental]" });
  }

  options.push(...input.vllmEntries);

  // Skipped when Windows-host already won the cache: the running entry
  // above already covers that case.
  if (input.hasWindowsOllama && !input.isWindowsHostOllama) {
    options.push({
      key: "start-windows-ollama",
      label: input.windowsHostStartLabel({
        reachable: input.windowsOllamaReachable,
        loopbackOnly: input.winOllamaLoopbackOnly,
      }),
    });
  }

  // On WSL, always offer to install Ollama on the Windows host when not
  // already installed, regardless of WSL Ollama state — users may prefer the
  // Windows-host instance (GPU access) even with WSL Ollama running.
  if (input.isWsl && !input.hasWindowsOllama) {
    options.push({
      key: "install-windows-ollama",
      label: input.windowsHostInstallLabel,
    });
  }

  if (input.ollamaInstallEntry) options.push(input.ollamaInstallEntry);

  // Model Router: complexity-based routing via blueprint config.
  if (input.routedEnabled) {
    options.push({ key: "routed", label: "Model Router (experimental)" });
  }

  for (const providerKey of input.agentProviderOptions) {
    pushUniqueRemoteProviderOption(options, input.remoteProviderConfig, providerKey);
  }

  return {
    options,
    hermesProviderAvailable: input.agentProviderOptions.includes("hermesProvider"),
  };
}
1b99e5d49c69bb10ccc2e3020eafb8681ff206e0 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 12:19:07 -0700 Subject: [PATCH 2/4] test(e2e): narrow onboard inference smoke registry stub --- test/e2e/test-onboard-inference-smoke.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/e2e/test-onboard-inference-smoke.sh b/test/e2e/test-onboard-inference-smoke.sh index b63919a5ed..157a654b46 100755 --- a/test/e2e/test-onboard-inference-smoke.sh +++ b/test/e2e/test-onboard-inference-smoke.sh @@ -55,6 +55,14 @@ const Module = require("module"); const originalLoad = Module._load; const calls = []; +function isRootRegistryRequest(request, parent) { + if (request.endsWith("/dist/lib/registry") || request.endsWith("/dist/lib/registry.js")) { + return true; + } + if (request !== "./registry") return false; + return parent && parent.filename && parent.filename.endsWith("/dist/lib/onboard.js"); +} + Module._load = function patchedLoad(request, parent, isMain) { if (request === "./adapters/openshell/resolve" || request.endsWith("/adapters/openshell/resolve")) { return { resolveOpenshell: () => "/usr/bin/openshell" }; @@ -113,7 +121,7 @@ Module._load = function patchedLoad(request, parent, isMain) { }, }; } - if (request === "./registry" || request.endsWith("/registry")) { + if (isRootRegistryRequest(request, parent)) { return { updateSandbox: (_name, patch) => calls.push(["registry.updateSandbox", patch]), getSandbox: () => null, From d1e02e22ef437ecb5f1169bc585abceeba8a9687 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 10 Jun 2026 12:45:36 -0700 Subject: [PATCH 3/4] refactor(onboard): extract provider host state Signed-off-by: Carlos Villela --- src/lib/onboard.ts | 118 +++-------- src/lib/onboard/provider-host-state.test.ts | 187 +++++++++++++++++ src/lib/onboard/provider-host-state.ts | 220 ++++++++++++++++++++ 3 files changed, 437 insertions(+), 88 deletions(-) create mode 100644 
src/lib/onboard/provider-host-state.test.ts create mode 100644 src/lib/onboard/provider-host-state.ts diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index a718421fdb..533fad0d99 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -122,12 +122,6 @@ const { }: typeof import("./onboard/model-router-process") = require("./onboard/model-router-process"); const bedrockRuntimeOnboard: typeof import("./onboard/bedrock-runtime") = require("./onboard/bedrock-runtime"); -const { - buildVllmMenuEntries, -}: typeof import("./onboard/vllm-menu") = require("./onboard/vllm-menu"); -const { - detectWindowsHostOllama, -}: typeof import("./onboard/windows-host-ollama") = require("./onboard/windows-host-ollama"); const { installOllamaOnLinux, }: typeof import("./onboard/install-ollama-linux") = require("./onboard/install-ollama-linux"); @@ -203,24 +197,24 @@ const { } = require("./core/ports"); const localInference: typeof import("./inference/local") = require("./inference/local"); const { - findReachableOllamaHost, resetOllamaHostCache, getLocalProviderBaseUrl, getLocalProviderHealthCheck, getLocalProviderValidationBaseUrl, getOllamaModelOptions, getOllamaWarmupCommand, - OLLAMA_HOST_DOCKER_INTERNAL, validateLocalProvider, } = localInference; const { checkOllamaPortsOrWarn, - resolveOllamaInstallMenuEntry, assertOllamaUpgradeApplied, } = require("./onboard/ollama-install-menu"); const { buildInferenceProviderMenu, }: typeof import("./onboard/provider-menu") = require("./onboard/provider-menu"); +const { + detectInferenceProviderHostState, +}: typeof import("./onboard/provider-host-state") = require("./onboard/provider-host-state"); const { ensureOllamaAuthProxy, getOllamaProxyToken, @@ -235,7 +229,7 @@ const { switchToWindowsOllamaHost, printWindowsOllamaTimeoutDiagnostics, } = require("./inference/ollama/windows"); -const { detectVllmProfile, installVllm } = require("./inference/vllm"); +const { installVllm } = require("./inference/vllm"); const inferenceConfig: typeof 
import("./inference/config") = require("./inference/config"); const { DEFAULT_CLOUD_MODEL, getProviderSelectionConfig, parseGatewayInference } = inferenceConfig; @@ -309,7 +303,6 @@ const platformUtils: typeof import("./platform") = require("./platform"); const { isWsl, shouldPatchCoredns } = platformUtils; const { getContainerRuntime, - getWindowsHostOllamaDockerRequirement, repairLocalInferenceSystemdOverrideOrExit, rejectUnsupportedWindowsHostOllama, shouldFrontOllamaWithProxy, @@ -1371,12 +1364,6 @@ function buildGatewayClusterExecArgv(script: string): string[] { return dockerExecArgv(getGatewayClusterContainerName(), ["sh", "-lc", script]); } -function hostCommandExists(commandName: string): boolean { - return !!runCapture(["sh", "-c", 'command -v "$1"', "--", commandName], { - ignoreError: true, - }); -} - function captureProcessArgs(pid: number): string { return runCapture(["ps", "-p", String(pid), "-o", "args="], { ignoreError: true, @@ -4074,72 +4061,33 @@ async function setupNim( let preferredInferenceApi: string | null = null; let allowToolsIncompatible = false; - const localProbeCurlArgs = ["--connect-timeout", "2", "--max-time", "5"] as const; - const hasOllama = hostCommandExists("ollama"); - const ollamaHost = findReachableOllamaHost(); - const ollamaRunning = ollamaHost !== null; - const isWindowsHostOllama = ollamaHost === OLLAMA_HOST_DOCKER_INTERNAL; - const vllmRunning = !!runCapture( - ["curl", "-sf", ...localProbeCurlArgs, `http://127.0.0.1:${VLLM_PORT}/v1/models`], - { ignoreError: true }, - ); - // Pick a vLLM install recipe for this host. Profiles live in inference/vllm.ts; - // null means "no supported platform" (vLLM stays behind EXPERIMENTAL). - const vllmProfile = detectVllmProfile(gpu); - // If the profile's image is already cached, the install path is really a - // "start" — docker pull is a no-op and the container can come up in seconds. 
- const hasVllmImage = !!( - vllmProfile && - docker.dockerCapture(["images", "-q", vllmProfile.image], { ignoreError: true }).trim() - ); - const windowsHostOllamaDockerRequirement = getWindowsHostOllamaDockerRequirement( - isWsl() ? getContainerRuntime() : null, - ); - // Probed even when WSL has its own Ollama: users may prefer the Windows - // instance for GPU access and a unified model cache. See - // src/lib/onboard/windows-host-ollama.ts for process/path fallback details. - const winOllamaState = detectWindowsHostOllama(); - const hasWindowsOllama = winOllamaState.installed; - const winOllamaInstalledPath = winOllamaState.installedPath; - const winOllamaLoopbackOnly = winOllamaState.loopbackOnly; - - // Independent of findReachableOllamaHost: when WSL Ollama wins the cache - // on 127.0.0.1, Windows-host may also be running on 0.0.0.0 and we want - // to offer a "switch" without restarting anything. - let windowsOllamaReachable = false; - if (isWsl() && !isWindowsHostOllama) { - windowsOllamaReachable = !!runCapture( - ["curl", "-sf", ...localProbeCurlArgs, `http://host.docker.internal:${OLLAMA_PORT}/api/tags`], - { ignoreError: true }, - ); - } - - // Mirrored mode shares loopback so both probes hit the same instance; - // only NAT mode actually has two separate daemons to warn about. 
- if (isWsl() && ollamaHost === "127.0.0.1" && windowsOllamaReachable) { - const networkingMode = runCapture(["wslinfo", "--networking-mode"], { - ignoreError: true, - }).trim(); - if (networkingMode !== "mirrored") { - console.log(""); - console.log(" ⚠ Ollama is running on both WSL and the Windows host."); - console.log(" Stop one to avoid duplicated GPU memory and model caches."); - console.log(""); - } - } + const providerHostState = detectInferenceProviderHostState({ + gpu, + experimental: EXPERIMENTAL, + }); + const { + hasOllama, + ollamaHost, + ollamaRunning, + isWindowsHostOllama, + isWsl: isWslHost, + hasWindowsOllama, + winOllamaInstalledPath, + winOllamaLoopbackOnly, + windowsOllamaReachable, + windowsHostOllamaDockerRequirement, + vllmRunning, + vllmProfile, + hasVllmImage, + vllmEntries, + ollamaInstallMenu, + gpuNimCapable, + } = providerHostState; const requestedProvider = getNonInteractiveProvider(); const requestedModel = isNonInteractive() ? getNonInteractiveModel(requestedProvider || "build") : null; const agentProviderOptions = getAgentInferenceProviderOptions(agent); - const ollamaInstallMenu = resolveOllamaInstallMenuEntry({ - hasOllama, - ollamaRunning, - hasWindowsOllama, - ollamaHost, - platform: process.platform, - isWsl: isWsl(), - }); // Model Router: complexity-based routing via blueprint config. 
const blueprintRouterCfg = loadBlueprintProfile("routed"); @@ -4147,12 +4095,12 @@ async function setupNim( remoteProviderConfig: REMOTE_PROVIDER_CONFIG, agentProviderOptions, experimental: EXPERIMENTAL, - gpuNimCapable: Boolean(gpu && gpu.nimCapable), + gpuNimCapable, hasOllama, ollamaRunning, ollamaHost, ollamaPort: OLLAMA_PORT, - isWsl: isWsl(), + isWsl: isWslHost, hasWindowsOllama, isWindowsHostOllama, windowsHostLabelSuffix: windowsHostOllamaDockerRequirement.supported @@ -4163,13 +4111,7 @@ async function setupNim( windowsOllamaReachable, winOllamaLoopbackOnly, ollamaInstallEntry: ollamaInstallMenu.entry, - vllmEntries: buildVllmMenuEntries({ - vllmRunning, - vllmProfile, - experimental: EXPERIMENTAL, - platform: gpu?.platform, - hasVllmImage, - }), + vllmEntries, routedEnabled: blueprintRouterCfg?.router?.enabled === true, }); @@ -4211,7 +4153,7 @@ async function setupNim( // (so the menu's "ollama" key points there), the availability // check below would pass and silently swap the daemon. Detect // and fail-loud with a hint. - if (isWsl() && recordedProvider === "ollama-local" && isWindowsHostOllama) { + if (isWslHost && recordedProvider === "ollama-local" && isWindowsHostOllama) { console.error( ` Recorded provider '${recordedProvider}' (WSL Ollama) is not available in this environment.`, ); diff --git a/src/lib/onboard/provider-host-state.test.ts b/src/lib/onboard/provider-host-state.test.ts new file mode 100644 index 0000000000..26bd0c7be5 --- /dev/null +++ b/src/lib/onboard/provider-host-state.test.ts @@ -0,0 +1,187 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0

import { describe, expect, it, vi } from "vitest";

import {
  detectInferenceProviderHostState,
  type DetectInferenceProviderHostStateDeps,
  type InferenceProviderHostGpu,
} from "../../../dist/lib/onboard/provider-host-state";

// Docker requirement returned by the stubbed topology helper: Windows-host
// Ollama is supported and the start label depends only on reachability.
const SUPPORTED_WINDOWS_OLLAMA = {
  supported: true,
  detectedRuntime: "Docker Desktop",
  installLabel: "Install Ollama on Windows host (recommended)",
  startLabel: ({ reachable }: { reachable: boolean; loopbackOnly: boolean }) =>
    reachable ? "Use Ollama on Windows host - running (suggested)" : "Start Ollama on Windows host",
} as const;

/**
 * Builds a full set of stubbed dependencies describing a host with nothing
 * installed or running; individual tests override only what they exercise.
 */
function buildDeps(
  overrides: Partial<DetectInferenceProviderHostStateDeps> = {},
): DetectInferenceProviderHostStateDeps {
  return {
    runCapture: vi.fn(() => ""),
    dockerCapture: vi.fn(() => ""),
    hostCommandExists: vi.fn(() => false),
    findReachableOllamaHost: vi.fn(() => null),
    isWsl: vi.fn(() => false),
    // NOTE(review): the explicit type argument assumes vitest's single
    // function-type parameter form of `vi.fn` — confirm against the pinned version.
    getContainerRuntime: vi.fn<DetectInferenceProviderHostStateDeps["getContainerRuntime"]>(
      () => "docker-desktop",
    ),
    detectWindowsHostOllama: vi.fn(() => ({
      installed: false,
      installedPath: "",
      loopbackOnly: false,
    })),
    getWindowsHostOllamaDockerRequirement: vi.fn(() => SUPPORTED_WINDOWS_OLLAMA),
    detectVllmProfile: vi.fn(() => null),
    ...overrides,
  };
}

/** Runs detection with experimental features on, a silent logger, and matching Ollama versions. */
function detectWithDeps(
  deps: DetectInferenceProviderHostStateDeps,
  gpu: InferenceProviderHostGpu | null = null,
) {
  return detectInferenceProviderHostState({
    gpu,
    experimental: true,
    platform: "linux",
    env: {},
    log: () => {},
    installedOllamaVersion: "0.24.0",
    runningOllamaVersion: "0.24.0",
    deps,
  });
}

describe("detectInferenceProviderHostState", () => {
  it("collects local Ollama and vLLM state into one provider host snapshot", () => {
    const deps = buildDeps({
      hostCommandExists: vi.fn((command) => command === "ollama"),
      findReachableOllamaHost: vi.fn(() => "127.0.0.1"),
      runCapture: vi.fn((command) =>
        command.join(" ").includes(`http://127.0.0.1:8000/v1/models`) ? "{}" : "",
      ),
      dockerCapture: vi.fn(() => "sha256:cached-image\n"),
      detectVllmProfile: vi.fn(() => ({
        name: "Linux + NVIDIA GPU",
        platform: "linux" as const,
        image: "nvcr.io/nvidia/vllm:test",
        defaultModel: {} as never,
        containerName: "nemoclaw-vllm",
        dockerRunFlags: [],
        pullTimeoutSec: 1,
        loadTimeoutSec: 1,
      })),
    });

    const state = detectWithDeps(deps, { nimCapable: true, type: "nvidia", platform: "linux" });

    expect(state.hasOllama).toBe(true);
    expect(state.ollamaRunning).toBe(true);
    expect(state.ollamaHost).toBe("127.0.0.1");
    expect(state.isWindowsHostOllama).toBe(false);
    expect(state.vllmRunning).toBe(true);
    expect(state.hasVllmImage).toBe(true);
    expect(state.vllmEntries.map((entry) => entry.key)).toEqual(["vllm"]);
    expect(state.gpuNimCapable).toBe(true);
    expect(state.ollamaInstallMenu.entry).toBeNull();
    // Outside WSL the container runtime is not consulted.
    expect(deps.getWindowsHostOllamaDockerRequirement).toHaveBeenCalledWith(null);
  });

  it("detects a reachable Windows-host Ollama beside WSL-local Ollama and warns outside mirrored networking", () => {
    const logs: string[] = [];
    const deps = buildDeps({
      isWsl: vi.fn(() => true),
      findReachableOllamaHost: vi.fn(() => "127.0.0.1"),
      detectWindowsHostOllama: vi.fn(() => ({
        installed: true,
        installedPath: "C:\\Users\\me\\AppData\\Local\\Programs\\Ollama\\ollama.exe",
        loopbackOnly: false,
      })),
      runCapture: vi.fn((command) => {
        const joined = command.join(" ");
        if (joined.includes("host.docker.internal:11434/api/tags")) return "{}";
        if (joined.includes("wslinfo --networking-mode")) return "nat\n";
        return "";
      }),
    });

    const state = detectInferenceProviderHostState({
      gpu: null,
      experimental: false,
      platform: "linux",
      env: {},
      log: (message = "") => logs.push(message),
      installedOllamaVersion: "0.24.0",
      runningOllamaVersion: "0.24.0",
      deps,
    });

    expect(state.isWsl).toBe(true);
    expect(state.hasWindowsOllama).toBe(true);
    expect(state.windowsOllamaReachable).toBe(true);
    expect(state.winOllamaInstalledPath).toMatch(/ollama\.exe$/);
    expect(logs.join("\n")).toContain("Ollama is running on both WSL and the Windows host");
    expect(deps.getWindowsHostOllamaDockerRequirement).toHaveBeenCalledWith("docker-desktop");
  });

  it("suppresses the duplicate-daemon warning when WSL mirrored networking makes the probes equivalent", () => {
    const logs: string[] = [];
    const deps = buildDeps({
      isWsl: vi.fn(() => true),
      findReachableOllamaHost: vi.fn(() => "127.0.0.1"),
      detectWindowsHostOllama: vi.fn(() => ({
        installed: true,
        installedPath: "C:\\Ollama\\ollama.exe",
        loopbackOnly: false,
      })),
      runCapture: vi.fn((command) => {
        const joined = command.join(" ");
        if (joined.includes("host.docker.internal:11434/api/tags")) return "{}";
        if (joined.includes("wslinfo --networking-mode")) return "mirrored\n";
        return "";
      }),
    });

    const state = detectInferenceProviderHostState({
      gpu: null,
      experimental: false,
      platform: "linux",
      env: {},
      log: (message = "") => logs.push(message),
      installedOllamaVersion: "0.24.0",
      runningOllamaVersion: "0.24.0",
      deps,
    });

    expect(state.windowsOllamaReachable).toBe(true);
    expect(logs).toEqual([]);
  });

  it("does not probe the Windows-host switch path when running Ollama already resolves to the Windows host", () => {
    // Typed so that `mock.calls` exposes the command argv inspected below.
    // NOTE(review): same vitest `vi.fn` type-argument assumption as in buildDeps.
    const runCapture = vi.fn<DetectInferenceProviderHostStateDeps["runCapture"]>(() => "");
    const deps = buildDeps({
      isWsl: vi.fn(() => true),
      findReachableOllamaHost: vi.fn(() => "host.docker.internal"),
      detectWindowsHostOllama: vi.fn(() => ({
        installed: true,
        installedPath: "C:\\Ollama\\ollama.exe",
        loopbackOnly: true,
      })),
      runCapture,
    });

    const state = detectWithDeps(deps);

    expect(state.isWindowsHostOllama).toBe(true);
    expect(state.windowsOllamaReachable).toBe(false);
    expect(
      runCapture.mock.calls.some(([command]) =>
        command.join(" ").includes("host.docker.internal:11434/api/tags"),
      ),
    ).toBe(false);
  });
});
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { dockerCapture as defaultDockerCapture } from "../adapters/docker";
import { OLLAMA_PORT, VLLM_PORT } from "../core/ports";
import { findReachableOllamaHost, OLLAMA_HOST_DOCKER_INTERNAL } from "../inference/local";
import type { NvidiaPlatform } from "../inference/nim";
import { detectVllmProfile, type VllmProfile } from "../inference/vllm";
import { type ContainerRuntime, isWsl as defaultIsWsl } from "../platform";
import { runCapture as defaultRunCapture } from "../runner";
import {
  getContainerRuntime as defaultGetContainerRuntime,
  getWindowsHostOllamaDockerRequirement,
  type WindowsHostOllamaDockerRequirement,
} from "./local-inference-topology";
import { resolveOllamaInstallMenuEntry, type OllamaInstallMenuResult } from "./ollama-install-menu";
import { buildVllmMenuEntries, type VllmMenuEntry } from "./vllm-menu";
import { detectWindowsHostOllama, type WindowsHostOllamaState } from "./windows-host-ollama";

/** Runs a command given as argv and returns its captured output. */
type RunCapture = (args: string[], options?: { ignoreError?: boolean }) => string;
/** Runs a docker subcommand given as argv and returns its captured output. */
type DockerCapture = (args: string[], options?: { ignoreError?: boolean }) => string;

/**
 * GPU description accepted by the detector.
 *
 * This module reads only `nimCapable` and `platform` directly; the whole
 * object is also handed to `detectVllmProfile`.
 */
export interface InferenceProviderHostGpu {
  nimCapable?: boolean;
  spark?: boolean;
  type?: string;
  platform?: NvidiaPlatform;
}

/** Snapshot of local inference-provider state on this host. */
export interface InferenceProviderHostState {
  /** True when an `ollama` command is found on the host. */
  hasOllama: boolean;
  /** Host of the reachable Ollama instance, or `null` when none was found. */
  ollamaHost: string | null;
  /** True when `ollamaHost` is not `null`. */
  ollamaRunning: boolean;
  /** True when `ollamaHost` equals `OLLAMA_HOST_DOCKER_INTERNAL`. */
  isWindowsHostOllama: boolean;
  isWsl: boolean;
  hasWindowsOllama: boolean;
  winOllamaInstalledPath: string;
  winOllamaLoopbackOnly: boolean;
  /** Result of the separate `host.docker.internal` probe; always false outside WSL. */
  windowsOllamaReachable: boolean;
  windowsHostOllamaDockerRequirement: WindowsHostOllamaDockerRequirement;
  vllmRunning: boolean;
  vllmProfile: VllmProfile | null;
  /** True when a profile was selected and `docker images -q` prints an id for its image. */
  hasVllmImage: boolean;
  vllmEntries: VllmMenuEntry[];
  ollamaInstallMenu: OllamaInstallMenuResult;
  gpuNimCapable: boolean;
}

/** Input to {@link detectInferenceProviderHostState}. */
export interface DetectInferenceProviderHostStateInput {
  gpu: InferenceProviderHostGpu | null | undefined;
  experimental: boolean;
  /** Defaults to `process.platform`. */
  platform?: NodeJS.Platform;
  /** Forwarded to `buildVllmMenuEntries`. */
  env?: NodeJS.ProcessEnv;
  /** Sink for warnings; defaults to `console.log`. */
  log?: (message?: string) => void;
  /** Forwarded to `resolveOllamaInstallMenuEntry`. */
  installedOllamaVersion?: string | null;
  /** Forwarded to `resolveOllamaInstallMenuEntry`. */
  runningOllamaVersion?: string | null;
  /** Overrides for individual probes, mainly for tests. */
  deps?: Partial<DetectInferenceProviderHostStateDeps>;
}

/** The host probes the detector relies on; each can be replaced individually. */
export interface DetectInferenceProviderHostStateDeps {
  runCapture: RunCapture;
  dockerCapture: DockerCapture;
  hostCommandExists: (commandName: string) => boolean;
  findReachableOllamaHost: () => string | null;
  isWsl: () => boolean;
  getContainerRuntime: () => ContainerRuntime;
  detectWindowsHostOllama: () => WindowsHostOllamaState;
  getWindowsHostOllamaDockerRequirement: (
    runtime: ContainerRuntime | null,
  ) => WindowsHostOllamaDockerRequirement;
  detectVllmProfile: (gpu: InferenceProviderHostGpu | null | undefined) => VllmProfile | null;
}

// Short curl timeouts shared by the local HTTP probes.
const LOCAL_PROVIDER_PROBE_CURL_ARGS = ["--connect-timeout", "2", "--max-time", "5"] as const;

/** Returns true when `command -v` prints anything for `commandName`. */
function hostCommandExists(commandName: string, runCapture: RunCapture): boolean {
  return !!runCapture(["sh", "-c", 'command -v "$1"', "--", commandName], {
    ignoreError: true,
  });
}

/** Fills every dependency the caller did not override with its real implementation. */
function buildDeps(
  overrides: Partial<DetectInferenceProviderHostStateDeps> = {},
): DetectInferenceProviderHostStateDeps {
  // Resolved first so the default `hostCommandExists` honours an overridden runner.
  const runCapture = overrides.runCapture ?? defaultRunCapture;
  return {
    runCapture,
    dockerCapture: overrides.dockerCapture ?? defaultDockerCapture,
    hostCommandExists:
      overrides.hostCommandExists ?? ((command) => hostCommandExists(command, runCapture)),
    findReachableOllamaHost: overrides.findReachableOllamaHost ?? findReachableOllamaHost,
    isWsl: overrides.isWsl ?? defaultIsWsl,
    getContainerRuntime: overrides.getContainerRuntime ?? defaultGetContainerRuntime,
    detectWindowsHostOllama: overrides.detectWindowsHostOllama ?? detectWindowsHostOllama,
    getWindowsHostOllamaDockerRequirement:
      overrides.getWindowsHostOllamaDockerRequirement ?? getWindowsHostOllamaDockerRequirement,
    detectVllmProfile:
      overrides.detectVllmProfile ??
      ((gpu) => detectVllmProfile(gpu as Parameters<typeof detectVllmProfile>[0])),
  };
}

/** Returns true when the local vLLM `/v1/models` endpoint answers the curl probe. */
function probeVllmRunning(runCapture: RunCapture): boolean {
  return !!runCapture(
    ["curl", "-sf", ...LOCAL_PROVIDER_PROBE_CURL_ARGS, `http://127.0.0.1:${VLLM_PORT}/v1/models`],
    { ignoreError: true },
  );
}

/**
 * Probes Ollama on the Windows host via `host.docker.internal`.
 *
 * Independent of findReachableOllamaHost: when WSL Ollama wins the cache on
 * 127.0.0.1, Windows-host may also be running on 0.0.0.0 and we want to offer
 * a "switch" without restarting anything. Skipped (returns false) outside WSL
 * and when the running Ollama already resolves to the Windows host.
 */
function probeWindowsOllamaReachable(input: {
  isWsl: boolean;
  isWindowsHostOllama: boolean;
  runCapture: RunCapture;
}): boolean {
  if (!input.isWsl || input.isWindowsHostOllama) return false;
  return !!input.runCapture(
    [
      "curl",
      "-sf",
      ...LOCAL_PROVIDER_PROBE_CURL_ARGS,
      `http://host.docker.internal:${OLLAMA_PORT}/api/tags`,
    ],
    { ignoreError: true },
  );
}

/**
 * Logs a warning when both a WSL-local and a Windows-host Ollama are reachable.
 *
 * Mirrored mode shares loopback so both probes hit the same instance; only NAT
 * mode actually has two separate daemons to warn about.
 */
function maybeWarnAboutDuplicateOllamaDaemons(input: {
  isWsl: boolean;
  ollamaHost: string | null;
  windowsOllamaReachable: boolean;
  runCapture: RunCapture;
  log: (message?: string) => void;
}): void {
  if (!input.isWsl || input.ollamaHost !== "127.0.0.1" || !input.windowsOllamaReachable) return;
  const networkingMode = input
    .runCapture(["wslinfo", "--networking-mode"], {
      ignoreError: true,
    })
    .trim();
  if (networkingMode === "mirrored") return;
  input.log("");
  input.log("  ⚠ Ollama is running on both WSL and the Windows host.");
  input.log("    Stop one to avoid duplicated GPU memory and model caches.");
  input.log("");
}

/**
 * Probes the host for local inference providers (Ollama, Windows-host Ollama,
 * vLLM, NIM capability) and returns everything as one snapshot.
 *
 * Runs external commands through the injected dependencies and may write a
 * duplicate-daemon warning to `input.log`.
 *
 * @param input GPU description, feature flag, and optional overrides.
 * @returns The collected host state, including the prebuilt vLLM and Ollama install menu data.
 */
export function detectInferenceProviderHostState(
  input: DetectInferenceProviderHostStateInput,
): InferenceProviderHostState {
  const deps = buildDeps(input.deps);
  const log = input.log ?? console.log;
  const platform = input.platform ?? process.platform;
  const isWsl = deps.isWsl();
  const hasOllama = deps.hostCommandExists("ollama");
  const ollamaHost = deps.findReachableOllamaHost();
  const ollamaRunning = ollamaHost !== null;
  const isWindowsHostOllama = ollamaHost === OLLAMA_HOST_DOCKER_INTERNAL;
  const vllmRunning = probeVllmRunning(deps.runCapture);
  // Pick a vLLM install recipe for this host. Profiles live in inference/vllm.ts;
  // null means "no supported platform" (vLLM stays behind EXPERIMENTAL).
  const vllmProfile = deps.detectVllmProfile(input.gpu);
  // If the profile's image is already cached, the install path is really a
  // "start" — docker pull is a no-op and the container can come up in seconds.
  const hasVllmImage = !!(
    vllmProfile &&
    deps.dockerCapture(["images", "-q", vllmProfile.image], { ignoreError: true }).trim()
  );
  const windowsHostOllamaDockerRequirement = deps.getWindowsHostOllamaDockerRequirement(
    isWsl ? deps.getContainerRuntime() : null,
  );
  // Probed even when WSL has its own Ollama: users may prefer the Windows
  // instance for GPU access and a unified model cache. See
  // src/lib/onboard/windows-host-ollama.ts for process/path fallback details.
  const winOllamaState = deps.detectWindowsHostOllama();
  const hasWindowsOllama = winOllamaState.installed;
  const windowsOllamaReachable = probeWindowsOllamaReachable({
    isWsl,
    isWindowsHostOllama,
    runCapture: deps.runCapture,
  });

  maybeWarnAboutDuplicateOllamaDaemons({
    isWsl,
    ollamaHost,
    windowsOllamaReachable,
    runCapture: deps.runCapture,
    log,
  });

  const ollamaInstallMenu = resolveOllamaInstallMenuEntry({
    hasOllama,
    ollamaRunning,
    hasWindowsOllama,
    ollamaHost,
    platform,
    isWsl,
    installedOllamaVersion: input.installedOllamaVersion,
    runningOllamaVersion: input.runningOllamaVersion,
  });

  return {
    hasOllama,
    ollamaHost,
    ollamaRunning,
    isWindowsHostOllama,
    isWsl,
    hasWindowsOllama,
    winOllamaInstalledPath: winOllamaState.installedPath,
    winOllamaLoopbackOnly: winOllamaState.loopbackOnly,
    windowsOllamaReachable,
    windowsHostOllamaDockerRequirement,
    vllmRunning,
    vllmProfile,
    hasVllmImage,
    vllmEntries: buildVllmMenuEntries({
      vllmRunning,
      vllmProfile,
      experimental: input.experimental,
      platform: input.gpu?.platform,
      hasVllmImage,
      env: input.env,
      log: (message) => log(message),
    }),
    ollamaInstallMenu,
    gpuNimCapable: Boolean(input.gpu?.nimCapable),
  };
}
Carlos Villela Date: Thu, 11 Jun 2026 01:06:43 -0700 Subject: [PATCH 4/4] fix(onboard): pass WSL detection overrides --- src/lib/onboard/provider-host-state.test.ts | 20 ++++++++++++++++++++ src/lib/onboard/provider-host-state.ts | 10 +++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/lib/onboard/provider-host-state.test.ts b/src/lib/onboard/provider-host-state.test.ts index 26bd0c7be5..65ac33a702 100644 --- a/src/lib/onboard/provider-host-state.test.ts +++ b/src/lib/onboard/provider-host-state.test.ts @@ -128,6 +128,26 @@ describe("detectInferenceProviderHostState", () => { expect(deps.getWindowsHostOllamaDockerRequirement).toHaveBeenCalledWith("docker-desktop"); }); + it("passes injected platform and env through WSL detection", () => { + const env = { WSL_DISTRO_NAME: "Ubuntu" } as NodeJS.ProcessEnv; + const isWsl = vi.fn(() => true); + const deps = buildDeps({ isWsl }); + + const state = detectInferenceProviderHostState({ + gpu: null, + experimental: false, + platform: "linux", + env, + log: () => {}, + installedOllamaVersion: "0.24.0", + runningOllamaVersion: "0.24.0", + deps, + }); + + expect(state.isWsl).toBe(true); + expect(isWsl).toHaveBeenCalledWith({ platform: "linux", env }); + }); + it("suppresses the duplicate-daemon warning when WSL mirrored networking makes the probes equivalent", () => { const logs: string[] = []; const deps = buildDeps({ diff --git a/src/lib/onboard/provider-host-state.ts b/src/lib/onboard/provider-host-state.ts index 9f0df3de35..8b50e5c998 100644 --- a/src/lib/onboard/provider-host-state.ts +++ b/src/lib/onboard/provider-host-state.ts @@ -6,7 +6,11 @@ import { OLLAMA_PORT, VLLM_PORT } from "../core/ports"; import { findReachableOllamaHost, OLLAMA_HOST_DOCKER_INTERNAL } from "../inference/local"; import type { NvidiaPlatform } from "../inference/nim"; import { detectVllmProfile, type VllmProfile } from "../inference/vllm"; -import { type ContainerRuntime, isWsl as defaultIsWsl } from "../platform"; 
+import { + type ContainerRuntime, + isWsl as defaultIsWsl, + type WslDetectionOptions, +} from "../platform"; import { runCapture as defaultRunCapture } from "../runner"; import { getContainerRuntime as defaultGetContainerRuntime, @@ -62,7 +66,7 @@ export interface DetectInferenceProviderHostStateDeps { dockerCapture: DockerCapture; hostCommandExists: (commandName: string) => boolean; findReachableOllamaHost: () => string | null; - isWsl: () => boolean; + isWsl: (opts?: WslDetectionOptions) => boolean; getContainerRuntime: () => ContainerRuntime; detectWindowsHostOllama: () => WindowsHostOllamaState; getWindowsHostOllamaDockerRequirement: ( @@ -150,7 +154,7 @@ export function detectInferenceProviderHostState( const deps = buildDeps(input.deps); const log = input.log ?? console.log; const platform = input.platform ?? process.platform; - const isWsl = deps.isWsl(); + const isWsl = deps.isWsl({ platform, env: input.env }); const hasOllama = deps.hostCommandExists("ollama"); const ollamaHost = deps.findReachableOllamaHost(); const ollamaRunning = ollamaHost !== null;