NVIDIA · cv · Jun 11, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
@@ -123,12 +123,6 @@ const {
 }: typeof import("./onboard/model-router-process") = require("./onboard/model-router-process");
 const bedrockRuntimeOnboard: typeof import("./onboard/bedrock-runtime") =
   require("./onboard/bedrock-runtime");
-const {
-  buildVllmMenuEntries,
-}: typeof import("./onboard/vllm-menu") = require("./onboard/vllm-menu");
-const {
-  detectWindowsHostOllama,
-}: typeof import("./onboard/windows-host-ollama") = require("./onboard/windows-host-ollama");
 const {
   installOllamaOnLinux,
 }: typeof import("./onboard/install-ollama-linux") = require("./onboard/install-ollama-linux");
@@ -204,24 +198,24 @@ const {
 } = require("./core/ports");
 const localInference: typeof import("./inference/local") = require("./inference/local");
 const {
-  findReachableOllamaHost,
   resetOllamaHostCache,
   getLocalProviderBaseUrl,
   getLocalProviderHealthCheck,
   getLocalProviderValidationBaseUrl,
   getOllamaModelOptions,
   getOllamaWarmupCommand,
-  OLLAMA_HOST_DOCKER_INTERNAL,
   validateLocalProvider,
 } = localInference;
 const {
   checkOllamaPortsOrWarn,
-  resolveOllamaInstallMenuEntry,
   assertOllamaUpgradeApplied,
 } = require("./onboard/ollama-install-menu");
 const {
   buildInferenceProviderMenu,
 }: typeof import("./onboard/provider-menu") = require("./onboard/provider-menu");
+const {
+  detectInferenceProviderHostState,
+}: typeof import("./onboard/provider-host-state") = require("./onboard/provider-host-state");
 const {
   ensureOllamaAuthProxy,
   getOllamaProxyToken,
@@ -236,7 +230,7 @@ const {
   switchToWindowsOllamaHost,
   printWindowsOllamaTimeoutDiagnostics,
 } = require("./inference/ollama/windows");
-const { detectVllmProfile, installVllm } = require("./inference/vllm");
+const { installVllm } = require("./inference/vllm");
 const inferenceConfig: typeof import("./inference/config") = require("./inference/config");
 const { DEFAULT_CLOUD_MODEL, getProviderSelectionConfig, parseGatewayInference } = inferenceConfig;
 
@@ -310,7 +304,6 @@ const platformUtils: typeof import("./platform") = require("./platform");
 const { isWsl, shouldPatchCoredns } = platformUtils;
 const {
   getContainerRuntime,
-  getWindowsHostOllamaDockerRequirement,
   repairLocalInferenceSystemdOverrideOrExit,
   rejectUnsupportedWindowsHostOllama,
   shouldFrontOllamaWithProxy,
@@ -1396,12 +1389,6 @@ function buildGatewayClusterExecArgv(script: string): string[] {
   return dockerExecArgv(getGatewayClusterContainerName(), ["sh", "-lc", script]);
 }
 
-function hostCommandExists(commandName: string): boolean {
-  return !!runCapture(["sh", "-c", 'command -v "$1"', "--", commandName], {
-    ignoreError: true,
-  });
-}
-
 function captureProcessArgs(pid: number): string {
   return runCapture(["ps", "-p", String(pid), "-o", "args="], {
     ignoreError: true,
@@ -3781,85 +3768,46 @@ async function setupNim(
   let preferredInferenceApi: string | null = null;
   let allowToolsIncompatible = false;
 
-  const localProbeCurlArgs = ["--connect-timeout", "2", "--max-time", "5"] as const;
-  const hasOllama = hostCommandExists("ollama");
-  const ollamaHost = findReachableOllamaHost();
-  const ollamaRunning = ollamaHost !== null;
-  const isWindowsHostOllama = ollamaHost === OLLAMA_HOST_DOCKER_INTERNAL;
-  const vllmRunning = !!runCapture(
-    ["curl", "-sf", ...localProbeCurlArgs, `http://127.0.0.1:${VLLM_PORT}/v1/models`],
-    { ignoreError: true },
-  );
-  // Pick a vLLM install recipe for this host. Profiles live in inference/vllm.ts;
-  // null means "no supported platform" (vLLM stays behind EXPERIMENTAL).
-  const vllmProfile = detectVllmProfile(gpu);
-  // If the profile's image is already cached, the install path is really a
-  // "start" — docker pull is a no-op and the container can come up in seconds.
-  const hasVllmImage = !!(
-    vllmProfile &&
-    docker.dockerCapture(["images", "-q", vllmProfile.image], { ignoreError: true }).trim()
-  );
-  const windowsHostOllamaDockerRequirement = getWindowsHostOllamaDockerRequirement(
-    isWsl() ? getContainerRuntime() : null,
-  );
-  // Probed even when WSL has its own Ollama: users may prefer the Windows
-  // instance for GPU access and a unified model cache. See
-  // src/lib/onboard/windows-host-ollama.ts for process/path fallback details.
-  const winOllamaState = detectWindowsHostOllama();
-  const hasWindowsOllama = winOllamaState.installed;
-  const winOllamaInstalledPath = winOllamaState.installedPath;
-  const winOllamaLoopbackOnly = winOllamaState.loopbackOnly;
-
-  // Independent of findReachableOllamaHost: when WSL Ollama wins the cache
-  // on 127.0.0.1, Windows-host may also be running on 0.0.0.0 and we want
-  // to offer a "switch" without restarting anything.
-  let windowsOllamaReachable = false;
-  if (isWsl() && !isWindowsHostOllama) {
-    windowsOllamaReachable = !!runCapture(
-      ["curl", "-sf", ...localProbeCurlArgs, `http://host.docker.internal:${OLLAMA_PORT}/api/tags`],
-      { ignoreError: true },
-    );
-  }
-
-  // Mirrored mode shares loopback so both probes hit the same instance;
-  // only NAT mode actually has two separate daemons to warn about.
-  if (isWsl() && ollamaHost === "127.0.0.1" && windowsOllamaReachable) {
-    const networkingMode = runCapture(["wslinfo", "--networking-mode"], {
-      ignoreError: true,
-    }).trim();
-    if (networkingMode !== "mirrored") {
-      console.log("");
-      console.log("  ⚠ Ollama is running on both WSL and the Windows host.");
-      console.log("    Stop one to avoid duplicated GPU memory and model caches.");
-      console.log("");
-    }
-  }
+  const providerHostState = detectInferenceProviderHostState({
+    gpu,
+    experimental: EXPERIMENTAL,
+  });
+  const {
+    hasOllama,
+    ollamaHost,
+    ollamaRunning,
+    isWindowsHostOllama,
+    isWsl: isWslHost,
+    hasWindowsOllama,
+    winOllamaInstalledPath,
+    winOllamaLoopbackOnly,
+    windowsOllamaReachable,
+    windowsHostOllamaDockerRequirement,
+    vllmRunning,
+    vllmProfile,
+    hasVllmImage,
+    vllmEntries,
+    ollamaInstallMenu,
+    gpuNimCapable,
+  } = providerHostState;
   const requestedProvider = getNonInteractiveProvider();
   const requestedModel = isNonInteractive()
     ? getNonInteractiveModel(requestedProvider || "build")
     : null;
   const agentProviderOptions = getAgentInferenceProviderOptions(agent);
-  const ollamaInstallMenu = resolveOllamaInstallMenuEntry({
-    hasOllama,
-    ollamaRunning,
-    hasWindowsOllama,
-    ollamaHost,
-    platform: process.platform,
-    isWsl: isWsl(),
-  });
 
   // Model Router: complexity-based routing via blueprint config.
   const blueprintRouterCfg = loadBlueprintProfile("routed");
   const { options, hermesProviderAvailable } = buildInferenceProviderMenu({
     remoteProviderConfig: REMOTE_PROVIDER_CONFIG,
     agentProviderOptions,
     experimental: EXPERIMENTAL,
-    gpuNimCapable: Boolean(gpu && gpu.nimCapable),
+    gpuNimCapable,
     hasOllama,
     ollamaRunning,
     ollamaHost,
     ollamaPort: OLLAMA_PORT,
-    isWsl: isWsl(),
+    isWsl: isWslHost,
     hasWindowsOllama,
     isWindowsHostOllama,
     windowsHostLabelSuffix: windowsHostOllamaDockerRequirement.supported
@@ -3870,13 +3818,7 @@ async function setupNim(
     windowsOllamaReachable,
     winOllamaLoopbackOnly,
     ollamaInstallEntry: ollamaInstallMenu.entry,
-    vllmEntries: buildVllmMenuEntries({
-      vllmRunning,
-      vllmProfile,
-      experimental: EXPERIMENTAL,
-      platform: gpu?.platform,
-      hasVllmImage,
-    }),
+    vllmEntries,
     routedEnabled: blueprintRouterCfg?.router?.enabled === true,
   });
 
@@ -3918,7 +3860,7 @@ async function setupNim(
             // (so the menu's "ollama" key points there), the availability
             // check below would pass and silently swap the daemon. Detect
             // and fail-loud with a hint.
-            if (isWsl() && recordedProvider === "ollama-local" && isWindowsHostOllama) {
+            if (isWslHost && recordedProvider === "ollama-local" && isWindowsHostOllama) {
               console.error(
                 `  Recorded provider '${recordedProvider}' (WSL Ollama) is not available in this environment.`,
               );