Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 30 additions & 88 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,6 @@ const {
}: typeof import("./onboard/model-router-process") = require("./onboard/model-router-process");
const bedrockRuntimeOnboard: typeof import("./onboard/bedrock-runtime") =
require("./onboard/bedrock-runtime");
const {
buildVllmMenuEntries,
}: typeof import("./onboard/vllm-menu") = require("./onboard/vllm-menu");
const {
detectWindowsHostOllama,
}: typeof import("./onboard/windows-host-ollama") = require("./onboard/windows-host-ollama");
const {
installOllamaOnLinux,
}: typeof import("./onboard/install-ollama-linux") = require("./onboard/install-ollama-linux");
Expand Down Expand Up @@ -204,24 +198,24 @@ const {
} = require("./core/ports");
const localInference: typeof import("./inference/local") = require("./inference/local");
const {
findReachableOllamaHost,
resetOllamaHostCache,
getLocalProviderBaseUrl,
getLocalProviderHealthCheck,
getLocalProviderValidationBaseUrl,
getOllamaModelOptions,
getOllamaWarmupCommand,
OLLAMA_HOST_DOCKER_INTERNAL,
validateLocalProvider,
} = localInference;
const {
checkOllamaPortsOrWarn,
resolveOllamaInstallMenuEntry,
assertOllamaUpgradeApplied,
} = require("./onboard/ollama-install-menu");
const {
buildInferenceProviderMenu,
}: typeof import("./onboard/provider-menu") = require("./onboard/provider-menu");
const {
detectInferenceProviderHostState,
}: typeof import("./onboard/provider-host-state") = require("./onboard/provider-host-state");
const {
ensureOllamaAuthProxy,
getOllamaProxyToken,
Expand All @@ -236,7 +230,7 @@ const {
switchToWindowsOllamaHost,
printWindowsOllamaTimeoutDiagnostics,
} = require("./inference/ollama/windows");
const { detectVllmProfile, installVllm } = require("./inference/vllm");
const { installVllm } = require("./inference/vllm");
const inferenceConfig: typeof import("./inference/config") = require("./inference/config");
const { DEFAULT_CLOUD_MODEL, getProviderSelectionConfig, parseGatewayInference } = inferenceConfig;

Expand Down Expand Up @@ -310,7 +304,6 @@ const platformUtils: typeof import("./platform") = require("./platform");
const { isWsl, shouldPatchCoredns } = platformUtils;
const {
getContainerRuntime,
getWindowsHostOllamaDockerRequirement,
repairLocalInferenceSystemdOverrideOrExit,
rejectUnsupportedWindowsHostOllama,
shouldFrontOllamaWithProxy,
Expand Down Expand Up @@ -1396,12 +1389,6 @@ function buildGatewayClusterExecArgv(script: string): string[] {
return dockerExecArgv(getGatewayClusterContainerName(), ["sh", "-lc", script]);
}

function hostCommandExists(commandName: string): boolean {
return !!runCapture(["sh", "-c", 'command -v "$1"', "--", commandName], {
ignoreError: true,
});
}

function captureProcessArgs(pid: number): string {
return runCapture(["ps", "-p", String(pid), "-o", "args="], {
ignoreError: true,
Expand Down Expand Up @@ -3781,85 +3768,46 @@ async function setupNim(
let preferredInferenceApi: string | null = null;
let allowToolsIncompatible = false;

const localProbeCurlArgs = ["--connect-timeout", "2", "--max-time", "5"] as const;
const hasOllama = hostCommandExists("ollama");
const ollamaHost = findReachableOllamaHost();
const ollamaRunning = ollamaHost !== null;
const isWindowsHostOllama = ollamaHost === OLLAMA_HOST_DOCKER_INTERNAL;
const vllmRunning = !!runCapture(
["curl", "-sf", ...localProbeCurlArgs, `http://127.0.0.1:${VLLM_PORT}/v1/models`],
{ ignoreError: true },
);
// Pick a vLLM install recipe for this host. Profiles live in inference/vllm.ts;
// null means "no supported platform" (vLLM stays behind EXPERIMENTAL).
const vllmProfile = detectVllmProfile(gpu);
// If the profile's image is already cached, the install path is really a
// "start" — docker pull is a no-op and the container can come up in seconds.
const hasVllmImage = !!(
vllmProfile &&
docker.dockerCapture(["images", "-q", vllmProfile.image], { ignoreError: true }).trim()
);
const windowsHostOllamaDockerRequirement = getWindowsHostOllamaDockerRequirement(
isWsl() ? getContainerRuntime() : null,
);
// Probed even when WSL has its own Ollama: users may prefer the Windows
// instance for GPU access and a unified model cache. See
// src/lib/onboard/windows-host-ollama.ts for process/path fallback details.
const winOllamaState = detectWindowsHostOllama();
const hasWindowsOllama = winOllamaState.installed;
const winOllamaInstalledPath = winOllamaState.installedPath;
const winOllamaLoopbackOnly = winOllamaState.loopbackOnly;

// Independent of findReachableOllamaHost: when WSL Ollama wins the cache
// on 127.0.0.1, Windows-host may also be running on 0.0.0.0 and we want
// to offer a "switch" without restarting anything.
let windowsOllamaReachable = false;
if (isWsl() && !isWindowsHostOllama) {
windowsOllamaReachable = !!runCapture(
["curl", "-sf", ...localProbeCurlArgs, `http://host.docker.internal:${OLLAMA_PORT}/api/tags`],
{ ignoreError: true },
);
}

// Mirrored mode shares loopback so both probes hit the same instance;
// only NAT mode actually has two separate daemons to warn about.
if (isWsl() && ollamaHost === "127.0.0.1" && windowsOllamaReachable) {
const networkingMode = runCapture(["wslinfo", "--networking-mode"], {
ignoreError: true,
}).trim();
if (networkingMode !== "mirrored") {
console.log("");
console.log(" ⚠ Ollama is running on both WSL and the Windows host.");
console.log(" Stop one to avoid duplicated GPU memory and model caches.");
console.log("");
}
}
const providerHostState = detectInferenceProviderHostState({
gpu,
experimental: EXPERIMENTAL,
});
const {
hasOllama,
ollamaHost,
ollamaRunning,
isWindowsHostOllama,
isWsl: isWslHost,
hasWindowsOllama,
winOllamaInstalledPath,
winOllamaLoopbackOnly,
windowsOllamaReachable,
windowsHostOllamaDockerRequirement,
vllmRunning,
vllmProfile,
hasVllmImage,
vllmEntries,
ollamaInstallMenu,
gpuNimCapable,
} = providerHostState;
const requestedProvider = getNonInteractiveProvider();
const requestedModel = isNonInteractive()
? getNonInteractiveModel(requestedProvider || "build")
: null;
const agentProviderOptions = getAgentInferenceProviderOptions(agent);
const ollamaInstallMenu = resolveOllamaInstallMenuEntry({
hasOllama,
ollamaRunning,
hasWindowsOllama,
ollamaHost,
platform: process.platform,
isWsl: isWsl(),
});

// Model Router: complexity-based routing via blueprint config.
const blueprintRouterCfg = loadBlueprintProfile("routed");
const { options, hermesProviderAvailable } = buildInferenceProviderMenu({
remoteProviderConfig: REMOTE_PROVIDER_CONFIG,
agentProviderOptions,
experimental: EXPERIMENTAL,
gpuNimCapable: Boolean(gpu && gpu.nimCapable),
gpuNimCapable,
hasOllama,
ollamaRunning,
ollamaHost,
ollamaPort: OLLAMA_PORT,
isWsl: isWsl(),
isWsl: isWslHost,
hasWindowsOllama,
isWindowsHostOllama,
windowsHostLabelSuffix: windowsHostOllamaDockerRequirement.supported
Expand All @@ -3870,13 +3818,7 @@ async function setupNim(
windowsOllamaReachable,
winOllamaLoopbackOnly,
ollamaInstallEntry: ollamaInstallMenu.entry,
vllmEntries: buildVllmMenuEntries({
vllmRunning,
vllmProfile,
experimental: EXPERIMENTAL,
platform: gpu?.platform,
hasVllmImage,
}),
vllmEntries,
routedEnabled: blueprintRouterCfg?.router?.enabled === true,
});

Expand Down Expand Up @@ -3918,7 +3860,7 @@ async function setupNim(
// (so the menu's "ollama" key points there), the availability
// check below would pass and silently swap the daemon. Detect
// and fail-loud with a hint.
if (isWsl() && recordedProvider === "ollama-local" && isWindowsHostOllama) {
if (isWslHost && recordedProvider === "ollama-local" && isWindowsHostOllama) {
console.error(
` Recorded provider '${recordedProvider}' (WSL Ollama) is not available in this environment.`,
);
Expand Down
Loading