Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 31 additions & 175 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -452,11 +452,12 @@ const {
assessHost,
checkPortAvailable,
ensureSwap,
getDockerBridgeGatewayIp,
getMemoryInfo,
planHostRemediation,
probeContainerDns,
} = preflightUtils;
const {
assertDockerBridgeAndContainerDnsHealthy,
}: typeof import("./onboard/bridge-dns-preflight") = require("./onboard/bridge-dns-preflight");
const agentOnboard = require("./agent/onboard");
const agentDefs = require("./agent/defs");

Expand Down Expand Up @@ -1803,13 +1804,28 @@ function assertCdiNvidiaGpuSpecPresent(
process.exit(1);
}


/**
 * Options accepted by `preflight`: the GPU-related keys picked from
 * `OnboardOptions`, plus an optional `optedOutGpuPassthrough` flag.
 */
type PreflightOptions = {
  optedOutGpuPassthrough?: boolean;
} & Pick<OnboardOptions, "gpu" | "noGpu" | "sandboxGpu" | "sandboxGpuDevice">;

/**
 * Abort onboarding when the host's container runtime is unsupported.
 *
 * Today the only rejected combination is Podman while the Linux
 * Docker-driver gateway is enabled. Both the fresh preflight and the
 * `--resume` backstop call this before any Docker-specific probes, so a
 * `docker` binary that resolves to Podman yields the unsupported-runtime
 * message rather than misleading bridge/DNS diagnostics.
 *
 * @param host - Host assessment as returned by `assessHost`; only
 *   `host.runtime` is read here.
 * @returns Nothing. For the rejected combination it prints three lines to
 *   stderr and calls `process.exit(1)`.
 */
function rejectUnsupportedContainerRuntime(host: ReturnType<typeof assessHost>): void {
  // Gateway check stays first so it is evaluated exactly as before.
  if (!isLinuxDockerDriverGatewayEnabled() || host.runtime !== "podman") {
    return;
  }

  console.error(` ✗ ${cliDisplayName()} onboarding now uses OpenShell's Docker driver.`);
  console.error(` Podman is not supported for this ${cliDisplayName()} integration path.`);
  console.error(" Switch to Docker Engine and rerun onboarding.");
  process.exit(1);
}

async function preflight(
preflightOpts: PreflightOptions = {},
): Promise<ReturnType<typeof nim.detectGpu>> {
Expand All @@ -1823,6 +1839,10 @@ async function preflight(
printRemediationActions(planHostRemediation(host));
process.exit(1);
}
// Reject unsupported runtimes (Podman) BEFORE the success log so
// Podman users do not see a misleading `✓ Docker is running` line
// immediately followed by a fatal unsupported-runtime exit.
rejectUnsupportedContainerRuntime(host);
console.log(" ✓ Docker is running");
require("./onboard/http-proxy-preflight").warnIfHostProxyMissesLoopback();
const gpu = nim.detectGpu();
Expand All @@ -1837,183 +1857,11 @@ async function preflight(
!sandboxGpuConfig.sandboxGpuEnabled;
assertCdiNvidiaGpuSpecPresent(host, optedOutGpuPassthrough, sandboxGpuConfig.hostGpuPlatform);

// DNS resolution from inside containers (#2101). A corp firewall that
// blocks outbound UDP:53 to public resolvers leaves the sandbox build
// unable to resolve registry.npmjs.org; npm then retries for ~15 min and
// prints the cryptic `Exit handler never called`.
const dns = probeContainerDns();
// Only reasons where the probe actually *ran* nslookup and observed a DNS
// failure warrant blocking — other reasons are inconclusive (probe itself
// couldn't run, got killed, etc.) and shouldn't fail a valid environment.
const dnsIsFatal = dns.reason === "servers_unreachable" || dns.reason === "resolution_failed";

if (dns.ok) {
console.log(" ✓ Container DNS resolution works");
} else if (!dnsIsFatal) {
// Inconclusive probe — warn but proceed. If the sandbox build really
// does hit a DNS issue, the user will see #2101 pointers in that layer.
if (dns.reason === "image_pull_failed") {
console.warn(
" ⚠ Container DNS probe inconclusive: docker couldn't pull the busybox test image.",
);
console.warn(" This usually means the docker daemon itself can't reach Docker Hub,");
console.warn(
" but doesn't prove container DNS is broken — the sandbox build may still succeed.",
);
} else {
console.warn(` ⚠ Container DNS probe inconclusive (reason: ${dns.reason ?? "unknown"}).`);
}
if (dns.details) {
for (const line of String(dns.details).split("\n").slice(-3)) {
if (line.trim()) console.warn(` ${line.trim()}`);
}
}
console.warn(" Proceeding. If the sandbox build later hangs at `npm ci`, see issue #2101.");
} else {
console.error(" ✗ DNS resolution from inside a docker container failed.");
if (dns.details) {
for (const line of String(dns.details).split("\n").slice(-4)) {
if (line.trim()) console.error(` ${line.trim()}`);
}
}
console.error("");
{
console.error(" The sandbox build runs `npm ci` inside a container and needs to resolve");
console.error(" registry.npmjs.org. On networks that block outbound UDP:53 to public DNS");
console.error(" (common in corporate environments that force DNS-over-TLS on the host),");
console.error(" the build appears to hang for ~15 minutes and then prints the cryptic");
console.error(" `npm error Exit handler never called`. See issue #2101.");
console.error("");
console.error(" Fix options:");
console.error("");

// Platform-aware remediation hints. The systemd-resolved fix is
// Linux-specific; macOS / Windows / WSL-backed-by-Docker-Desktop
// hosts configure DNS through Docker Desktop's GUI or a
// platform-specific daemon.json path, so we avoid printing shell
// commands that would mislead those users.
const isLinuxWithSystemd =
host.platform === "linux" && !host.isWsl && host.systemctlAvailable;

const printLinuxFix = (bridgeIp: string, note: string | null) => {
if (note) console.error(note);
console.error(" sudo mkdir -p /etc/systemd/resolved.conf.d/");
console.error(
` printf '[Resolve]\\nDNSStubListenerExtra=${bridgeIp}\\n' | sudo tee /etc/systemd/resolved.conf.d/docker-bridge.conf`,
);
console.error(" sudo systemctl restart systemd-resolved");
console.error("");
console.error(
" Then add the dns key to /etc/docker/daemon.json (safely merges with existing config if jq is installed):",
);
console.error(
" sudo cp /etc/docker/daemon.json /etc/docker/daemon.json.bak-$(date +%s) 2>/dev/null",
);
console.error(
` { sudo jq '. + {"dns":["${bridgeIp}"]}' /etc/docker/daemon.json 2>/dev/null || echo '{"dns":["${bridgeIp}"]}'; } | sudo tee /etc/docker/daemon.json.new >/dev/null`,
);
console.error(" sudo mv /etc/docker/daemon.json.new /etc/docker/daemon.json");
console.error(" sudo systemctl restart docker");
};

if (isLinuxWithSystemd) {
const detectedBridgeIp = getDockerBridgeGatewayIp();
const bridgeIp = detectedBridgeIp || "172.17.0.1";
let bridgeNote: string | null = null;
if (detectedBridgeIp && detectedBridgeIp !== "172.17.0.1") {
bridgeNote = ` (detected your docker bridge gateway at ${detectedBridgeIp})`;
} else if (!detectedBridgeIp) {
bridgeNote =
" (could not auto-detect bridge IP; using docker's default — verify with:\n" +
" docker network inspect bridge --format '{{range .IPAM.Config}}{{.Gateway}}{{end}}')";
}
console.error(" 1. Make systemd-resolved reachable from containers (recommended):");
printLinuxFix(bridgeIp, bridgeNote);
console.error("");
console.error(" 2. Configure an explicit UDP:53-capable DNS in /etc/docker/daemon.json");
console.error(" (ask your IT team for an internal DNS server IP).");
} else if (host.platform === "darwin") {
// On macOS, branch by the detected runtime (host.runtime) so users get
// shell commands they can actually paste, not a "click this GUI" hint.
if (host.runtime === "colima") {
console.error(" Configure Colima's DNS (macOS):");
console.error(" colima stop");
console.error(" colima start --dns <corp-dns-ip>");
console.error(" (or edit ~/.colima/default/colima.yaml and `colima restart`)");
} else if (host.runtime === "docker-desktop" || host.runtime === "docker") {
console.error(" Configure Docker Desktop's DNS (macOS):");
console.error(
" cp ~/.docker/daemon.json ~/.docker/daemon.json.bak-$(date +%s) 2>/dev/null",
);
console.error(
` { jq '. + {"dns":["<corp-dns-ip>"]}' ~/.docker/daemon.json 2>/dev/null || echo '{"dns":["<corp-dns-ip>"]}'; } > ~/.docker/daemon.json.new && mv ~/.docker/daemon.json.new ~/.docker/daemon.json`,
);
console.error(" osascript -e 'quit app \"Docker\"' && sleep 3 && open -a Docker");
console.error(
" (or do the same via the Docker Desktop UI: Settings → Docker Engine)",
);
} else {
// Unknown / podman / other
console.error(" Configure your container runtime's DNS (macOS):");
console.error(" - Docker Desktop:");
console.error(
' { jq \'. + {"dns":["<corp-dns-ip>"]}\' ~/.docker/daemon.json 2>/dev/null || echo \'{"dns":["<corp-dns-ip>"]}\'; } > ~/.docker/daemon.json.new && mv ~/.docker/daemon.json.new ~/.docker/daemon.json',
);
console.error(" osascript -e 'quit app \"Docker\"' && sleep 3 && open -a Docker");
console.error(" - Colima:");
console.error(" colima stop && colima start --dns <corp-dns-ip>");
console.error(" - Rancher Desktop / Podman: edit the runtime's DNS config");
console.error(" and restart it.");
}
console.error(" Ask your IT team for an internal DNS server IP that accepts UDP:53.");
} else if (host.platform === "win32" || host.isWsl) {
console.error(" 1. Configure Docker Desktop's DNS (Windows / WSL via Docker Desktop):");
console.error(
" Docker Desktop for Windows → Settings → Docker Engine — edit the JSON to add:",
);
console.error(' { "dns": ["<corp-dns-ip>"] }');
console.error(" Then click Apply & Restart.");
console.error("");
console.error(
" 2. If you run docker natively inside WSL (not Docker Desktop), apply the Linux fix:",
);
// Reuse the same bridge-IP detection the Linux branch uses — a
// native-docker-in-WSL install can have a custom bridge subnet
// just like any other Linux host, so a hardcoded 172.17.0.1
// would break those users' copy-paste.
const wslBridgeIp = getDockerBridgeGatewayIp();
let wslBridgeNote: string | null = null;
if (wslBridgeIp && wslBridgeIp !== "172.17.0.1") {
wslBridgeNote = ` (detected your docker bridge gateway at ${wslBridgeIp})`;
} else if (!wslBridgeIp) {
wslBridgeNote =
" (could not auto-detect bridge IP — the snippet below uses docker's default; verify with:\n" +
" docker network inspect bridge --format '{{range .IPAM.Config}}{{.Gateway}}{{end}}')";
}
printLinuxFix(wslBridgeIp || "172.17.0.1", wslBridgeNote);
} else {
console.error(" Configure your docker daemon to use a DNS server that accepts UDP:53.");
console.error(
' Add { "dns": ["<corp-dns-ip>"] } to your docker daemon.json and restart the daemon.',
);
console.error(" Ask your IT team for an internal DNS server IP.");
}
console.error("");
console.error(" Verify the fix worked:");
console.error(" docker run --rm busybox nslookup registry.npmjs.org");
}
process.exit(1);
}
assertDockerBridgeAndContainerDnsHealthy(host);
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if (host.runtime !== "unknown") {
console.log(` ✓ Container runtime: ${host.runtime}`);
}
if (isLinuxDockerDriverGatewayEnabled() && host.runtime === "podman") {
console.error(" ✗ NemoClaw onboarding now uses OpenShell's Docker driver.");
console.error(" Podman is not supported for this NemoClaw integration path.");
console.error(" Switch to Docker Engine and rerun onboarding.");
process.exit(1);
}
if (host.notes.includes("Running under WSL")) {
console.log(" ⓘ Running under WSL");
}
Expand Down Expand Up @@ -7072,6 +6920,14 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
runPreflight: (preflightOptions) => preflight({ ...opts, ...preflightOptions }),
assessHost,
assertCdiNvidiaGpuSpecPresent,
// Resume backstops for #3508/#3630/Podman: the cached preflight
// step does not capture host Docker/DNS state, and a session
// written by an older NemoClaw may not have run the new bridge/
// DNS fatal checks (mirrors the assertCdiNvidiaGpuSpecPresent
// resume pattern). Podman rejection runs first so users on
// unsupported runtimes don't see Docker-specific diagnostics.
rejectUnsupportedContainerRuntime,
assertDockerBridgeAndContainerDnsHealthy,
resolveSandboxGpuConfig,
validateSandboxGpuPreflight,
skippedStepMessage,
Expand Down
Loading
Loading