From 5132e268dd41107de496f9b738ca414f297a5d02 Mon Sep 17 00:00:00 2001 From: Charan Jagwani Date: Tue, 26 May 2026 13:56:31 -0700 Subject: [PATCH 1/3] feat(onboard): auto-apply UFW sandbox-bridge rule under NEMOCLAW_AUTO_FIX_FIREWALL=1 (#4265) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Brev shadecloud Ubuntu 22.04 the host ships with `ufw INPUT policy DROP`, so sandbox containers on the docker bridge can't reach `172.18.0.1:8080` until the operator runs the `sudo ufw allow ...` rule the onboard preflight already prints. Eko's QA team and anyone fresh- booting a shadecloud H100 hits this every time. Opt-in path: set `NEMOCLAW_AUTO_FIX_FIREWALL=1`. When the reachability probe fails on a `bridge_gateway` route, `verifySandboxBridgeGatewayReachableOrExit` calls a new `tryAutoApplyUfwRule` helper that: 1. Checks `sudo -n true` — never prompts. Without passwordless sudo, the manual-instructions path runs as before. 2. Checks `sudo -n which ufw` — skips if ufw isn't installed. 3. Checks `sudo -n ufw status` — skips if ufw is inactive (rule moot). 4. Runs `sudo -n ufw allow from to port proto tcp` — the same rule the existing message prints. 5. Re-probes reachability; proceeds silently if now ok. Without the env var, behaviour is unchanged (existing manual hint, same exit code). Safety: - Opt-in only; no implicit consent. - Narrow rule (single src subnet → single dest IP:port/proto), identical to what preflight already advises. - `sudo -n` only — never elevates with a password prompt mid-run. - Skips on unsupported environments (no sudo, no ufw, ufw inactive) rather than failing loud — the existing manual hint still surfaces. Tests (6 new, all in `gateway-sandbox-reachability.test.ts`): - not_opted_in (env var unset → skip) - no_subnet_or_gateway (missing route info → skip) - sudo_unavailable (passwordless sudo refused) - ufw_missing (`which ufw` non-zero) - ufw_inactive (`Status: inactive`) - ufw_rule_rejected (apply non-zero) - applied=true happy path with exact argv assertion Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Charan Jagwani --- .../gateway-sandbox-reachability.test.ts | 97 +++++++++++++ .../onboard/gateway-sandbox-reachability.ts | 136 +++++++++++++++++- 2 files changed, 232 insertions(+), 1 deletion(-) diff --git a/src/lib/onboard/gateway-sandbox-reachability.test.ts b/src/lib/onboard/gateway-sandbox-reachability.test.ts index 9599f93687..ce1882966f 100644 --- a/src/lib/onboard/gateway-sandbox-reachability.test.ts +++ b/src/lib/onboard/gateway-sandbox-reachability.test.ts @@ -7,6 +7,7 @@ import { __test, formatSandboxBridgeUnreachableMessage, isSandboxBridgeGatewayReachable, + tryAutoApplyUfwRule, } from "../../../dist/lib/onboard/gateway-sandbox-reachability"; describe("gateway sandbox reachability route modeling", () => { @@ -168,3 +169,99 @@ describe("formatSandboxBridgeUnreachableMessage", () => { expect(msg).not.toContain("ufw allow"); }); }); + +describe("tryAutoApplyUfwRule (#4265)", () => { + type Call = { argv: readonly string[]; status: number; stdout?: string; stderr?: string }; + + function makeRunner(calls: Call[]) { + const recorded: string[][] = []; + const runImpl = (argv: readonly string[]) => { + recorded.push([...argv]); + const idx = recorded.length - 1; + const c = calls[idx]; + if (!c) return { status: 0, stdout: "", stderr: "" }; + return { status: c.status, stdout: c.stdout ?? "", stderr: c.stderr ?? "" }; + }; + return { runImpl, recorded }; + } + + const reach = { + ok: false as const, + reason: "tcp_failed" as const, + routeKind: "bridge_gateway" as const, + networkName: "openshell-docker", + subnet: "172.18.0.0/16", + gatewayIp: "172.18.0.1", + }; + + it("skips when the operator has not opted in", async () => { + const { runImpl, recorded } = makeRunner([]); + const result = await tryAutoApplyUfwRule(reach, { runImpl, optedIn: false }); + expect(result).toEqual({ applied: false, reason: "not_opted_in" }); + expect(recorded).toHaveLength(0); + }); + + it("skips when subnet or gatewayIp is unknown", async () => { + const { runImpl, recorded } = makeRunner([]); + const result = await tryAutoApplyUfwRule( + { ...reach, gatewayIp: undefined }, + { runImpl, optedIn: true }, + ); + expect(result).toEqual({ applied: false, reason: "no_subnet_or_gateway" }); + expect(recorded).toHaveLength(0); + }); + + it("returns sudo_unavailable when passwordless sudo fails", async () => { + const { runImpl } = makeRunner([{ argv: ["sudo", "-n", "true"], status: 1 }]); + const result = await tryAutoApplyUfwRule(reach, { runImpl, optedIn: true }); + expect(result.reason).toBe("sudo_unavailable"); + }); + + it("returns ufw_missing when ufw is not on PATH", async () => { + const { runImpl } = makeRunner([ + { argv: ["sudo", "-n", "true"], status: 0 }, + { argv: ["sudo", "-n", "which", "ufw"], status: 1 }, + ]); + const result = await tryAutoApplyUfwRule(reach, { runImpl, optedIn: true }); + expect(result.reason).toBe("ufw_missing"); + }); + + it("returns ufw_inactive when status reports inactive", async () => { + const { runImpl } = makeRunner([ + { argv: ["sudo", "-n", "true"], status: 0 }, + { argv: ["sudo", "-n", "which", "ufw"], status: 0, stdout: "/usr/sbin/ufw" }, + { argv: ["sudo", "-n", "ufw", "status"], status: 0, stdout: "Status: inactive" }, + ]); + const result = await tryAutoApplyUfwRule(reach, { runImpl, optedIn: true }); + expect(result.reason).toBe("ufw_inactive"); + }); + + it("returns ufw_rule_rejected when ufw exits non-zero on apply", async () => { + const { runImpl } = makeRunner([ + { argv: ["sudo", "-n", "true"], status: 0 }, + { argv: ["sudo", "-n", "which", "ufw"], status: 0, stdout: "/usr/sbin/ufw" }, + { argv: ["sudo", "-n", "ufw", "status"], status: 0, stdout: "Status: active" }, + { argv: [], status: 1, stderr: "ufw: rule rejected" }, + ]); + const result = await tryAutoApplyUfwRule(reach, { runImpl, optedIn: true, port: 8080 }); + expect(result.reason).toBe("ufw_rule_rejected"); + expect(result.detail).toContain("rule rejected"); + }); + + it("applies the narrow allow rule on the happy path", async () => { + const { runImpl, recorded } = makeRunner([ + { argv: ["sudo", "-n", "true"], status: 0 }, + { argv: ["sudo", "-n", "which", "ufw"], status: 0, stdout: "/usr/sbin/ufw" }, + { argv: ["sudo", "-n", "ufw", "status"], status: 0, stdout: "Status: active" }, + { argv: [], status: 0, stdout: "Rule added" }, + ]); + const result = await tryAutoApplyUfwRule(reach, { runImpl, optedIn: true, port: 8080 }); + expect(result).toEqual({ applied: true, reason: "applied", detail: "Rule added" }); + expect(recorded[3]).toEqual([ + "sudo", "-n", "ufw", "allow", + "from", "172.18.0.0/16", + "to", "172.18.0.1", + "port", "8080", "proto", "tcp", + ]); + }); +}); diff --git a/src/lib/onboard/gateway-sandbox-reachability.ts b/src/lib/onboard/gateway-sandbox-reachability.ts index 01641da477..c99b876bce 100644 --- a/src/lib/onboard/gateway-sandbox-reachability.ts +++ b/src/lib/onboard/gateway-sandbox-reachability.ts @@ -10,6 +10,8 @@ * diagnosis; a plain helper container on the bridge is not equivalent. */ +import { spawnSync } from "node:child_process"; + import { dockerCapture, dockerRun } from "../adapters/docker/run"; import { GATEWAY_PORT } from "../core/ports"; @@ -303,6 +305,121 @@ export function formatSandboxBridgeUnreachableMessage( ].join("\n"); } +/** + * Result of attempting to auto-apply the UFW rule that opens + * ` -> :` so sandbox containers can reach the + * gateway. (#4265) + */ +export interface UfwAutoApplyResult { + applied: boolean; + reason: + | "applied" + | "not_opted_in" + | "no_subnet_or_gateway" + | "ufw_missing" + | "ufw_inactive" + | "sudo_unavailable" + | "ufw_rule_rejected"; + detail?: string; +} + +export interface UfwAutoApplyOptions { + port?: number; + /** + * Run a process with argv. Returns the exit code (null on signal/error) + * and trimmed stderr/stdout. Injected for testability — callers in + * production code should rely on the default which uses `spawnSync`. + */ + runImpl?: (argv: readonly string[]) => { status: number | null; stdout: string; stderr: string }; + /** Override to force the opt-in check off in tests. */ + optedIn?: boolean; +} + +function defaultRunArgv( + argv: readonly string[], +): { status: number | null; stdout: string; stderr: string } { + const result = spawnSync(argv[0]!, argv.slice(1), { encoding: "utf-8" }); + return { + status: result.status, + stdout: (result.stdout || "").trim(), + stderr: (result.stderr || "").trim(), + }; +} + +function isUfwAutoApplyOptedIn(): boolean { + return process.env.NEMOCLAW_AUTO_FIX_FIREWALL === "1"; +} + +/** + * Try to apply the `ufw allow from to port ` rule + * non-interactively. Only fires when the operator has opted in via + * `NEMOCLAW_AUTO_FIX_FIREWALL=1`. Returns a structured result so the caller + * can decide whether to re-probe reachability. (#4265) + * + * Safety: + * - Opt-in only — operators must set `NEMOCLAW_AUTO_FIX_FIREWALL=1`. + * - `sudo -n` only — never prompts for a password. If passwordless sudo isn't + * available, falls back to the existing manual-instructions path. + * - Skips on hosts without UFW or with UFW inactive (the rule is moot there). + * - The rule itself is narrow: docker-bridge subnet → host gateway port only. + */ +export async function tryAutoApplyUfwRule( + reach: SandboxBridgeReachabilityResult, + options: UfwAutoApplyOptions = {}, +): Promise { + const port = options.port ?? GATEWAY_PORT; + const run = options.runImpl ?? defaultRunArgv; + const optedIn = options.optedIn ?? isUfwAutoApplyOptedIn(); + + if (!optedIn) return { applied: false, reason: "not_opted_in" }; + if (!reach.subnet || !reach.gatewayIp) { + return { applied: false, reason: "no_subnet_or_gateway" }; + } + + const sudoCheck = run(["sudo", "-n", "true"]); + if (sudoCheck.status !== 0) { + return { + applied: false, + reason: "sudo_unavailable", + detail: "Passwordless sudo not available; cannot auto-apply UFW rule.", + }; + } + + const ufwWhich = run(["sudo", "-n", "which", "ufw"]); + if (ufwWhich.status !== 0) { + return { applied: false, reason: "ufw_missing", detail: "ufw not installed." }; + } + + const status = run(["sudo", "-n", "ufw", "status"]); + if (status.status !== 0 || !/Status:\s*active/i.test(status.stdout)) { + return { applied: false, reason: "ufw_inactive", detail: "ufw is not active." }; + } + + const apply = run([ + "sudo", + "-n", + "ufw", + "allow", + "from", + reach.subnet, + "to", + reach.gatewayIp, + "port", + String(port), + "proto", + "tcp", + ]); + if (apply.status !== 0) { + return { + applied: false, + reason: "ufw_rule_rejected", + detail: apply.stderr || apply.stdout || "ufw rejected the rule.", + }; + } + + return { applied: true, reason: "applied", detail: apply.stdout || undefined }; +} + export async function verifySandboxBridgeGatewayReachableOrExit( exitOnFailure: boolean, options: { skip?: boolean } = {}, @@ -311,9 +428,26 @@ export async function verifySandboxBridgeGatewayReachableOrExit( console.log(" Docker-driver GPU host networking active; skipping sandbox bridge gateway reachability probe."); return; } - const reach = await isSandboxBridgeGatewayReachable(); + let reach = await isSandboxBridgeGatewayReachable(); if (reach.ok) return; + // #4265: when operator opts in, try to auto-apply the firewall rule and + // re-probe before surfacing the manual-fix message. + if (reach.routeKind === "bridge_gateway" && isUfwAutoApplyOptedIn()) { + const autoApply = await tryAutoApplyUfwRule(reach); + if (autoApply.applied) { + console.log( + ` ✓ Applied UFW rule (NEMOCLAW_AUTO_FIX_FIREWALL=1): allow from ${reach.subnet} to ${reach.gatewayIp}:${GATEWAY_PORT}/tcp`, + ); + reach = await isSandboxBridgeGatewayReachable(); + if (reach.ok) return; + } else if (autoApply.reason !== "not_opted_in") { + console.warn( + ` ⚠ NEMOCLAW_AUTO_FIX_FIREWALL=1 set but could not auto-apply UFW rule (${autoApply.reason}${autoApply.detail ? `: ${autoApply.detail}` : ""}); falling back to manual instructions.`, + ); + } + } + const message = formatSandboxBridgeUnreachableMessage(reach); if (reach.reason === "probe_unavailable") { console.warn(message); From 47d45c51c9c634b9dbaf4e5e0cb4daae3d363b91 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 09:06:01 -0700 Subject: [PATCH 2/3] fix(onboard): harden UFW auto-apply gate --- docs/reference/commands.mdx | 5 +- .../gateway-sandbox-reachability.test.ts | 116 +++++++++- .../onboard/gateway-sandbox-reachability.ts | 163 ++++---------- src/lib/onboard/ufw-auto-apply.ts | 207 ++++++++++++++++++ 4 files changed, 363 insertions(+), 128 deletions(-) create mode 100644 src/lib/onboard/ufw-auto-apply.ts diff --git a/docs/reference/commands.mdx b/docs/reference/commands.mdx index 9107f2126a..e5c21d74ee 100644 --- a/docs/reference/commands.mdx +++ b/docs/reference/commands.mdx @@ -198,7 +198,9 @@ On the Docker-driver gateway path, preflight stays read-only when it detects a s It prints a `⚠ Gateway will be recreated when sandbox creation starts` notice and defers the actual teardown to step `[2/8] Starting OpenShell gateway`. This means pressing `Ctrl+C` between preflight and step `[2/8]` leaves the running gateway and existing sandbox containers untouched, so `nemoclaw onboard` is safe to run just to check preflight output. For Linux Docker-driver gateways, onboarding also checks that a helper container on the OpenShell Docker network can reach `host.openshell.internal:`. -If a host firewall blocks that sandbox path, onboarding exits with a `sudo ufw allow from to any port proto tcp` command before it reports the gateway healthy. +If a host firewall blocks that sandbox path, onboarding exits with a `sudo ufw allow from to port proto tcp` command before it reports the gateway healthy. +Set `NEMOCLAW_AUTO_FIX_FIREWALL=1` to opt in to automatic UFW remediation for this specific failure: NemoClaw uses `sudo -n` only, validates the Docker bridge subnet/gateway/port, applies the narrow UFW rule only after a proven TCP reachability failure, and re-probes before continuing. +If passwordless sudo, UFW, or active UFW is unavailable, NemoClaw falls back to the manual guidance path without prompting for a password. Tune the wait via `NEMOCLAW_REUSE_HEALTH_POLL_COUNT` (default `6`) and `NEMOCLAW_REUSE_HEALTH_POLL_INTERVAL` (default `5` seconds). The poll count is clamped to a minimum of `1` so the probe always runs at least once, and the interval is clamped to a minimum of `0` (no sleep between attempts). @@ -1316,6 +1318,7 @@ These flags toggle optional behaviors during onboarding; set them before running | `NEMOCLAW_OPENSHELL_GATEWAY_BIN` | path | Advanced override for the `openshell-gateway` binary used by the Linux Docker-driver gateway. Defaults to the binary next to `openshell`, then common install paths. | | `NEMOCLAW_OPENSHELL_SANDBOX_BIN` | path | Advanced override for the `openshell-sandbox` binary passed to the Linux Docker-driver gateway supervisor. Defaults to the binary next to `openshell`, then common install paths. | | `NEMOCLAW_OPENSHELL_GATEWAY_STATE_DIR` | path | Advanced override for the Linux Docker-driver gateway pid file and SQLite state directory. Defaults to `~/.local/state/nemoclaw/openshell-docker-gateway`. | +| `NEMOCLAW_AUTO_FIX_FIREWALL` | `1` to enable | Opts in to automatic UFW remediation when Linux Docker-driver sandbox containers cannot reach the host gateway after a proven TCP failure. NemoClaw runs `sudo -n` only, validates the narrow Docker bridge subnet → gateway IP:port rule before invoking UFW, re-probes after applying it, and otherwise falls back to the printed manual command. | | `NEMOCLAW_WECHAT_QUIET` | `1` to enable | Silences the `[wechat]` diagnostic lines printed during the host-side WeChat QR login (poll status, IDC redirects, swallowed gateway errors), which are visible by default while the experimental WeChat path stabilizes; set `1` once the flow is reliable in your environment. | ### Onboard Profiling Traces diff --git a/src/lib/onboard/gateway-sandbox-reachability.test.ts b/src/lib/onboard/gateway-sandbox-reachability.test.ts index ce1882966f..0d851b9fee 100644 --- a/src/lib/onboard/gateway-sandbox-reachability.test.ts +++ b/src/lib/onboard/gateway-sandbox-reachability.test.ts @@ -1,13 +1,14 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { __test, formatSandboxBridgeUnreachableMessage, isSandboxBridgeGatewayReachable, tryAutoApplyUfwRule, + verifySandboxBridgeGatewayReachableOrExit, } from "../../../dist/lib/onboard/gateway-sandbox-reachability"; describe("gateway sandbox reachability route modeling", () => { @@ -201,7 +202,7 @@ describe("tryAutoApplyUfwRule (#4265)", () => { expect(recorded).toHaveLength(0); }); - it("skips when subnet or gatewayIp is unknown", async () => { + it("skips when gatewayIp is unknown", async () => { const { runImpl, recorded } = makeRunner([]); const result = await tryAutoApplyUfwRule( { ...reach, gatewayIp: undefined }, @@ -211,6 +212,37 @@ describe("tryAutoApplyUfwRule (#4265)", () => { expect(recorded).toHaveLength(0); }); + it("skips when subnet is unknown", async () => { + const { runImpl, recorded } = makeRunner([]); + const result = await tryAutoApplyUfwRule( + { ...reach, subnet: undefined }, + { runImpl, optedIn: true }, + ); + expect(result).toEqual({ applied: false, reason: "no_subnet_or_gateway" }); + expect(recorded).toHaveLength(0); + }); + + it("rejects malformed or overly broad UFW operands before sudo", async () => { + const { runImpl, recorded } = makeRunner([]); + const broadSubnet = await tryAutoApplyUfwRule( + { ...reach, subnet: "0.0.0.0/0" }, + { runImpl, optedIn: true }, + ); + const outsideGateway = await tryAutoApplyUfwRule( + { ...reach, gatewayIp: "172.19.0.1" }, + { runImpl, optedIn: true }, + ); + const invalidPort = await tryAutoApplyUfwRule(reach, { + runImpl, + optedIn: true, + port: 70000, + }); + expect(broadSubnet.reason).toBe("invalid_rule_operand"); + expect(outsideGateway.reason).toBe("invalid_rule_operand"); + expect(invalidPort.reason).toBe("invalid_rule_operand"); + expect(recorded).toHaveLength(0); + }); + it("returns sudo_unavailable when passwordless sudo fails", async () => { const { runImpl } = makeRunner([{ argv: ["sudo", "-n", "true"], status: 1 }]); const result = await tryAutoApplyUfwRule(reach, { runImpl, optedIn: true }); @@ -265,3 +297,83 @@ describe("tryAutoApplyUfwRule (#4265)", () => { ]); }); }); + +describe("verifySandboxBridgeGatewayReachableOrExit UFW auto-apply (#4265)", () => { + const tcpFailure = { + ok: false as const, + reason: "tcp_failed" as const, + routeKind: "bridge_gateway" as const, + networkName: "openshell-docker", + subnet: "172.18.0.0/16", + gatewayIp: "172.18.0.1", + }; + + it("does not auto-apply UFW when the bridge-gateway probe is unavailable", async () => { + const autoApplyImpl = vi.fn(); + const warn = vi.spyOn(console, "warn").mockImplementation(() => undefined); + await verifySandboxBridgeGatewayReachableOrExit(false, { + autoApplyImpl, + autoApplyOptedInImpl: () => true, + reachabilityImpl: () => ({ + ...tcpFailure, + reason: "probe_unavailable", + detail: "nc: bad address 'host.openshell.internal'", + }), + }); + expect(autoApplyImpl).not.toHaveBeenCalled(); + expect(warn).toHaveBeenCalledWith(expect.stringContaining("Could not verify")); + warn.mockRestore(); + }); + + it("re-probes and returns cleanly after a successful UFW apply", async () => { + const reachabilityImpl = vi + .fn() + .mockResolvedValueOnce(tcpFailure) + .mockResolvedValueOnce({ ...tcpFailure, ok: true, reason: "ok" }); + const autoApplyImpl = vi.fn().mockReturnValue({ applied: true, reason: "applied" }); + const log = vi.spyOn(console, "log").mockImplementation(() => undefined); + await verifySandboxBridgeGatewayReachableOrExit(true, { + autoApplyImpl, + autoApplyOptedInImpl: () => true, + reachabilityImpl, + }); + expect(autoApplyImpl).toHaveBeenCalledWith(tcpFailure); + expect(reachabilityImpl).toHaveBeenCalledTimes(2); + expect(log).toHaveBeenCalledWith(expect.stringContaining("Applied UFW rule")); + log.mockRestore(); + }); + + it("falls back to the manual message when apply succeeds but the re-probe still fails", async () => { + const reachabilityImpl = vi.fn().mockResolvedValue(tcpFailure); + const autoApplyImpl = vi.fn().mockReturnValue({ applied: true, reason: "applied" }); + const log = vi.spyOn(console, "log").mockImplementation(() => undefined); + const error = vi.spyOn(console, "error").mockImplementation(() => undefined); + await expect( + verifySandboxBridgeGatewayReachableOrExit(false, { + autoApplyImpl, + autoApplyOptedInImpl: () => true, + reachabilityImpl, + }), + ).rejects.toThrow("sandbox-bridge unreachable"); + expect(reachabilityImpl).toHaveBeenCalledTimes(2); + expect(error).toHaveBeenCalledWith(expect.stringContaining("ufw allow")); + log.mockRestore(); + error.mockRestore(); + }); + + it("does not warn for unsupported UFW environments when auto-apply is opted in", async () => { + const warn = vi.spyOn(console, "warn").mockImplementation(() => undefined); + const error = vi.spyOn(console, "error").mockImplementation(() => undefined); + await expect( + verifySandboxBridgeGatewayReachableOrExit(false, { + autoApplyImpl: () => ({ applied: false, reason: "ufw_inactive" }), + autoApplyOptedInImpl: () => true, + reachabilityImpl: () => tcpFailure, + }), + ).rejects.toThrow("sandbox-bridge unreachable"); + expect(warn).not.toHaveBeenCalled(); + expect(error).toHaveBeenCalledWith(expect.stringContaining("ufw allow")); + warn.mockRestore(); + error.mockRestore(); + }); +}); diff --git a/src/lib/onboard/gateway-sandbox-reachability.ts b/src/lib/onboard/gateway-sandbox-reachability.ts index c99b876bce..e37fedb19d 100644 --- a/src/lib/onboard/gateway-sandbox-reachability.ts +++ b/src/lib/onboard/gateway-sandbox-reachability.ts @@ -10,10 +10,13 @@ * diagnosis; a plain helper container on the bridge is not equivalent. */ -import { spawnSync } from "node:child_process"; - import { dockerCapture, dockerRun } from "../adapters/docker/run"; import { GATEWAY_PORT } from "../core/ports"; +import type { UfwAutoApplyResult } from "./ufw-auto-apply"; +import { isUfwAutoApplyOptedIn, tryAutoApplyUfwRule } from "./ufw-auto-apply"; + +export type { UfwAutoApplyOptions, UfwAutoApplyResult } from "./ufw-auto-apply"; +export { tryAutoApplyUfwRule } from "./ufw-auto-apply"; const DEFAULT_PROBE_IMAGE = "busybox@sha256:73aaf090f3d85aa34ee199857f03fa3a95c8ede2ffd4cc2cdb5b94e566b11662"; @@ -305,150 +308,60 @@ export function formatSandboxBridgeUnreachableMessage( ].join("\n"); } -/** - * Result of attempting to auto-apply the UFW rule that opens - * ` -> :` so sandbox containers can reach the - * gateway. (#4265) - */ -export interface UfwAutoApplyResult { - applied: boolean; - reason: - | "applied" - | "not_opted_in" - | "no_subnet_or_gateway" - | "ufw_missing" - | "ufw_inactive" - | "sudo_unavailable" - | "ufw_rule_rejected"; - detail?: string; -} - -export interface UfwAutoApplyOptions { +interface SandboxBridgeVerifierOptions { + skip?: boolean; port?: number; - /** - * Run a process with argv. Returns the exit code (null on signal/error) - * and trimmed stderr/stdout. Injected for testability — callers in - * production code should rely on the default which uses `spawnSync`. - */ - runImpl?: (argv: readonly string[]) => { status: number | null; stdout: string; stderr: string }; - /** Override to force the opt-in check off in tests. */ - optedIn?: boolean; -} - -function defaultRunArgv( - argv: readonly string[], -): { status: number | null; stdout: string; stderr: string } { - const result = spawnSync(argv[0]!, argv.slice(1), { encoding: "utf-8" }); - return { - status: result.status, - stdout: (result.stdout || "").trim(), - stderr: (result.stderr || "").trim(), - }; + reachabilityImpl?: () => Promise | SandboxBridgeReachabilityResult; + autoApplyImpl?: ( + reach: SandboxBridgeReachabilityResult, + ) => Promise | UfwAutoApplyResult; + autoApplyOptedInImpl?: () => boolean; } -function isUfwAutoApplyOptedIn(): boolean { - return process.env.NEMOCLAW_AUTO_FIX_FIREWALL === "1"; -} - -/** - * Try to apply the `ufw allow from to port ` rule - * non-interactively. Only fires when the operator has opted in via - * `NEMOCLAW_AUTO_FIX_FIREWALL=1`. Returns a structured result so the caller - * can decide whether to re-probe reachability. (#4265) - * - * Safety: - * - Opt-in only — operators must set `NEMOCLAW_AUTO_FIX_FIREWALL=1`. - * - `sudo -n` only — never prompts for a password. If passwordless sudo isn't - * available, falls back to the existing manual-instructions path. - * - Skips on hosts without UFW or with UFW inactive (the rule is moot there). - * - The rule itself is narrow: docker-bridge subnet → host gateway port only. - */ -export async function tryAutoApplyUfwRule( - reach: SandboxBridgeReachabilityResult, - options: UfwAutoApplyOptions = {}, -): Promise { - const port = options.port ?? GATEWAY_PORT; - const run = options.runImpl ?? defaultRunArgv; - const optedIn = options.optedIn ?? isUfwAutoApplyOptedIn(); - - if (!optedIn) return { applied: false, reason: "not_opted_in" }; - if (!reach.subnet || !reach.gatewayIp) { - return { applied: false, reason: "no_subnet_or_gateway" }; - } - - const sudoCheck = run(["sudo", "-n", "true"]); - if (sudoCheck.status !== 0) { - return { - applied: false, - reason: "sudo_unavailable", - detail: "Passwordless sudo not available; cannot auto-apply UFW rule.", - }; - } - - const ufwWhich = run(["sudo", "-n", "which", "ufw"]); - if (ufwWhich.status !== 0) { - return { applied: false, reason: "ufw_missing", detail: "ufw not installed." }; - } - - const status = run(["sudo", "-n", "ufw", "status"]); - if (status.status !== 0 || !/Status:\s*active/i.test(status.stdout)) { - return { applied: false, reason: "ufw_inactive", detail: "ufw is not active." }; - } - - const apply = run([ - "sudo", - "-n", - "ufw", - "allow", - "from", - reach.subnet, - "to", - reach.gatewayIp, - "port", - String(port), - "proto", - "tcp", - ]); - if (apply.status !== 0) { - return { - applied: false, - reason: "ufw_rule_rejected", - detail: apply.stderr || apply.stdout || "ufw rejected the rule.", - }; - } - - return { applied: true, reason: "applied", detail: apply.stdout || undefined }; -} +const SILENT_UFW_AUTO_APPLY_REASONS = new Set([ + "not_opted_in", + "ufw_missing", + "ufw_inactive", +]); export async function verifySandboxBridgeGatewayReachableOrExit( exitOnFailure: boolean, - options: { skip?: boolean } = {}, + options: SandboxBridgeVerifierOptions = {}, ): Promise { if (options.skip) { console.log(" Docker-driver GPU host networking active; skipping sandbox bridge gateway reachability probe."); return; } - let reach = await isSandboxBridgeGatewayReachable(); + const port = options.port ?? GATEWAY_PORT; + const reachability = options.reachabilityImpl ?? isSandboxBridgeGatewayReachable; + const autoApplyOptedIn = options.autoApplyOptedInImpl ?? isUfwAutoApplyOptedIn; + const autoApply = + options.autoApplyImpl ?? + ((result: SandboxBridgeReachabilityResult) => tryAutoApplyUfwRule(result, { optedIn: true, port })); + + let reach = await reachability(); if (reach.ok) return; - // #4265: when operator opts in, try to auto-apply the firewall rule and - // re-probe before surfacing the manual-fix message. - if (reach.routeKind === "bridge_gateway" && isUfwAutoApplyOptedIn()) { - const autoApply = await tryAutoApplyUfwRule(reach); - if (autoApply.applied) { + // #4265: when operator opts in and the probe proved a bridge TCP failure, + // try to auto-apply the firewall rule and re-probe before surfacing the + // manual-fix message. Do not mutate firewall state for probe helper/DNS + // failures, even if route metadata is present. + if (reach.routeKind === "bridge_gateway" && reach.reason === "tcp_failed" && autoApplyOptedIn()) { + const autoApplyResult = await autoApply(reach); + if (autoApplyResult.applied) { console.log( - ` ✓ Applied UFW rule (NEMOCLAW_AUTO_FIX_FIREWALL=1): allow from ${reach.subnet} to ${reach.gatewayIp}:${GATEWAY_PORT}/tcp`, + ` ✓ Applied UFW rule (NEMOCLAW_AUTO_FIX_FIREWALL=1): allow from ${reach.subnet} to ${reach.gatewayIp}:${port}/tcp`, ); - reach = await isSandboxBridgeGatewayReachable(); + reach = await reachability(); if (reach.ok) return; - } else if (autoApply.reason !== "not_opted_in") { + } else if (!SILENT_UFW_AUTO_APPLY_REASONS.has(autoApplyResult.reason)) { console.warn( - ` ⚠ NEMOCLAW_AUTO_FIX_FIREWALL=1 set but could not auto-apply UFW rule (${autoApply.reason}${autoApply.detail ? `: ${autoApply.detail}` : ""}); falling back to manual instructions.`, + ` ⚠ NEMOCLAW_AUTO_FIX_FIREWALL=1 set but could not auto-apply UFW rule (${autoApplyResult.reason}${autoApplyResult.detail ? `: ${autoApplyResult.detail}` : ""}); falling back to manual instructions.`, ); } } - const message = formatSandboxBridgeUnreachableMessage(reach); + const message = formatSandboxBridgeUnreachableMessage(reach, port); if (reach.reason === "probe_unavailable") { console.warn(message); return; diff --git a/src/lib/onboard/ufw-auto-apply.ts b/src/lib/onboard/ufw-auto-apply.ts new file mode 100644 index 0000000000..e300d9ee5d --- /dev/null +++ b/src/lib/onboard/ufw-auto-apply.ts @@ -0,0 +1,207 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Opt-in UFW remediation for Linux Docker-driver sandbox -> gateway reachability. + * + * Invalid state boundary (#4265): the OpenShell gateway is healthy on the host, + * but sandbox containers on the OpenShell Docker bridge cannot reach the host + * gateway IP because host firewall INPUT policy drops bridge traffic. This is + * host policy state outside the OpenShell route model, so remediation is kept + * explicit, narrow, and removable once OpenShell/NemoClaw owns a first-class + * firewall reconciliation layer for Docker-driver gateways. + */ + +import { spawnSync } from "node:child_process"; + +import { GATEWAY_PORT } from "../core/ports"; +import type { SandboxBridgeReachabilityResult } from "./gateway-sandbox-reachability"; + +/** + * Result of attempting to auto-apply the UFW rule that opens + * ` -> :` so sandbox containers can reach the + * gateway. (#4265) + */ +export interface UfwAutoApplyResult { + applied: boolean; + reason: + | "applied" + | "not_opted_in" + | "no_subnet_or_gateway" + | "invalid_rule_operand" + | "ufw_missing" + | "ufw_inactive" + | "sudo_unavailable" + | "ufw_rule_rejected"; + detail?: string; +} + +export interface UfwAutoApplyOptions { + port?: number; + /** + * Run a process with argv. Returns the exit code (null on signal/error) + * and trimmed stderr/stdout. Injected for testability — callers in + * production code should rely on the default which uses `spawnSync`. + */ + runImpl?: (argv: readonly string[]) => { status: number | null; stdout: string; stderr: string }; + /** Override to force the opt-in check in tests or after a caller-level gate. */ + optedIn?: boolean; +} + +function defaultRunArgv( + argv: readonly string[], +): { status: number | null; stdout: string; stderr: string } { + const result = spawnSync(argv[0]!, argv.slice(1), { encoding: "utf-8" }); + return { + status: result.status, + stdout: (result.stdout || "").trim(), + stderr: (result.stderr || "").trim(), + }; +} + +export function isUfwAutoApplyOptedIn(): boolean { + return process.env.NEMOCLAW_AUTO_FIX_FIREWALL === "1"; +} + +function parseIpv4Address(value: string): number | null { + const parts = value.split("."); + if (parts.length !== 4) return null; + let result = 0; + for (const part of parts) { + if (!/^\d{1,3}$/.test(part)) return null; + const octet = Number(part); + if (!Number.isInteger(octet) || octet < 0 || octet > 255) return null; + result = (result << 8) + octet; + } + return result >>> 0; +} + +function parseDockerBridgeCidr(value: string): { network: number; prefix: number; mask: number } | null { + const [address, prefixRaw, extra] = value.split("/"); + if (!address || !prefixRaw || extra !== undefined) return null; + if (!/^\d{1,2}$/.test(prefixRaw)) return null; + const prefix = Number(prefixRaw); + // Docker bridge networks should be narrow IPv4 networks. Reject broad ranges + // such as 0.0.0.0/0 or 10.0.0.0/8 before changing host firewall policy. + if (!Number.isInteger(prefix) || prefix < 16 || prefix > 32) return null; + const network = parseIpv4Address(address); + if (network === null) return null; + const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0; + if ((network & mask) >>> 0 !== network) return null; + return { network, prefix, mask }; +} + +function validateUfwRuleOperands( + subnet: string, + gatewayIp: string, + port: number, +): string | undefined { + if (!Number.isInteger(port) || port < 1 || port > 65535) { + return `invalid port ${port}`; + } + const cidr = parseDockerBridgeCidr(subnet); + if (!cidr) { + return `invalid or overly broad IPv4 subnet ${subnet}`; + } + const gateway = parseIpv4Address(gatewayIp); + if (gateway === null) { + return `invalid IPv4 gateway ${gatewayIp}`; + } + if (((gateway & cidr.mask) >>> 0) !== cidr.network) { + return `gateway ${gatewayIp} is outside subnet ${subnet}`; + } + return undefined; +} + +function sanitizeUfwDetail(value: string): string { + const clean = value.replace(/[\u0000-\u001f\u007f]+/g, " ").replace(/\s+/g, " ").trim(); + if (!clean) return "ufw rejected the rule."; + return clean.length > 240 ? `${clean.slice(0, 237)}...` : clean; +} + +/** + * Try to apply the `ufw allow from to port ` rule + * non-interactively. Only fires when the operator has opted in via + * `NEMOCLAW_AUTO_FIX_FIREWALL=1` or when the caller has already performed an + * equivalent explicit-consent gate and passes `optedIn: true`. Returns a + * structured result so the caller can decide whether to re-probe reachability. + * (#4265) + * + * Safety: + * - Opt-in only — operators must set `NEMOCLAW_AUTO_FIX_FIREWALL=1` unless a + * higher-level caller has an equivalent explicit consent surface. + * - `sudo -n` only — never prompts for a password. If passwordless sudo isn't + * available, falls back to the existing manual-instructions path. + * - Skips on hosts without UFW or with UFW inactive (the rule is moot there). + * - Validates the rule operands before invoking sudo/ufw: narrow IPv4 Docker + * bridge CIDR, gateway inside that CIDR, and a valid TCP port. + * - The rule itself is narrow: docker-bridge subnet → host gateway port only. + */ +export function tryAutoApplyUfwRule( + reach: SandboxBridgeReachabilityResult, + options: UfwAutoApplyOptions = {}, +): UfwAutoApplyResult { + const port = options.port ?? GATEWAY_PORT; + const run = options.runImpl ?? defaultRunArgv; + const optedIn = options.optedIn ?? isUfwAutoApplyOptedIn(); + + if (!optedIn) return { applied: false, reason: "not_opted_in" }; + if (!reach.subnet || !reach.gatewayIp) { + return { applied: false, reason: "no_subnet_or_gateway" }; + } + + const invalidOperand = validateUfwRuleOperands(reach.subnet, reach.gatewayIp, port); + if (invalidOperand) { + return { applied: false, reason: "invalid_rule_operand", detail: invalidOperand }; + } + + const sudoCheck = run(["sudo", "-n", "true"]); + if (sudoCheck.status !== 0) { + return { + applied: false, + reason: "sudo_unavailable", + detail: "Passwordless sudo not available; cannot auto-apply UFW rule.", + }; + } + + const ufwWhich = run(["sudo", "-n", "which", "ufw"]); + if (ufwWhich.status !== 0) { + return { applied: false, reason: "ufw_missing", detail: "ufw not installed." }; + } + + const status = run(["sudo", "-n", "ufw", "status"]); + if (status.status !== 0 || !/Status:\s*active/i.test(status.stdout)) { + return { applied: false, reason: "ufw_inactive", detail: "ufw is not active." }; + } + + const apply = run([ + "sudo", + "-n", + "ufw", + "allow", + "from", + reach.subnet, + "to", + reach.gatewayIp, + "port", + String(port), + "proto", + "tcp", + ]); + if (apply.status !== 0) { + return { + applied: false, + reason: "ufw_rule_rejected", + detail: sanitizeUfwDetail(apply.stderr || apply.stdout || "ufw rejected the rule."), + }; + } + + return { applied: true, reason: "applied", detail: apply.stdout || undefined }; +} + +export const __test = { + parseDockerBridgeCidr, + parseIpv4Address, + sanitizeUfwDetail, + validateUfwRuleOperands, +}; From cc0d1f961f0ad29060f8b56440d8d16da33cf12e Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Wed, 27 May 2026 09:22:07 -0700 Subject: [PATCH 3/3] Potential fix for pull request finding 'CodeQL / Useless comparison test' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- src/lib/onboard/ufw-auto-apply.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/onboard/ufw-auto-apply.ts b/src/lib/onboard/ufw-auto-apply.ts index e300d9ee5d..994f8b8bb2 100644 --- a/src/lib/onboard/ufw-auto-apply.ts +++ b/src/lib/onboard/ufw-auto-apply.ts @@ -86,7 +86,7 @@ function parseDockerBridgeCidr(value: string): { network: number; prefix: number if (!Number.isInteger(prefix) || prefix < 16 || prefix > 32) return null; const network = parseIpv4Address(address); if (network === null) return null; - const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0; + const mask = (0xffffffff << (32 - prefix)) >>> 0; if ((network & mask) >>> 0 !== network) return null; return { network, prefix, mask }; }