From 8f580c25bc97a4cba967e51afa1fca096c7a5fbe Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Thu, 4 Jun 2026 18:10:46 +0000 Subject: [PATCH 01/11] fix(onboard): harden GPU patch reconnect and rc cleanup against non-fatal failures Signed-off-by: Tinson Lai --- scripts/nemoclaw-start.sh | 15 +++ src/lib/adapters/docker/container.ts | 4 + src/lib/onboard/docker-gpu-patch.test.ts | 100 +++++++++++++++++- src/lib/onboard/docker-gpu-patch.ts | 70 ++++++++++-- .../docker-gpu-supervisor-reconnect.test.ts | 10 +- .../docker-gpu-supervisor-reconnect.ts | 10 +- test/nemoclaw-start.test.ts | 78 ++++++++++++++ 7 files changed, 266 insertions(+), 21 deletions(-) diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh index 8adeed3def..78cb8b7d9a 100755 --- a/scripts/nemoclaw-start.sh +++ b/scripts/nemoclaw-start.sh @@ -2358,6 +2358,21 @@ try: if cleaned == lines: sys.exit(0) + # When the rc file is not owned by us (and we are not root) we cannot + # safely rewrite it: fchmod would raise EPERM without CAP_FOWNER, and + # the in-place reopen via /proc/self/fd would fail anyway. The legacy + # shim line we would have removed is functionally inert (it sources + # /tmp/nemoclaw-proxy-env.sh only if that file exists), so leaving it + # in place is safer than crashing the container under errexit. A later + # root-mode boot can finish the cleanup. + if uid != 0 and st.st_uid != uid: + print( + f"[SECURITY] skipping rc cleanup for {rc_path}: not owned by uid={uid} " + f"(file uid={st.st_uid}); legacy shim left in place", + file=sys.stderr, + ) + sys.exit(0) + try: rewrite_open_rc_file(fd, st, cleaned) except OSError as exc: diff --git a/src/lib/adapters/docker/container.ts b/src/lib/adapters/docker/container.ts index 2138e53df7..a4b521502f 100644 --- a/src/lib/adapters/docker/container.ts +++ b/src/lib/adapters/docker/container.ts @@ -13,6 +13,10 @@ export function dockerStop(containerName: string, opts: DockerRunOptions = {}): return dockerRun(["stop", containerName], opts); } +export function dockerStart(containerName: string, opts: DockerRunOptions = {}): DockerRunResult { + return dockerRun(["start", containerName], opts); +} + // NIM (and most Python services) write errors to stderr, so a stdout-only // capture would silently lose the auth-error tail. Use dockerRun so we can // read both streams from the SpawnResult and concatenate them. Bounded by a diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts index 88223f119f..942e1810f3 100644 --- a/src/lib/onboard/docker-gpu-patch.test.ts +++ b/src/lib/onboard/docker-gpu-patch.test.ts @@ -579,11 +579,109 @@ describe("docker-gpu-patch", () => { String(call[0]).includes("nemoclaw-gpu-backup"), ); expect(backupRmCall).toBeGreaterThanOrEqual(0); - expect(dockerRm.mock.invocationCallOrder[backupRmCall]).toBeLessThan( + // Backup container is removed only AFTER supervisor reconnect confirms + // the GPU container is reachable. If reconnect fails the rollback path + // restores the backup under the original name (see the rollback test + // below), so the backup must outlive the supervisor probe. + expect(dockerRm.mock.invocationCallOrder[backupRmCall]).toBeGreaterThan( runOpenshell.mock.invocationCallOrder[0], ); }); + it("rolls back to the backup container when supervisor reconnect fails", () => { + const dockerCapture = vi.fn((args: readonly string[]) => { + if (args[0] === "ps") return "old-container-id\n"; + if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); + if (args[0] === "info") return ""; + return ""; + }); + const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); + const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); + const runCaptureOpenshell = vi.fn(() => ""); + + expect(() => + recreateOpenShellDockerSandboxWithGpu( + { sandboxName: "alpha", timeoutSecs: 1 }, + { + dockerCapture, + dockerRun, + dockerRunDetached, + dockerRename, + dockerStop, + dockerStart, + dockerRm, + runOpenshell, + runCaptureOpenshell, + sleep: vi.fn(), + now: () => new Date("2026-05-12T00:00:00Z"), + errorPhaseDebouncePolls: 1, + }, + ), + ).toThrow(/pre-patch sandbox restored/); + + const restoreRename = dockerRename.mock.calls.find( + (call) => + String(call[0]).includes("nemoclaw-gpu-backup") && call[1] === "openshell-alpha", + ); + expect(restoreRename).toBeDefined(); + expect(dockerStart).toHaveBeenCalledWith( + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + expect( + dockerRm.mock.calls.some((call) => String(call[0]).includes("nemoclaw-gpu-backup")), + ).toBe(false); + }); + + it("reports rollback failure when restoring the backup container fails", () => { + const dockerCapture = vi.fn((args: readonly string[]) => { + if (args[0] === "ps") return "old-container-id\n"; + if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); + if (args[0] === "info") return ""; + return ""; + }); + const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); + const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); + const dockerRename = vi.fn((oldName: string) => { + if (String(oldName).includes("nemoclaw-gpu-backup")) { + return { status: 1, stderr: "no such container" }; + } + return { status: 0 }; + }); + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn(() => ({ status: 0 })); + const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); + const runCaptureOpenshell = vi.fn(() => ""); + + expect(() => + recreateOpenShellDockerSandboxWithGpu( + { sandboxName: "alpha", timeoutSecs: 1 }, + { + dockerCapture, + dockerRun, + dockerRunDetached, + dockerRename, + dockerStop, + dockerStart, + dockerRm, + runOpenshell, + runCaptureOpenshell, + sleep: vi.fn(), + now: () => new Date("2026-05-12T00:00:00Z"), + errorPhaseDebouncePolls: 1, + }, + ), + ).toThrow(/rollback to backup container failed/); + + expect(dockerStart).not.toHaveBeenCalled(); + }); + it("can recreate during sandbox create before supervisor exec is allowed", () => { const dockerCapture = vi.fn((args: readonly string[]) => { if (args[0] === "ps") return "old-container-id\n"; diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index d46705ebc5..327ce675dc 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -12,6 +12,7 @@ import { dockerRm, dockerRun, dockerRunDetached, + dockerStart, dockerStop, } from "../adapters/docker"; import { @@ -70,6 +71,7 @@ export type DockerGpuPatchDeps = { dockerRunDetached?: DockerRunFn; dockerRename?: DockerRenameFn; dockerRm?: DockerContainerFn; + dockerStart?: DockerContainerFn; dockerStop?: DockerContainerFn; dockerLogs?: DockerLogsFn; runOpenshell?: (args: string[], opts?: Record) => DockerRunResult; @@ -112,6 +114,7 @@ export type DockerGpuPatchFailureContext = { backupContainerName?: string | null; selectedMode?: DockerGpuPatchMode | null; modeAttempts?: DockerGpuPatchModeAttempt[]; + rolledBack?: boolean; }; export type DockerGpuPatchResult = { @@ -245,6 +248,7 @@ function depsWithDefaults(deps: DockerGpuPatchDeps): Required< | "dockerRunDetached" | "dockerRename" | "dockerRm" + | "dockerStart" | "dockerStop" | "dockerLogs" | "sleep" @@ -260,6 +264,7 @@ function depsWithDefaults(deps: DockerGpuPatchDeps): Required< dockerRunDetached, dockerRename, dockerRm, + dockerStart, dockerStop, dockerLogs, sleep: (seconds: number) => { @@ -867,6 +872,37 @@ export function getDockerGpuPatchFailureContext( return null; } +// Tear down the newly-created GPU container and put the pre-patch backup +// container back in its place. Used when supervisor reconnect cannot confirm +// the GPU container is reachable — keeps the user on the CPU-only sandbox +// they had before the patch attempt rather than a deleted-backup / failed-new +// state that requires manual cleanup. +function rollbackToBackupContainer( + refs: { newContainerId: string; backupContainerName: string; originalName: string }, + d: { + dockerStop: DockerContainerFn; + dockerRm: DockerContainerFn; + dockerRename: DockerRenameFn; + dockerStart: DockerContainerFn; + }, +): boolean { + const containerOpts = { + ignoreError: true, + suppressOutput: true, + timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, + }; + d.dockerStop(refs.newContainerId, containerOpts); + d.dockerRm(refs.newContainerId, containerOpts); + const restored = d.dockerRename( + refs.backupContainerName, + refs.originalName, + containerOpts, + ); + if (!isZeroStatus(restored)) return false; + d.dockerStart(refs.originalName, containerOpts); + return true; +} + export function recreateOpenShellDockerSandboxWithGpu( options: { sandboxName: string; @@ -961,23 +997,34 @@ export function recreateOpenShellDockerSandboxWithGpu( } context.newContainerId = newContainerId; + const execReady = + options.waitForSupervisor === false + ? true + : waitForOpenShellSupervisorReconnect( + options.sandboxName, + options.timeoutSecs ?? DOCKER_GPU_PATCH_WAIT_SECS, + deps, + ); + + if (!execReady) { + const rolledBack = rollbackToBackupContainer( + { newContainerId, backupContainerName, originalName }, + d, + ); + context.rolledBack = rolledBack; + throw new Error( + rolledBack + ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." + : "OpenShell supervisor did not reconnect to the GPU-enabled container; rollback to backup container failed.", + ); + } + d.dockerRm(backupContainerName, { ignoreError: true, suppressOutput: true, timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, }); - if (options.waitForSupervisor !== false) { - const execReady = waitForOpenShellSupervisorReconnect( - options.sandboxName, - options.timeoutSecs ?? DOCKER_GPU_PATCH_WAIT_SECS, - deps, - ); - if (!execReady) { - throw new Error("OpenShell supervisor did not reconnect to the GPU-enabled container."); - } - } - return { applied: true, oldContainerId, @@ -1498,6 +1545,7 @@ export function collectDockerGpuPatchDiagnostics( `old_container_id=${context?.oldContainerId ?? "unknown"}`, `new_container_id=${context?.newContainerId ?? "unknown"}`, `backup_container_name=${context?.backupContainerName ?? "none"}`, + `rolled_back=${context?.rolledBack === true ? "yes" : context?.rolledBack === false ? "failed" : "no"}`, "cleanup_commands:", ...cleanupCommands.map((command) => ` ${command}`), ]; diff --git a/src/lib/onboard/docker-gpu-supervisor-reconnect.test.ts b/src/lib/onboard/docker-gpu-supervisor-reconnect.test.ts index 62976067f0..34a2564786 100644 --- a/src/lib/onboard/docker-gpu-supervisor-reconnect.test.ts +++ b/src/lib/onboard/docker-gpu-supervisor-reconnect.test.ts @@ -98,8 +98,8 @@ describe("docker-gpu-supervisor-reconnect Error-phase debounce", () => { expect(runOpenshell).toHaveBeenCalledTimes(6); }); - it("defaults the debounce to 5 polls and honors the env override", () => { - expect(getDockerGpuSupervisorReconnectErrorDebouncePolls({})).toBe(5); + it("defaults the debounce to 15 polls and honors the env override", () => { + expect(getDockerGpuSupervisorReconnectErrorDebouncePolls({})).toBe(15); expect( getDockerGpuSupervisorReconnectErrorDebouncePolls({ NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE: "2", @@ -151,9 +151,9 @@ describe("docker-gpu-supervisor-reconnect Error-phase debounce", () => { }); expect(ok).toBe(false); - // Default K=5 from the env-backed helper: 5 polls + 4 sleeps before fast-fail. - expect(runOpenshell).toHaveBeenCalledTimes(5); - expect(sleep).toHaveBeenCalledTimes(4); + // Default K=15 from the env-backed helper: 15 polls + 14 sleeps before fast-fail. + expect(runOpenshell).toHaveBeenCalledTimes(15); + expect(sleep).toHaveBeenCalledTimes(14); } }); }); diff --git a/src/lib/onboard/docker-gpu-supervisor-reconnect.ts b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts index c8906e9501..7817f71d80 100644 --- a/src/lib/onboard/docker-gpu-supervisor-reconnect.ts +++ b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts @@ -29,10 +29,12 @@ import { envInt } from "./env"; const DOCKER_GPU_PATCH_TIMEOUT_MS = 30_000; const DOCKER_GPU_SUPERVISOR_RECONNECT_MIN_SECS = 900; // Default consecutive Error-phase polls required before fast-fail. With a -// 2-second poll interval this is ~10s of sustained Error, which absorbs the -// transient Error reported during container recreation while still bailing -// fast on a patched container that crashed on startup. -const DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_PHASE_DEFAULT_DEBOUNCE_POLLS = 5; +// 2-second poll interval this is ~30s of sustained Error, leaving headroom +// for slower hosts (Docker Desktop on WSL2, DGX Spark cached re-onboard) +// whose sandbox list cache divergence was observed at ~12s in the original +// repro. Hosts that genuinely crashed on startup hit the rollback path in +// applyDockerGpuPatch rather than waiting out the full window. +const DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_PHASE_DEFAULT_DEBOUNCE_POLLS = 15; export const DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV = "NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT"; diff --git a/test/nemoclaw-start.test.ts b/test/nemoclaw-start.test.ts index a33b76a9d1..2ec412b766 100644 --- a/test/nemoclaw-start.test.ts +++ b/test/nemoclaw-start.test.ts @@ -4694,3 +4694,81 @@ describe("direct-root entrypoint composition under CAP_DAC_OVERRIDE drop", () => } }); }); + +describe("ensure_runtime_shell_env_shim rc cleanup ownership guard", () => { + function extractRcCleanupPython(src: string): string { + const heredocStart = src.indexOf("<<'PY'"); + expect(heredocStart).toBeGreaterThan(-1); + const bodyStart = src.indexOf("\n", heredocStart) + 1; + const bodyEnd = src.indexOf("\nPY\n", bodyStart); + expect(bodyEnd).toBeGreaterThan(bodyStart); + return src.slice(bodyStart, bodyEnd); + } + + it("skips rewrite and exits 0 when the rc file is not owned by the current uid", () => { + const src = fs.readFileSync(START_SCRIPT, "utf-8"); + const py = extractRcCleanupPython(src); + + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-rc-cleanup-")); + try { + const rcPath = path.join(tmpDir, ".bashrc"); + const shim = "[ -f /tmp/nemoclaw-proxy-env.sh ] && . /tmp/nemoclaw-proxy-env.sh"; + const original = `# user content\nexport FOO=bar\n${shim}\n`; + fs.writeFileSync(rcPath, original); + + const pyPath = path.join(tmpDir, "cleanup.py"); + fs.writeFileSync(pyPath, py); + + // Lie about the running uid so the script sees uid != file owner. + // The on-disk file is owned by the test runner; passing an arbitrary + // non-zero uid that differs from the file's uid drives the new + // ownership-guard branch. + const fakeUid = (process.getuid?.() ?? 1000) + 7777; + const result = spawnSync( + "python3", + [pyPath, rcPath, shim, String(fakeUid)], + { encoding: "utf-8" }, + ); + + expect(result.status).toBe(0); + expect(result.stderr).toContain("skipping rc cleanup"); + expect(result.stderr).toContain("not owned by uid="); + expect(fs.readFileSync(rcPath, "utf-8")).toBe(original); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it("still rewrites the rc file when the current uid owns it", () => { + const src = fs.readFileSync(START_SCRIPT, "utf-8"); + const py = extractRcCleanupPython(src); + + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-rc-cleanup-")); + try { + const rcPath = path.join(tmpDir, ".bashrc"); + const shim = "[ -f /tmp/nemoclaw-proxy-env.sh ] && . /tmp/nemoclaw-proxy-env.sh"; + const original = `# user content\nexport FOO=bar\n${shim}\n`; + fs.writeFileSync(rcPath, original); + + const pyPath = path.join(tmpDir, "cleanup.py"); + fs.writeFileSync(pyPath, py); + + const realUid = process.getuid?.() ?? 0; + const result = spawnSync( + "python3", + [pyPath, rcPath, shim, String(realUid)], + { encoding: "utf-8" }, + ); + + expect(result.status).toBe(0); + expect(result.stderr).not.toContain("could not safely clean runtime env shim"); + expect(result.stderr).not.toContain("skipping rc cleanup"); + const cleaned = fs.readFileSync(rcPath, "utf-8"); + expect(cleaned).not.toContain(shim); + expect(cleaned).toContain("# user content"); + expect(cleaned).toContain("export FOO=bar"); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); +}); From bf9577bd8a47131c80bd9d6dab412ae16622a62a Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Thu, 4 Jun 2026 18:34:30 +0000 Subject: [PATCH 02/11] fix(onboard): propagate dockerStart status in rollback to backup container Signed-off-by: Tinson Lai --- src/lib/onboard/docker-gpu-patch.test.ts | 45 ++++++++++++++++++++++++ src/lib/onboard/docker-gpu-patch.ts | 4 +-- 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts index 942e1810f3..95f0ddeaa1 100644 --- a/src/lib/onboard/docker-gpu-patch.test.ts +++ b/src/lib/onboard/docker-gpu-patch.test.ts @@ -682,6 +682,51 @@ describe("docker-gpu-patch", () => { expect(dockerStart).not.toHaveBeenCalled(); }); + it("reports rollback failure when restarting the backup container fails", () => { + const dockerCapture = vi.fn((args: readonly string[]) => { + if (args[0] === "ps") return "old-container-id\n"; + if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); + if (args[0] === "info") return ""; + return ""; + }); + const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); + const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 1, stderr: "container start failed" })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); + const runCaptureOpenshell = vi.fn(() => ""); + + expect(() => + recreateOpenShellDockerSandboxWithGpu( + { sandboxName: "alpha", timeoutSecs: 1 }, + { + dockerCapture, + dockerRun, + dockerRunDetached, + dockerRename, + dockerStop, + dockerStart, + dockerRm, + runOpenshell, + runCaptureOpenshell, + sleep: vi.fn(), + now: () => new Date("2026-05-12T00:00:00Z"), + errorPhaseDebouncePolls: 1, + }, + ), + ).toThrow(/rollback to backup container failed/); + + expect(dockerStart).toHaveBeenCalledWith( + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + expect( + dockerRm.mock.calls.some((call) => String(call[0]).includes("nemoclaw-gpu-backup")), + ).toBe(false); + }); + it("can recreate during sandbox create before supervisor exec is allowed", () => { const dockerCapture = vi.fn((args: readonly string[]) => { if (args[0] === "ps") return "old-container-id\n"; diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index 327ce675dc..9ded29fdcf 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -899,8 +899,8 @@ function rollbackToBackupContainer( containerOpts, ); if (!isZeroStatus(restored)) return false; - d.dockerStart(refs.originalName, containerOpts); - return true; + const started = d.dockerStart(refs.originalName, containerOpts); + return isZeroStatus(started); } export function recreateOpenShellDockerSandboxWithGpu( From 73a053a226c4e7c066a244c01d500c53aedda272 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 01:47:47 +0000 Subject: [PATCH 03/11] fix(onboard): defer GPU patch backup removal to caller-driven finalizer Signed-off-by: Tinson Lai --- src/lib/onboard/docker-gpu-patch.test.ts | 105 ++++++++++++++++++- src/lib/onboard/docker-gpu-patch.ts | 90 ++++++++++++++-- src/lib/onboard/docker-gpu-sandbox-create.ts | 11 +- 3 files changed, 196 insertions(+), 10 deletions(-) diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts index 95f0ddeaa1..9d3c22813c 100644 --- a/src/lib/onboard/docker-gpu-patch.test.ts +++ b/src/lib/onboard/docker-gpu-patch.test.ts @@ -16,8 +16,10 @@ import { classifyDockerGpuPatchFailure, collectDockerGpuPatchDiagnostics, type DockerContainerInspect, + type DockerGpuPatchResult, detectSandboxFallbackDns, dockerReportsNvidiaCdiDevices, + finalizeDockerGpuPatchBackup, formatDockerInspectNetworkSummary, getDockerGpuPatchNetworkMode, getDockerGpuSupervisorReconnectTimeoutSecs, @@ -735,6 +737,7 @@ describe("docker-gpu-patch", () => { return ""; }); const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); const runOpenshell = vi.fn(() => ({ status: 1, stderr: "phase: Provisioning" })); const result = recreateOpenShellDockerSandboxWithGpu( @@ -750,7 +753,7 @@ describe("docker-gpu-patch", () => { dockerRunDetached, dockerRename: vi.fn(() => ({ status: 0 })), dockerStop: vi.fn(() => ({ status: 0 })), - dockerRm: vi.fn(() => ({ status: 0 })), + dockerRm, runOpenshell, sleep: vi.fn(), now: () => new Date("2026-05-12T00:00:00Z"), @@ -758,7 +761,16 @@ describe("docker-gpu-patch", () => { ); expect(result.newContainerId).toBe("new-container-id"); + expect(result.backupRemoved).toBe(false); + expect(result.originalName).toBe("openshell-alpha"); + expect(result.backupContainerName).toContain("nemoclaw-gpu-backup"); expect(runOpenshell).not.toHaveBeenCalled(); + // The create path takes the supervisor wait into its own hands later in + // the flow. The patch helper must NOT remove the backup yet — that would + // re-introduce the deleted-backup / failed-new state #4664 fixes. + expect( + dockerRm.mock.calls.some((call) => String(call[0]).includes("nemoclaw-gpu-backup")), + ).toBe(false); expect(dockerRunDetached).toHaveBeenCalledWith( expect.arrayContaining([ "--env", @@ -773,6 +785,97 @@ describe("docker-gpu-patch", () => { }); }); +describe("finalizeDockerGpuPatchBackup", () => { + function deferredCreateResult(): DockerGpuPatchResult { + return { + applied: true, + oldContainerId: "old-container-id", + newContainerId: "new-container-id", + originalName: "openshell-alpha", + backupContainerName: + "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + mode: { + kind: "gpus", + label: "--gpus all", + device: "all", + args: ["--gpus", "all"], + }, + backupRemoved: false, + }; + } + + it("removes the backup container when supervisor reconnect succeeded", () => { + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const outcome = finalizeDockerGpuPatchBackup( + { result: deferredCreateResult(), supervisorReady: true }, + { dockerRm }, + ); + expect(outcome).toEqual({ backupRemoved: true, rolledBack: false }); + expect(dockerRm).toHaveBeenCalledWith( + "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + expect.objectContaining({ ignoreError: true }), + ); + }); + + it("rolls back to the backup container when supervisor reconnect failed", () => { + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const outcome = finalizeDockerGpuPatchBackup( + { result: deferredCreateResult(), supervisorReady: false }, + { dockerStop, dockerRm, dockerRename, dockerStart }, + ); + expect(outcome).toEqual({ backupRemoved: false, rolledBack: true }); + expect(dockerStop).toHaveBeenCalledWith( + "new-container-id", + expect.objectContaining({ ignoreError: true }), + ); + expect(dockerRename).toHaveBeenCalledWith( + "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + expect(dockerStart).toHaveBeenCalledWith( + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + // The backup itself must NOT be removed when we're restoring it. + expect( + dockerRm.mock.calls.some((call) => + String(call[0]).includes("nemoclaw-gpu-backup"), + ), + ).toBe(false); + }); + + it("reports rolledBack=false when restoring the backup fails", () => { + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ + status: 1, + stderr: "no such container", + })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const outcome = finalizeDockerGpuPatchBackup( + { result: deferredCreateResult(), supervisorReady: false }, + { dockerStop, dockerRm, dockerRename, dockerStart }, + ); + expect(outcome).toEqual({ backupRemoved: false, rolledBack: false }); + expect(dockerStart).not.toHaveBeenCalled(); + }); + + it("is a no-op when the backup was already removed by the patch helper", () => { + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const result = { ...deferredCreateResult(), backupRemoved: true }; + const outcome = finalizeDockerGpuPatchBackup( + { result, supervisorReady: true }, + { dockerRm }, + ); + expect(outcome).toEqual({ backupRemoved: true, rolledBack: false }); + expect(dockerRm).not.toHaveBeenCalled(); + }); +}); + describe("docker-gpu-patch sandbox DNS fallback (#3579)", () => { it("returns the systemd-resolved upstream when /etc/resolv.conf is loopback-only", () => { const readFile = (p: string): string | null => { diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index 9ded29fdcf..675570bc66 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -121,8 +121,15 @@ export type DockerGpuPatchResult = { applied: true; oldContainerId: string; newContainerId: string; + originalName: string; backupContainerName: string; mode: DockerGpuPatchMode; + // True when the patch path also confirmed supervisor reconnect AND removed + // the backup container. False when the caller deferred the reconnect wait + // (via `waitForSupervisor: false`); the backup is still in place and the + // caller is responsible for calling `finalizeDockerGpuPatchBackup` after + // its own supervisor wait completes. + backupRemoved: boolean; }; export type DockerGpuCloneRunOptions = { @@ -903,6 +910,55 @@ function rollbackToBackupContainer( return isZeroStatus(started); } +// Post-reconnect finaliser for callers that ran `recreateOpenShellDockerSandboxWithGpu` +// with `waitForSupervisor: false`. The patch path defers backup cleanup so the +// create flow can keep the pre-patch container available until its own +// supervisor probe completes. The caller must invoke this function after that +// probe with the observed outcome. +// +// `supervisorReady: true` → remove the backup container. +// `supervisorReady: false` → roll back to the backup container; the boolean +// return reflects whether the rollback completed (rename + start both +// succeeded). On a false return the caller is expected to surface the +// rollback-failed error path to the user. +export type DockerGpuPatchFinalizeOptions = { + result: DockerGpuPatchResult; + supervisorReady: boolean; +}; + +export type DockerGpuPatchFinalizeOutcome = { + backupRemoved: boolean; + rolledBack: boolean; +}; + +export function finalizeDockerGpuPatchBackup( + options: DockerGpuPatchFinalizeOptions, + deps: DockerGpuPatchDeps = {}, +): DockerGpuPatchFinalizeOutcome { + const d = depsWithDefaults(deps); + const containerOpts = { + ignoreError: true, + suppressOutput: true, + timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, + }; + if (options.result.backupRemoved) { + return { backupRemoved: true, rolledBack: false }; + } + if (options.supervisorReady) { + d.dockerRm(options.result.backupContainerName, containerOpts); + return { backupRemoved: true, rolledBack: false }; + } + const rolledBack = rollbackToBackupContainer( + { + newContainerId: options.result.newContainerId, + backupContainerName: options.result.backupContainerName, + originalName: options.result.originalName, + }, + d, + ); + return { backupRemoved: false, rolledBack }; +} + export function recreateOpenShellDockerSandboxWithGpu( options: { sandboxName: string; @@ -997,14 +1053,30 @@ export function recreateOpenShellDockerSandboxWithGpu( } context.newContainerId = newContainerId; - const execReady = - options.waitForSupervisor === false - ? true - : waitForOpenShellSupervisorReconnect( - options.sandboxName, - options.timeoutSecs ?? DOCKER_GPU_PATCH_WAIT_SECS, - deps, - ); + // When the caller defers the supervisor reconnect wait (create-time + // patching, where readiness probing happens later in the create flow), + // leave the backup container in place so the caller can choose to either + // remove it after its own reconnect confirms (`finalizeDockerGpuPatchBackup` + // with `supervisorReady: true`), or roll back to it on failure. Removing + // the backup here would re-introduce the deleted-backup / failed-new + // state #4664 set out to eliminate. + if (options.waitForSupervisor === false) { + return { + applied: true, + oldContainerId, + newContainerId, + originalName, + backupContainerName, + mode: selection.mode, + backupRemoved: false, + }; + } + + const execReady = waitForOpenShellSupervisorReconnect( + options.sandboxName, + options.timeoutSecs ?? DOCKER_GPU_PATCH_WAIT_SECS, + deps, + ); if (!execReady) { const rolledBack = rollbackToBackupContainer( @@ -1029,8 +1101,10 @@ export function recreateOpenShellDockerSandboxWithGpu( applied: true, oldContainerId, newContainerId, + originalName, backupContainerName, mode: selection.mode, + backupRemoved: true, }; } catch (error) { const err = error instanceof Error ? error : new Error(String(error)); diff --git a/src/lib/onboard/docker-gpu-sandbox-create.ts b/src/lib/onboard/docker-gpu-sandbox-create.ts index 2c22f46608..f37463540b 100644 --- a/src/lib/onboard/docker-gpu-sandbox-create.ts +++ b/src/lib/onboard/docker-gpu-sandbox-create.ts @@ -12,6 +12,7 @@ import type { } from "./docker-gpu-patch"; import { applyDockerGpuPatchOrExit, + finalizeDockerGpuPatchBackup, findOpenShellDockerSandboxContainerIds, getDockerGpuSupervisorReconnectTimeoutSecs, printDockerGpuPatchFailureAndExit, @@ -147,10 +148,17 @@ export function createDockerGpuSandboxCreatePatch( sleep: options.deps.sleep, }, ); + const finalizeOutcome = result + ? finalizeDockerGpuPatchBackup({ result, supervisorReady }, options.deps) + : { backupRemoved: false, rolledBack: false }; if (supervisorReady) return; printDockerGpuPatchFailureAndExit( options.sandboxName, - new Error("OpenShell supervisor did not reconnect to the GPU-enabled container."), + new Error( + finalizeOutcome.rolledBack + ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." + : "OpenShell supervisor did not reconnect to the GPU-enabled container.", + ), { runCaptureOpenshell: options.deps.runCaptureOpenshell, dockerCapture: options.deps.dockerCapture, @@ -160,6 +168,7 @@ export function createDockerGpuSandboxCreatePatch( newContainerId: result?.newContainerId, backupContainerName: result?.backupContainerName, selectedMode: result?.mode ?? null, + rolledBack: finalizeOutcome.rolledBack, }, }, ); From 97c77f53e07779c44d706469d055a0b908708f59 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 02:11:54 +0000 Subject: [PATCH 04/11] docs(troubleshooting): refresh debounce default to 15 polls / 30s Signed-off-by: Tinson Lai --- .../references/troubleshooting.md | 4 ++-- docs/reference/troubleshooting.mdx | 4 ++-- .../references/troubleshooting.md | 4 ++-- src/lib/onboard/docker-gpu-patch.ts | 13 ++++++------- src/lib/onboard/docker-gpu-sandbox-create.ts | 18 +++++++++++------- 5 files changed, 23 insertions(+), 20 deletions(-) diff --git a/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md b/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md index 6b0a338d5b..f81f2d26fb 100644 --- a/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md +++ b/.agents/skills/nemoclaw-user-reference/references/troubleshooting.md @@ -1249,9 +1249,9 @@ If you do not need GPU access inside the sandbox, rerun with `--no-sandbox-gpu`. Set `NEMOCLAW_DOCKER_GPU_PATCH=0` only when you need to bypass this compatibility path during troubleshooting. If onboarding reports `OpenShell supervisor did not reconnect to the GPU-enabled container.` even though the diagnostic bundle shows the patched container is running and healthy, the supervisor-reconnect wait is treating a transient Error phase (reported while the OpenShell host re-registers the new container) as fatal. -The reconnect wait debounces consecutive Error-phase polls before fast-failing, defaulting to five consecutive polls of about 10 seconds in total. +The reconnect wait debounces consecutive Error-phase polls before fast-failing, defaulting to fifteen consecutive polls of about 30 seconds in total. Increase the debounce window with `NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE` if your host needs more time to re-register the patched container, for example slow WSL2 + Docker Desktop setups. -Set it to a higher integer such as `15` (about 30 seconds) and rerun onboarding; the value is clamped to a minimum of `1`. +Set it to a higher integer such as `30` (about 60 seconds) and rerun onboarding; the value is clamped to a minimum of `1`. ### `pip install` fails with a system-packages error diff --git a/docs/reference/troubleshooting.mdx b/docs/reference/troubleshooting.mdx index ab2e521478..1da83499de 100644 --- a/docs/reference/troubleshooting.mdx +++ b/docs/reference/troubleshooting.mdx @@ -1261,9 +1261,9 @@ If you do not need GPU access inside the sandbox, rerun with `--no-sandbox-gpu`. Set `NEMOCLAW_DOCKER_GPU_PATCH=0` only when you need to bypass this compatibility path during troubleshooting. If onboarding reports `OpenShell supervisor did not reconnect to the GPU-enabled container.` even though the diagnostic bundle shows the patched container is running and healthy, the supervisor-reconnect wait is treating a transient Error phase (reported while the OpenShell host re-registers the new container) as fatal. -The reconnect wait debounces consecutive Error-phase polls before fast-failing, defaulting to five consecutive polls of about 10 seconds in total. +The reconnect wait debounces consecutive Error-phase polls before fast-failing, defaulting to fifteen consecutive polls of about 30 seconds in total. Increase the debounce window with `NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE` if your host needs more time to re-register the patched container, for example slow WSL2 + Docker Desktop setups. -Set it to a higher integer such as `15` (about 30 seconds) and rerun onboarding; the value is clamped to a minimum of `1`. +Set it to a higher integer such as `30` (about 60 seconds) and rerun onboarding; the value is clamped to a minimum of `1`. ### `pip install` fails with a system-packages error diff --git a/skills/nemoclaw-user-reference/references/troubleshooting.md b/skills/nemoclaw-user-reference/references/troubleshooting.md index 7ee345ab8f..a25884baff 100644 --- a/skills/nemoclaw-user-reference/references/troubleshooting.md +++ b/skills/nemoclaw-user-reference/references/troubleshooting.md @@ -1245,9 +1245,9 @@ If you do not need GPU access inside the sandbox, rerun with `--no-sandbox-gpu`. Set `NEMOCLAW_DOCKER_GPU_PATCH=0` only when you need to bypass this compatibility path during troubleshooting. If onboarding reports `OpenShell supervisor did not reconnect to the GPU-enabled container.` even though the diagnostic bundle shows the patched container is running and healthy, the supervisor-reconnect wait is treating a transient Error phase (reported while the OpenShell host re-registers the new container) as fatal. -The reconnect wait debounces consecutive Error-phase polls before fast-failing, defaulting to five consecutive polls of about 10 seconds in total. +The reconnect wait debounces consecutive Error-phase polls before fast-failing, defaulting to fifteen consecutive polls of about 30 seconds in total. Increase the debounce window with `NEMOCLAW_DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE` if your host needs more time to re-register the patched container, for example slow WSL2 + Docker Desktop setups. -Set it to a higher integer such as `15` (about 30 seconds) and rerun onboarding; the value is clamped to a minimum of `1`. +Set it to a higher integer such as `30` (about 60 seconds) and rerun onboarding; the value is clamped to a minimum of `1`. ### `pip install` fails with a system-packages error diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index 675570bc66..659e1ce8e2 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -1053,13 +1053,12 @@ export function recreateOpenShellDockerSandboxWithGpu( } context.newContainerId = newContainerId; - // When the caller defers the supervisor reconnect wait (create-time - // patching, where readiness probing happens later in the create flow), - // leave the backup container in place so the caller can choose to either - // remove it after its own reconnect confirms (`finalizeDockerGpuPatchBackup` - // with `supervisorReady: true`), or roll back to it on failure. Removing - // the backup here would re-introduce the deleted-backup / failed-new - // state #4664 set out to eliminate. + // When the caller defers the supervisor reconnect wait, leave the backup + // container in place so the caller can either remove it via + // `finalizeDockerGpuPatchBackup` with `supervisorReady: true` once its own + // probe confirms, or roll back to it on failure. Removing the backup + // here would strand the user with a deleted-backup / failed-new sandbox + // if the deferred reconnect then fails. if (options.waitForSupervisor === false) { return { applied: true, diff --git a/src/lib/onboard/docker-gpu-sandbox-create.ts b/src/lib/onboard/docker-gpu-sandbox-create.ts index f37463540b..d5d04b9f24 100644 --- a/src/lib/onboard/docker-gpu-sandbox-create.ts +++ b/src/lib/onboard/docker-gpu-sandbox-create.ts @@ -150,15 +150,19 @@ export function createDockerGpuSandboxCreatePatch( ); const finalizeOutcome = result ? finalizeDockerGpuPatchBackup({ result, supervisorReady }, options.deps) - : { backupRemoved: false, rolledBack: false }; + : null; if (supervisorReady) return; + const failureMessage = (() => { + if (!finalizeOutcome) { + return "OpenShell supervisor did not reconnect to the GPU-enabled container."; + } + return finalizeOutcome.rolledBack + ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." + : "OpenShell supervisor did not reconnect to the GPU-enabled container; rollback to backup container failed."; + })(); printDockerGpuPatchFailureAndExit( options.sandboxName, - new Error( - finalizeOutcome.rolledBack - ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." - : "OpenShell supervisor did not reconnect to the GPU-enabled container.", - ), + new Error(failureMessage), { runCaptureOpenshell: options.deps.runCaptureOpenshell, dockerCapture: options.deps.dockerCapture, @@ -168,7 +172,7 @@ export function createDockerGpuSandboxCreatePatch( newContainerId: result?.newContainerId, backupContainerName: result?.backupContainerName, selectedMode: result?.mode ?? null, - rolledBack: finalizeOutcome.rolledBack, + rolledBack: finalizeOutcome?.rolledBack ?? false, }, }, ); From c69777a4eaa28e0b306576a7f63805398cc2f987 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 08:04:08 +0000 Subject: [PATCH 05/11] refactor(onboard): split GPU patch rollback and finalisation into a focused module Signed-off-by: Tinson Lai --- .../onboard/docker-gpu-patch-finalize.test.ts | 96 +++++++++++++++ src/lib/onboard/docker-gpu-patch-finalize.ts | 113 ++++++++++++++++++ src/lib/onboard/docker-gpu-patch.test.ts | 93 -------------- src/lib/onboard/docker-gpu-patch.ts | 88 +------------- src/lib/onboard/docker-gpu-sandbox-create.ts | 2 +- 5 files changed, 213 insertions(+), 179 deletions(-) create mode 100644 src/lib/onboard/docker-gpu-patch-finalize.test.ts create mode 100644 src/lib/onboard/docker-gpu-patch-finalize.ts diff --git a/src/lib/onboard/docker-gpu-patch-finalize.test.ts b/src/lib/onboard/docker-gpu-patch-finalize.test.ts new file mode 100644 index 0000000000..e3e088bd61 --- /dev/null +++ b/src/lib/onboard/docker-gpu-patch-finalize.test.ts @@ -0,0 +1,96 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; + +import type { DockerGpuPatchResult } from "../../../dist/lib/onboard/docker-gpu-patch"; +import { finalizeDockerGpuPatchBackup } from "../../../dist/lib/onboard/docker-gpu-patch-finalize"; + +function deferredCreateResult(): DockerGpuPatchResult { + return { + applied: true, + oldContainerId: "old-container-id", + newContainerId: "new-container-id", + originalName: "openshell-alpha", + backupContainerName: "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + mode: { + kind: "gpus", + label: "--gpus all", + device: "all", + args: ["--gpus", "all"], + }, + backupRemoved: false, + }; +} + +describe("finalizeDockerGpuPatchBackup", () => { + it("removes the backup container when supervisor reconnect succeeded", () => { + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const outcome = finalizeDockerGpuPatchBackup( + { result: deferredCreateResult(), supervisorReady: true }, + { dockerRm }, + ); + expect(outcome).toEqual({ backupRemoved: true, rolledBack: false }); + expect(dockerRm).toHaveBeenCalledWith( + "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + expect.objectContaining({ ignoreError: true }), + ); + }); + + it("rolls back to the backup container when supervisor reconnect failed", () => { + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const outcome = finalizeDockerGpuPatchBackup( + { result: deferredCreateResult(), supervisorReady: false }, + { dockerStop, dockerRm, dockerRename, dockerStart }, + ); + expect(outcome).toEqual({ backupRemoved: false, rolledBack: true }); + expect(dockerStop).toHaveBeenCalledWith( + "new-container-id", + expect.objectContaining({ ignoreError: true }), + ); + expect(dockerRename).toHaveBeenCalledWith( + "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + expect(dockerStart).toHaveBeenCalledWith( + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + expect( + dockerRm.mock.calls.some((call) => + String(call[0]).includes("nemoclaw-gpu-backup"), + ), + ).toBe(false); + }); + + it("reports rolledBack=false when restoring the backup fails", () => { + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ + status: 1, + stderr: "no such container", + })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const outcome = finalizeDockerGpuPatchBackup( + { result: deferredCreateResult(), supervisorReady: false }, + { dockerStop, dockerRm, dockerRename, dockerStart }, + ); + expect(outcome).toEqual({ backupRemoved: false, rolledBack: false }); + expect(dockerStart).not.toHaveBeenCalled(); + }); + + it("is a no-op when the backup was already removed by the patch helper", () => { + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const result = { ...deferredCreateResult(), backupRemoved: true }; + const outcome = finalizeDockerGpuPatchBackup( + { result, supervisorReady: true }, + { dockerRm }, + ); + expect(outcome).toEqual({ backupRemoved: true, rolledBack: false }); + expect(dockerRm).not.toHaveBeenCalled(); + }); +}); diff --git a/src/lib/onboard/docker-gpu-patch-finalize.ts b/src/lib/onboard/docker-gpu-patch-finalize.ts new file mode 100644 index 0000000000..1a09823697 --- /dev/null +++ b/src/lib/onboard/docker-gpu-patch-finalize.ts @@ -0,0 +1,113 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { + dockerRename as defaultDockerRename, + dockerRm as defaultDockerRm, + dockerStart as defaultDockerStart, + dockerStop as defaultDockerStop, +} from "../adapters/docker"; +import type { DockerGpuPatchDeps, DockerGpuPatchResult } from "./docker-gpu-patch"; + +const DOCKER_GPU_PATCH_TIMEOUT_MS = 30_000; + +type DockerRunResult = { + status?: number | null; + stdout?: string | Buffer | null; + stderr?: string | Buffer | null; +}; + +type DockerRunOptions = Record; + +type DockerContainerFn = (containerName: string, opts?: DockerRunOptions) => DockerRunResult; +type DockerRenameFn = ( + oldContainerName: string, + newContainerName: string, + opts?: DockerRunOptions, +) => DockerRunResult; + +type ResolvedRollbackDeps = { + dockerStop: DockerContainerFn; + dockerRm: DockerContainerFn; + dockerRename: DockerRenameFn; + dockerStart: DockerContainerFn; +}; + +function isZeroStatus(result: DockerRunResult | null | undefined): boolean { + return Number(result?.status ?? 0) === 0; +} + +function resolveRollbackDeps(deps: DockerGpuPatchDeps): ResolvedRollbackDeps { + return { + dockerStop: deps.dockerStop ?? defaultDockerStop, + dockerRm: deps.dockerRm ?? defaultDockerRm, + dockerRename: deps.dockerRename ?? defaultDockerRename, + dockerStart: deps.dockerStart ?? defaultDockerStart, + }; +} + +export function rollbackToBackupContainer( + refs: { newContainerId: string; backupContainerName: string; originalName: string }, + deps: ResolvedRollbackDeps, +): boolean { + const containerOpts = { + ignoreError: true, + suppressOutput: true, + timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, + }; + deps.dockerStop(refs.newContainerId, containerOpts); + deps.dockerRm(refs.newContainerId, containerOpts); + const restored = deps.dockerRename( + refs.backupContainerName, + refs.originalName, + containerOpts, + ); + if (!isZeroStatus(restored)) return false; + const started = deps.dockerStart(refs.originalName, containerOpts); + return isZeroStatus(started); +} + +export type DockerGpuPatchFinalizeOptions = { + result: DockerGpuPatchResult; + supervisorReady: boolean; +}; + +export type DockerGpuPatchFinalizeOutcome = { + backupRemoved: boolean; + rolledBack: boolean; +}; + +export function finalizeDockerGpuPatchBackup( + options: DockerGpuPatchFinalizeOptions, + deps: DockerGpuPatchDeps = {}, +): DockerGpuPatchFinalizeOutcome { + const resolved = resolveRollbackDeps(deps); + const containerOpts = { + ignoreError: true, + suppressOutput: true, + timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, + }; + if (options.result.backupRemoved) { + return { backupRemoved: true, rolledBack: false }; + } + if (options.supervisorReady) { + resolved.dockerRm(options.result.backupContainerName, containerOpts); + return { backupRemoved: true, rolledBack: false }; + } + const rolledBack = rollbackToBackupContainer( + { + newContainerId: options.result.newContainerId, + backupContainerName: options.result.backupContainerName, + originalName: options.result.originalName, + }, + resolved, + ); + return { backupRemoved: false, rolledBack }; +} + +export function rollbackPatchToBackup( + refs: { newContainerId: string; backupContainerName: string; originalName: string }, + deps: DockerGpuPatchDeps, +): boolean { + return rollbackToBackupContainer(refs, resolveRollbackDeps(deps)); +} diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts index 9d3c22813c..9bf9e1c9c9 100644 --- a/src/lib/onboard/docker-gpu-patch.test.ts +++ b/src/lib/onboard/docker-gpu-patch.test.ts @@ -16,10 +16,8 @@ import { classifyDockerGpuPatchFailure, collectDockerGpuPatchDiagnostics, type DockerContainerInspect, - type DockerGpuPatchResult, detectSandboxFallbackDns, dockerReportsNvidiaCdiDevices, - finalizeDockerGpuPatchBackup, formatDockerInspectNetworkSummary, getDockerGpuPatchNetworkMode, getDockerGpuSupervisorReconnectTimeoutSecs, @@ -785,97 +783,6 @@ describe("docker-gpu-patch", () => { }); }); -describe("finalizeDockerGpuPatchBackup", () => { - function deferredCreateResult(): DockerGpuPatchResult { - return { - applied: true, - oldContainerId: "old-container-id", - newContainerId: "new-container-id", - originalName: "openshell-alpha", - backupContainerName: - "openshell-alpha-nemoclaw-gpu-backup-1780491860342", - mode: { - kind: "gpus", - label: "--gpus all", - device: "all", - args: ["--gpus", "all"], - }, - backupRemoved: false, - }; - } - - it("removes the backup container when supervisor reconnect succeeded", () => { - const dockerRm = vi.fn((_name: string) => ({ status: 0 })); - const outcome = finalizeDockerGpuPatchBackup( - { result: deferredCreateResult(), supervisorReady: true }, - { dockerRm }, - ); - expect(outcome).toEqual({ backupRemoved: true, rolledBack: false }); - expect(dockerRm).toHaveBeenCalledWith( - "openshell-alpha-nemoclaw-gpu-backup-1780491860342", - expect.objectContaining({ ignoreError: true }), - ); - }); - - it("rolls back to the backup container when supervisor reconnect failed", () => { - const dockerStop = vi.fn(() => ({ status: 0 })); - const dockerRm = vi.fn((_name: string) => ({ status: 0 })); - const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); - const dockerStart = vi.fn(() => ({ status: 0 })); - const outcome = finalizeDockerGpuPatchBackup( - { result: deferredCreateResult(), supervisorReady: false }, - { dockerStop, dockerRm, dockerRename, dockerStart }, - ); - expect(outcome).toEqual({ backupRemoved: false, rolledBack: true }); - expect(dockerStop).toHaveBeenCalledWith( - "new-container-id", - expect.objectContaining({ ignoreError: true }), - ); - expect(dockerRename).toHaveBeenCalledWith( - "openshell-alpha-nemoclaw-gpu-backup-1780491860342", - "openshell-alpha", - expect.objectContaining({ ignoreError: true }), - ); - expect(dockerStart).toHaveBeenCalledWith( - "openshell-alpha", - expect.objectContaining({ ignoreError: true }), - ); - // The backup itself must NOT be removed when we're restoring it. - expect( - dockerRm.mock.calls.some((call) => - String(call[0]).includes("nemoclaw-gpu-backup"), - ), - ).toBe(false); - }); - - it("reports rolledBack=false when restoring the backup fails", () => { - const dockerStop = vi.fn(() => ({ status: 0 })); - const dockerRm = vi.fn((_name: string) => ({ status: 0 })); - const dockerRename = vi.fn((_old: string, _next: string) => ({ - status: 1, - stderr: "no such container", - })); - const dockerStart = vi.fn(() => ({ status: 0 })); - const outcome = finalizeDockerGpuPatchBackup( - { result: deferredCreateResult(), supervisorReady: false }, - { dockerStop, dockerRm, dockerRename, dockerStart }, - ); - expect(outcome).toEqual({ backupRemoved: false, rolledBack: false }); - expect(dockerStart).not.toHaveBeenCalled(); - }); - - it("is a no-op when the backup was already removed by the patch helper", () => { - const dockerRm = vi.fn((_name: string) => ({ status: 0 })); - const result = { ...deferredCreateResult(), backupRemoved: true }; - const outcome = finalizeDockerGpuPatchBackup( - { result, supervisorReady: true }, - { dockerRm }, - ); - expect(outcome).toEqual({ backupRemoved: true, rolledBack: false }); - expect(dockerRm).not.toHaveBeenCalled(); - }); -}); - describe("docker-gpu-patch sandbox DNS fallback (#3579)", () => { it("returns the systemd-resolved upstream when /etc/resolv.conf is loopback-only", () => { const readFile = (p: string): string | null => { diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index 659e1ce8e2..916c7ade01 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -12,9 +12,9 @@ import { dockerRm, dockerRun, dockerRunDetached, - dockerStart, dockerStop, } from "../adapters/docker"; +import { rollbackPatchToBackup } from "./docker-gpu-patch-finalize"; import { type DockerGpuSupervisorReconnectDeps, DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE_ENV, @@ -255,7 +255,6 @@ function depsWithDefaults(deps: DockerGpuPatchDeps): Required< | "dockerRunDetached" | "dockerRename" | "dockerRm" - | "dockerStart" | "dockerStop" | "dockerLogs" | "sleep" @@ -271,7 +270,6 @@ function depsWithDefaults(deps: DockerGpuPatchDeps): Required< dockerRunDetached, dockerRename, dockerRm, - dockerStart, dockerStop, dockerLogs, sleep: (seconds: number) => { @@ -879,86 +877,6 @@ export function getDockerGpuPatchFailureContext( return null; } -// Tear down the newly-created GPU container and put the pre-patch backup -// container back in its place. Used when supervisor reconnect cannot confirm -// the GPU container is reachable — keeps the user on the CPU-only sandbox -// they had before the patch attempt rather than a deleted-backup / failed-new -// state that requires manual cleanup. -function rollbackToBackupContainer( - refs: { newContainerId: string; backupContainerName: string; originalName: string }, - d: { - dockerStop: DockerContainerFn; - dockerRm: DockerContainerFn; - dockerRename: DockerRenameFn; - dockerStart: DockerContainerFn; - }, -): boolean { - const containerOpts = { - ignoreError: true, - suppressOutput: true, - timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, - }; - d.dockerStop(refs.newContainerId, containerOpts); - d.dockerRm(refs.newContainerId, containerOpts); - const restored = d.dockerRename( - refs.backupContainerName, - refs.originalName, - containerOpts, - ); - if (!isZeroStatus(restored)) return false; - const started = d.dockerStart(refs.originalName, containerOpts); - return isZeroStatus(started); -} - -// Post-reconnect finaliser for callers that ran `recreateOpenShellDockerSandboxWithGpu` -// with `waitForSupervisor: false`. The patch path defers backup cleanup so the -// create flow can keep the pre-patch container available until its own -// supervisor probe completes. The caller must invoke this function after that -// probe with the observed outcome. -// -// `supervisorReady: true` → remove the backup container. -// `supervisorReady: false` → roll back to the backup container; the boolean -// return reflects whether the rollback completed (rename + start both -// succeeded). On a false return the caller is expected to surface the -// rollback-failed error path to the user. -export type DockerGpuPatchFinalizeOptions = { - result: DockerGpuPatchResult; - supervisorReady: boolean; -}; - -export type DockerGpuPatchFinalizeOutcome = { - backupRemoved: boolean; - rolledBack: boolean; -}; - -export function finalizeDockerGpuPatchBackup( - options: DockerGpuPatchFinalizeOptions, - deps: DockerGpuPatchDeps = {}, -): DockerGpuPatchFinalizeOutcome { - const d = depsWithDefaults(deps); - const containerOpts = { - ignoreError: true, - suppressOutput: true, - timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, - }; - if (options.result.backupRemoved) { - return { backupRemoved: true, rolledBack: false }; - } - if (options.supervisorReady) { - d.dockerRm(options.result.backupContainerName, containerOpts); - return { backupRemoved: true, rolledBack: false }; - } - const rolledBack = rollbackToBackupContainer( - { - newContainerId: options.result.newContainerId, - backupContainerName: options.result.backupContainerName, - originalName: options.result.originalName, - }, - d, - ); - return { backupRemoved: false, rolledBack }; -} - export function recreateOpenShellDockerSandboxWithGpu( options: { sandboxName: string; @@ -1078,9 +996,9 @@ export function recreateOpenShellDockerSandboxWithGpu( ); if (!execReady) { - const rolledBack = rollbackToBackupContainer( + const rolledBack = rollbackPatchToBackup( { newContainerId, backupContainerName, originalName }, - d, + deps, ); context.rolledBack = rolledBack; throw new Error( diff --git a/src/lib/onboard/docker-gpu-sandbox-create.ts b/src/lib/onboard/docker-gpu-sandbox-create.ts index d5d04b9f24..c7a7d3c133 100644 --- a/src/lib/onboard/docker-gpu-sandbox-create.ts +++ b/src/lib/onboard/docker-gpu-sandbox-create.ts @@ -12,7 +12,6 @@ import type { } from "./docker-gpu-patch"; import { applyDockerGpuPatchOrExit, - finalizeDockerGpuPatchBackup, findOpenShellDockerSandboxContainerIds, getDockerGpuSupervisorReconnectTimeoutSecs, printDockerGpuPatchFailureAndExit, @@ -22,6 +21,7 @@ import { shouldApplyDockerGpuPatch, waitForOpenShellSupervisorReconnect, } from "./docker-gpu-patch"; +import { finalizeDockerGpuPatchBackup } from "./docker-gpu-patch-finalize"; type DockerGpuSandboxCreateDeps = Pick< DockerGpuPatchDeps, From e8b94a91a39d843217054248a4c9edf0b08c8029 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 08:04:18 +0000 Subject: [PATCH 06/11] fix(scripts): extract rc-cleanup helper to standalone Python so non-root rc ownership mismatch is fixture-tested Signed-off-by: Tinson Lai --- Dockerfile | 2 + scripts/lib/clean_runtime_shell_env_shim.py | 190 ++++++++++++++++++++ scripts/nemoclaw-start.sh | 144 ++------------- test/clean-runtime-shell-env-shim.test.ts | 122 +++++++++++++ test/nemoclaw-start.test.ts | 85 --------- 5 files changed, 324 insertions(+), 219 deletions(-) create mode 100644 scripts/lib/clean_runtime_shell_env_shim.py create mode 100644 test/clean-runtime-shell-env-shim.test.ts diff --git a/Dockerfile b/Dockerfile index 79666f5bfe..5d70c1e66c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -402,6 +402,7 @@ RUN mkdir -p /sandbox/.nemoclaw/blueprints/0.1.0 \ # Copy startup script and shared sandbox initialisation library COPY scripts/lib/sandbox-init.sh /usr/local/lib/nemoclaw/sandbox-init.sh COPY scripts/lib/openclaw_device_approval_policy.py /usr/local/lib/nemoclaw/openclaw_device_approval_policy.py +COPY scripts/lib/clean_runtime_shell_env_shim.py /usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py COPY scripts/nemoclaw-start.sh /usr/local/bin/nemoclaw-start # Copy NODE_OPTIONS preload modules to a Landlock-accessible path. OpenShell ≥0.0.36 # blocks /opt/nemoclaw-blueprint/ from non-root users, but the entrypoint @@ -418,6 +419,7 @@ RUN chmod 755 /usr/local/bin/nemoclaw-start /usr/local/bin/nemoclaw-codex-acp \ /usr/local/lib/nemoclaw/openclaw-build-messaging-plugins.py \ /usr/local/lib/nemoclaw/seed-wechat-accounts.py \ && chmod 644 /usr/local/lib/nemoclaw/openclaw_device_approval_policy.py \ + /usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py \ && if [ -d /usr/local/lib/nemoclaw/preloads ]; then find /usr/local/lib/nemoclaw/preloads -type f -name '*.js' -exec chmod 644 {} +; fi \ && chmod 755 /usr/local/share/nemoclaw \ /usr/local/share/nemoclaw/openclaw-plugins \ diff --git a/scripts/lib/clean_runtime_shell_env_shim.py b/scripts/lib/clean_runtime_shell_env_shim.py new file mode 100644 index 0000000000..185f02f15a --- /dev/null +++ b/scripts/lib/clean_runtime_shell_env_shim.py @@ -0,0 +1,190 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Remove the legacy runtime shell-env shim from a sandbox user's rc file. + +Older base images and earlier entrypoints wrote a two-line stanza into +.bashrc/.profile that sourced /tmp/nemoclaw-proxy-env.sh. The startup +entrypoint now exports those variables in-process, so the legacy stanza is +deleted before lock_rc_files makes the rc files read-only again. + +The script intentionally exits 0 in a small number of "leave-it-in-place" +cases that are not safe to rewrite from a non-root entrypoint: + +* The rc file is not owned by the current uid (e.g. root-owned .bashrc in a + non-root sandbox). Rewriting it would need CAP_FOWNER, which the entrypoint + no longer has after process-capability drops. The leftover stanza only + sources /tmp/nemoclaw-proxy-env.sh if that file exists; that file's + permissions are hardened elsewhere in the startup sequence. + +* The rc file contents are already clean (no shim line). + +Invocation (from nemoclaw-start.sh): + python3 clean_runtime_shell_env_shim.py +""" + +import errno +import os +import stat +import sys +import tempfile + + +def same_file(left, right): + return left.st_dev == right.st_dev and left.st_ino == right.st_ino + + +def rewrite_open_rc_file(read_fd, original_stat, cleaned_lines, uid): + # The runtime test image can make /sandbox non-writable while leaving + # legacy shims in the rc files. In that case atomic rename into /sandbox + # fails, so rewrite the already-validated inode through /proc/self/fd + # instead. + if uid == 0: + os.fchown(read_fd, 0, 0) + os.fchmod(read_fd, 0o600) + write_fd = os.open( + f"/proc/self/fd/{read_fd}", + os.O_WRONLY | os.O_TRUNC | getattr(os, "O_CLOEXEC", 0), + ) + try: + if not same_file(original_stat, os.fstat(write_fd)): + raise RuntimeError("rc file descriptor target changed during cleanup") + with os.fdopen(write_fd, "w", encoding="utf-8", errors="surrogateescape") as handle: + write_fd = None + handle.writelines(cleaned_lines) + handle.flush() + os.fsync(handle.fileno()) + finally: + if write_fd is not None: + os.close(write_fd) + os.fchmod(read_fd, 0o644) + + +def rewrite_by_rename(rc_path, cleaned_lines, uid, tmp_paths): + tmp_fd, tmp_path = tempfile.mkstemp(prefix="nemoclaw-rc-clean.", dir="/tmp", text=True) + tmp_paths.append(tmp_path) + with os.fdopen(tmp_fd, "w", encoding="utf-8", errors="surrogateescape") as handle: + handle.writelines(cleaned_lines) + handle.flush() + os.fsync(handle.fileno()) + if uid == 0: + os.chown(tmp_path, 0, 0) + os.chmod(tmp_path, 0o644) + os.replace(tmp_path, rc_path) + tmp_paths.pop() + + +def main(argv): + if len(argv) != 4: + print( + "[SECURITY] clean_runtime_shell_env_shim: expected ", + file=sys.stderr, + ) + return 1 + rc_path = argv[1] + shim = argv[2] + uid = int(argv[3]) + fd = None + tmp_paths = [] + + try: + flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_NOFOLLOW", 0) + try: + fd = os.open(rc_path, flags) + except OSError as exc: + if exc.errno == errno.ELOOP: + print( + f"[SECURITY] refusing symlinked rc file during cleanup: {rc_path}", + file=sys.stderr, + ) + else: + print( + f"[SECURITY] could not open rc file for cleanup: {rc_path}: {exc}", + file=sys.stderr, + ) + return 1 + + st = os.fstat(fd) + if not stat.S_ISREG(st.st_mode): + print( + f"[SECURITY] refusing non-regular rc file during cleanup: {rc_path}", + file=sys.stderr, + ) + return 1 + with os.fdopen(os.dup(fd), "r", encoding="utf-8", errors="surrogateescape") as handle: + lines = handle.readlines() + + cleaned = [] + index = 0 + while index < len(lines): + line = lines[index] + bare = line.rstrip("\n") + if bare == "# Source runtime proxy config": + if index + 1 < len(lines): + next_line = lines[index + 1] + next_bare = next_line.rstrip("\n") + if next_bare == shim or "/tmp/nemoclaw-proxy-env.sh" in next_line: + index += 2 + continue + cleaned.append(line) + cleaned.append(next_line) + index += 2 + continue + if bare == shim or "/tmp/nemoclaw-proxy-env.sh" in line: + index += 1 + continue + cleaned.append(line) + index += 1 + + if any( + line.rstrip("\n") == shim or "/tmp/nemoclaw-proxy-env.sh" in line + for line in cleaned + ): + print( + f"[SECURITY] runtime env shim still present after cleanup: {rc_path}", + file=sys.stderr, + ) + return 1 + if cleaned == lines: + return 0 + + # When the rc file is not owned by us (and we are not root) we cannot + # safely rewrite it: fchmod would raise EPERM without CAP_FOWNER, and + # the in-place reopen via /proc/self/fd would fail anyway. The legacy + # shim line we would have removed is functionally inert (it sources + # /tmp/nemoclaw-proxy-env.sh only if that file exists), so leaving it + # in place is safer than crashing the container under errexit. A later + # root-mode boot can finish the cleanup. + if uid != 0 and st.st_uid != uid: + print( + f"[SECURITY] skipping rc cleanup for {rc_path}: not owned by uid={uid} " + f"(file uid={st.st_uid}); legacy shim left in place", + file=sys.stderr, + ) + return 0 + + try: + rewrite_open_rc_file(fd, st, cleaned, uid) + except OSError as exc: + if exc.errno != errno.ENOENT: + raise + rewrite_by_rename(rc_path, cleaned, uid, tmp_paths) + except Exception as exc: + print( + f"[SECURITY] could not safely clean runtime env shim from {rc_path}: {exc}", + file=sys.stderr, + ) + return 1 + finally: + if fd is not None: + os.close(fd) + for tmp_path in tmp_paths: + try: + os.unlink(tmp_path) + except FileNotFoundError: + pass + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh index aa808ace49..27f13329d3 100755 --- a/scripts/nemoclaw-start.sh +++ b/scripts/nemoclaw-start.sh @@ -2444,6 +2444,15 @@ GUARDENVEOF # Keep per-user rc files out of runtime proxy wiring. Older images and prior # entrypoint versions wrote a two-line shim into .bashrc/.profile; remove that # managed stanza before lock_rc_files makes the files read-only again. +# +# The Python body lives in scripts/lib/clean_runtime_shell_env_shim.py so it +# can be unit-tested with controlled rc fixtures. Installed location in the +# sandbox image: /usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py. +_CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT="/usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py" +if [ ! -f "$_CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT" ]; then + _CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/clean_runtime_shell_env_shim.py" +fi + ensure_runtime_shell_env_shim() { local failed=0 local rc_file @@ -2463,140 +2472,7 @@ ensure_runtime_shell_env_shim() { continue fi - if ! command python3 - "$rc_file" "$_RUNTIME_SHELL_ENV_SHIM" "$(id -u)" <<'PY'; then -import errno -import os -import stat -import sys -import tempfile - -rc_path, shim, uid_text = sys.argv[1:4] -uid = int(uid_text) -fd = None -tmp_path = None - - -def same_file(left, right): - return left.st_dev == right.st_dev and left.st_ino == right.st_ino - - -def rewrite_open_rc_file(read_fd, original_stat, cleaned_lines): - # The runtime test image can make /sandbox non-writable while leaving legacy - # shims in the rc files. In that case atomic rename into /sandbox fails, so - # rewrite the already-validated inode through /proc/self/fd instead. - if uid == 0: - os.fchown(read_fd, 0, 0) - os.fchmod(read_fd, 0o600) - write_fd = os.open( - f"/proc/self/fd/{read_fd}", - os.O_WRONLY | os.O_TRUNC | getattr(os, "O_CLOEXEC", 0), - ) - try: - if not same_file(original_stat, os.fstat(write_fd)): - raise RuntimeError("rc file descriptor target changed during cleanup") - with os.fdopen(write_fd, "w", encoding="utf-8", errors="surrogateescape") as handle: - write_fd = None - handle.writelines(cleaned_lines) - handle.flush() - os.fsync(handle.fileno()) - finally: - if write_fd is not None: - os.close(write_fd) - os.fchmod(read_fd, 0o644) - - -def rewrite_by_rename(cleaned_lines): - global tmp_path - tmp_fd, tmp_path = tempfile.mkstemp(prefix="nemoclaw-rc-clean.", dir="/tmp", text=True) - with os.fdopen(tmp_fd, "w", encoding="utf-8", errors="surrogateescape") as handle: - handle.writelines(cleaned_lines) - handle.flush() - os.fsync(handle.fileno()) - if uid == 0: - os.chown(tmp_path, 0, 0) - os.chmod(tmp_path, 0o644) - os.replace(tmp_path, rc_path) - tmp_path = None - -try: - flags = os.O_RDONLY | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_NOFOLLOW", 0) - try: - fd = os.open(rc_path, flags) - except OSError as exc: - if exc.errno == errno.ELOOP: - print(f"[SECURITY] refusing symlinked rc file during cleanup: {rc_path}", file=sys.stderr) - else: - print(f"[SECURITY] could not open rc file for cleanup: {rc_path}: {exc}", file=sys.stderr) - sys.exit(1) - - st = os.fstat(fd) - if not stat.S_ISREG(st.st_mode): - print(f"[SECURITY] refusing non-regular rc file during cleanup: {rc_path}", file=sys.stderr) - sys.exit(1) - with os.fdopen(os.dup(fd), "r", encoding="utf-8", errors="surrogateescape") as handle: - lines = handle.readlines() - - cleaned = [] - index = 0 - while index < len(lines): - line = lines[index] - bare = line.rstrip("\n") - if bare == "# Source runtime proxy config": - if index + 1 < len(lines): - next_line = lines[index + 1] - next_bare = next_line.rstrip("\n") - if next_bare == shim or "/tmp/nemoclaw-proxy-env.sh" in next_line: - index += 2 - continue - cleaned.append(line) - cleaned.append(next_line) - index += 2 - continue - if bare == shim or "/tmp/nemoclaw-proxy-env.sh" in line: - index += 1 - continue - cleaned.append(line) - index += 1 - - if any(line.rstrip("\n") == shim or "/tmp/nemoclaw-proxy-env.sh" in line for line in cleaned): - print(f"[SECURITY] runtime env shim still present after cleanup: {rc_path}", file=sys.stderr) - sys.exit(1) - if cleaned == lines: - sys.exit(0) - - # When the rc file is not owned by us (and we are not root) we cannot - # safely rewrite it: fchmod would raise EPERM without CAP_FOWNER, and - # the in-place reopen via /proc/self/fd would fail anyway. The legacy - # shim line we would have removed is functionally inert (it sources - # /tmp/nemoclaw-proxy-env.sh only if that file exists), so leaving it - # in place is safer than crashing the container under errexit. A later - # root-mode boot can finish the cleanup. - if uid != 0 and st.st_uid != uid: - print( - f"[SECURITY] skipping rc cleanup for {rc_path}: not owned by uid={uid} " - f"(file uid={st.st_uid}); legacy shim left in place", - file=sys.stderr, - ) - sys.exit(0) - - try: - rewrite_open_rc_file(fd, st, cleaned) - except OSError as exc: - if exc.errno != errno.ENOENT: - raise - rewrite_by_rename(cleaned) -except Exception as exc: - print(f"[SECURITY] could not safely clean runtime env shim from {rc_path}: {exc}", file=sys.stderr) - sys.exit(1) -finally: - if fd is not None: - os.close(fd) - if tmp_path: - try: - os.unlink(tmp_path) - except FileNotFoundError: - pass -PY + if ! command python3 "$_CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT" "$rc_file" "$_RUNTIME_SHELL_ENV_SHIM" "$(id -u)"; then failed=1 continue fi diff --git a/test/clean-runtime-shell-env-shim.test.ts b/test/clean-runtime-shell-env-shim.test.ts new file mode 100644 index 0000000000..031633c822 --- /dev/null +++ b/test/clean-runtime-shell-env-shim.test.ts @@ -0,0 +1,122 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +const CLEAN_SCRIPT = path.join( + import.meta.dirname, + "..", + "scripts", + "lib", + "clean_runtime_shell_env_shim.py", +); +const SHIM_TEXT = '[ -f /tmp/nemoclaw-proxy-env.sh ] && . /tmp/nemoclaw-proxy-env.sh'; + +function runScript(args: { rcPath: string; shim: string; uid: number }): { + status: number | null; + stderr: string; + stdout: string; +} { + const result = spawnSync( + "python3", + [CLEAN_SCRIPT, args.rcPath, args.shim, String(args.uid)], + { encoding: "utf-8" }, + ); + return { + status: result.status, + stderr: result.stderr ?? "", + stdout: result.stdout ?? "", + }; +} + +describe("clean_runtime_shell_env_shim.py", () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-rc-test-")); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it("removes the legacy two-line stanza when uid owns the rc file", () => { + const rcPath = path.join(tmpDir, ".bashrc"); + const before = `export FOO=1\n# Source runtime proxy config\n${SHIM_TEXT}\nexport BAR=2\n`; + fs.writeFileSync(rcPath, before, { mode: 0o644 }); + + const stat = fs.statSync(rcPath); + const result = runScript({ rcPath, shim: SHIM_TEXT, uid: stat.uid }); + + expect(result.status).toBe(0); + const after = fs.readFileSync(rcPath, "utf-8"); + expect(after).toBe("export FOO=1\nexport BAR=2\n"); + }); + + it("leaves the rc file untouched and exits 0 when uid does not own it (reproduces the #4713 deployment failure shape)", () => { + // The patched GPU sandbox container exits with code 1 after Docker GPU + // patching when the entrypoint runs as a non-root uid against an rc file + // owned by a different uid (e.g. root). Before this fix, the cleanup + // raised EPERM under errexit and killed the container. The ownership + // guard now logs and exits 0 instead. + const rcPath = path.join(tmpDir, ".bashrc"); + const before = `# Source runtime proxy config\n${SHIM_TEXT}\nexport REAL_USER_LINE=keep\n`; + fs.writeFileSync(rcPath, before, { mode: 0o644 }); + + const stat = fs.statSync(rcPath); + // Mismatched uid: pretend we are running as uid=99999 against a file + // owned by the test runner. Real container repro uses uid=1000 vs root. + const foreignUid = stat.uid + 99999; + const result = runScript({ rcPath, shim: SHIM_TEXT, uid: foreignUid }); + + expect(result.status).toBe(0); + expect(result.stderr).toContain("[SECURITY] skipping rc cleanup"); + expect(result.stderr).toContain(`file uid=${stat.uid}`); + expect(result.stderr).toContain(`uid=${foreignUid}`); + + const after = fs.readFileSync(rcPath, "utf-8"); + expect(after).toBe(before); + }); + + it("exits 0 without rewriting when the rc file is already clean", () => { + const rcPath = path.join(tmpDir, ".bashrc"); + const before = "export FOO=1\nexport BAR=2\n"; + fs.writeFileSync(rcPath, before, { mode: 0o644 }); + + const stat = fs.statSync(rcPath); + const beforeMtime = stat.mtimeMs; + const result = runScript({ rcPath, shim: SHIM_TEXT, uid: stat.uid }); + + expect(result.status).toBe(0); + expect(fs.readFileSync(rcPath, "utf-8")).toBe(before); + expect(fs.statSync(rcPath).mtimeMs).toBe(beforeMtime); + }); + + it("refuses a symlinked rc file and exits 1", () => { + const realPath = path.join(tmpDir, "real"); + const linkPath = path.join(tmpDir, ".bashrc"); + fs.writeFileSync(realPath, "export FOO=1\n", { mode: 0o644 }); + fs.symlinkSync(realPath, linkPath); + + const result = runScript({ rcPath: linkPath, shim: SHIM_TEXT, uid: 0 }); + expect(result.status).toBe(1); + expect(result.stderr).toContain("refusing symlinked rc file"); + }); + + it("strips a bare shim line without the preceding comment", () => { + const rcPath = path.join(tmpDir, ".bashrc"); + const before = `export FOO=1\n${SHIM_TEXT}\nexport BAR=2\n`; + fs.writeFileSync(rcPath, before, { mode: 0o644 }); + + const stat = fs.statSync(rcPath); + const result = runScript({ rcPath, shim: SHIM_TEXT, uid: stat.uid }); + + expect(result.status).toBe(0); + expect(fs.readFileSync(rcPath, "utf-8")).toBe("export FOO=1\nexport BAR=2\n"); + }); +}); diff --git a/test/nemoclaw-start.test.ts b/test/nemoclaw-start.test.ts index 72fea43463..5559e39945 100644 --- a/test/nemoclaw-start.test.ts +++ b/test/nemoclaw-start.test.ts @@ -30,14 +30,6 @@ function runtimeShellEnvBlock(src: string): string { return src.slice(start, end); } -function runtimeShellEnvShimBlock(src: string): string { - const start = src.indexOf("ensure_runtime_shell_env_shim() {"); - const end = src.indexOf("# ── Legacy layout migration", start); - expect(start).toBeGreaterThan(-1); - expect(end).toBeGreaterThan(start); - return src.slice(start, end); -} - function nonRootFallbackBlock(src: string): string { const start = src.indexOf("# ── Non-root fallback"); const end = src.indexOf("# ── Root path", start); @@ -4915,80 +4907,3 @@ describe("direct-root entrypoint composition under CAP_DAC_OVERRIDE drop", () => }); }); -describe("ensure_runtime_shell_env_shim rc cleanup ownership guard", () => { - function extractRcCleanupPython(src: string): string { - const heredocStart = src.indexOf("<<'PY'"); - expect(heredocStart).toBeGreaterThan(-1); - const bodyStart = src.indexOf("\n", heredocStart) + 1; - const bodyEnd = src.indexOf("\nPY\n", bodyStart); - expect(bodyEnd).toBeGreaterThan(bodyStart); - return src.slice(bodyStart, bodyEnd); - } - - it("skips rewrite and exits 0 when the rc file is not owned by the current uid", () => { - const src = fs.readFileSync(START_SCRIPT, "utf-8"); - const py = extractRcCleanupPython(src); - - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-rc-cleanup-")); - try { - const rcPath = path.join(tmpDir, ".bashrc"); - const shim = "[ -f /tmp/nemoclaw-proxy-env.sh ] && . /tmp/nemoclaw-proxy-env.sh"; - const original = `# user content\nexport FOO=bar\n${shim}\n`; - fs.writeFileSync(rcPath, original); - - const pyPath = path.join(tmpDir, "cleanup.py"); - fs.writeFileSync(pyPath, py); - - // Lie about the running uid so the script sees uid != file owner. - // The on-disk file is owned by the test runner; passing an arbitrary - // non-zero uid that differs from the file's uid drives the new - // ownership-guard branch. - const fakeUid = (process.getuid?.() ?? 1000) + 7777; - const result = spawnSync( - "python3", - [pyPath, rcPath, shim, String(fakeUid)], - { encoding: "utf-8" }, - ); - - expect(result.status).toBe(0); - expect(result.stderr).toContain("skipping rc cleanup"); - expect(result.stderr).toContain("not owned by uid="); - expect(fs.readFileSync(rcPath, "utf-8")).toBe(original); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it("still rewrites the rc file when the current uid owns it", () => { - const src = fs.readFileSync(START_SCRIPT, "utf-8"); - const py = extractRcCleanupPython(src); - - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-rc-cleanup-")); - try { - const rcPath = path.join(tmpDir, ".bashrc"); - const shim = "[ -f /tmp/nemoclaw-proxy-env.sh ] && . /tmp/nemoclaw-proxy-env.sh"; - const original = `# user content\nexport FOO=bar\n${shim}\n`; - fs.writeFileSync(rcPath, original); - - const pyPath = path.join(tmpDir, "cleanup.py"); - fs.writeFileSync(pyPath, py); - - const realUid = process.getuid?.() ?? 0; - const result = spawnSync( - "python3", - [pyPath, rcPath, shim, String(realUid)], - { encoding: "utf-8" }, - ); - - expect(result.status).toBe(0); - expect(result.stderr).not.toContain("could not safely clean runtime env shim"); - expect(result.stderr).not.toContain("skipping rc cleanup"); - const cleaned = fs.readFileSync(rcPath, "utf-8"); - expect(cleaned).not.toContain(shim); - expect(cleaned).toContain("# user content"); - expect(cleaned).toContain("export FOO=bar"); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }); -}); From e37e7e18fe3a4ee408be756a0d867c754084a050 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 08:48:12 +0000 Subject: [PATCH 07/11] fix(onboard): keep rc-cleanup script discoverable in tests and harden it against CodeQL findings Signed-off-by: Tinson Lai --- scripts/lib/clean_runtime_shell_env_shim.py | 16 ++++++++++++---- scripts/nemoclaw-start.sh | 11 +++++------ src/lib/onboard/docker-gpu-patch.test.ts | 4 ++-- src/lib/onboard/docker-gpu-patch.ts | 2 +- src/lib/onboard/docker-gpu-sandbox-create.ts | 2 +- test/clean-runtime-shell-env-shim.test.ts | 19 +++++++------------ test/service-env.test.ts | 13 +++++++++++++ 7 files changed, 41 insertions(+), 26 deletions(-) diff --git a/scripts/lib/clean_runtime_shell_env_shim.py b/scripts/lib/clean_runtime_shell_env_shim.py index 185f02f15a..ed0e6dbb12 100644 --- a/scripts/lib/clean_runtime_shell_env_shim.py +++ b/scripts/lib/clean_runtime_shell_env_shim.py @@ -38,6 +38,7 @@ def rewrite_open_rc_file(read_fd, original_stat, cleaned_lines, uid): # legacy shims in the rc files. In that case atomic rename into /sandbox # fails, so rewrite the already-validated inode through /proc/self/fd # instead. + final_mode = stat.S_IMODE(original_stat.st_mode) if uid == 0: os.fchown(read_fd, 0, 0) os.fchmod(read_fd, 0o600) @@ -56,10 +57,10 @@ def rewrite_open_rc_file(read_fd, original_stat, cleaned_lines, uid): finally: if write_fd is not None: os.close(write_fd) - os.fchmod(read_fd, 0o644) + os.fchmod(read_fd, final_mode) -def rewrite_by_rename(rc_path, cleaned_lines, uid, tmp_paths): +def rewrite_by_rename(rc_path, original_stat, cleaned_lines, uid, tmp_paths): tmp_fd, tmp_path = tempfile.mkstemp(prefix="nemoclaw-rc-clean.", dir="/tmp", text=True) tmp_paths.append(tmp_path) with os.fdopen(tmp_fd, "w", encoding="utf-8", errors="surrogateescape") as handle: @@ -68,7 +69,10 @@ def rewrite_by_rename(rc_path, cleaned_lines, uid, tmp_paths): os.fsync(handle.fileno()) if uid == 0: os.chown(tmp_path, 0, 0) - os.chmod(tmp_path, 0o644) + # Mirror the original rc file's mode bits rather than fixing a permissive + # default. The pre-cleanup file's mode is the user-visible source of truth; + # widening it here would silently change rc file permissions. + os.chmod(tmp_path, stat.S_IMODE(original_stat.st_mode)) os.replace(tmp_path, rc_path) tmp_paths.pop() @@ -167,7 +171,7 @@ def main(argv): except OSError as exc: if exc.errno != errno.ENOENT: raise - rewrite_by_rename(rc_path, cleaned, uid, tmp_paths) + rewrite_by_rename(rc_path, st, cleaned, uid, tmp_paths) except Exception as exc: print( f"[SECURITY] could not safely clean runtime env shim from {rc_path}: {exc}", @@ -181,6 +185,10 @@ def main(argv): try: os.unlink(tmp_path) except FileNotFoundError: + # The successful path in `rewrite_by_rename` removes the tmp + # path from `tmp_paths` before this finally block runs, so + # arriving here means the rename happened or the OS already + # reaped the file. Nothing left to clean up. pass return 0 diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh index 27f13329d3..630e487332 100755 --- a/scripts/nemoclaw-start.sh +++ b/scripts/nemoclaw-start.sh @@ -2448,14 +2448,13 @@ GUARDENVEOF # The Python body lives in scripts/lib/clean_runtime_shell_env_shim.py so it # can be unit-tested with controlled rc fixtures. Installed location in the # sandbox image: /usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py. -_CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT="/usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py" -if [ ! -f "$_CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT" ]; then - _CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/clean_runtime_shell_env_shim.py" -fi - ensure_runtime_shell_env_shim() { local failed=0 local rc_file + local clean_script="${NEMOCLAW_RC_CLEAN_SCRIPT:-/usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py}" + if [ ! -f "$clean_script" ]; then + clean_script="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/clean_runtime_shell_env_shim.py" + fi for rc_file in "${_SANDBOX_HOME}/.bashrc" "${_SANDBOX_HOME}/.profile"; do if [ -L "$rc_file" ]; then @@ -2472,7 +2471,7 @@ ensure_runtime_shell_env_shim() { continue fi - if ! command python3 "$_CLEAN_RUNTIME_SHELL_ENV_SHIM_SCRIPT" "$rc_file" "$_RUNTIME_SHELL_ENV_SHIM" "$(id -u)"; then + if ! command python3 "$clean_script" "$rc_file" "$_RUNTIME_SHELL_ENV_SHIM" "$(id -u)"; then failed=1 continue fi diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts index 9bf9e1c9c9..34ca46fdde 100644 --- a/src/lib/onboard/docker-gpu-patch.test.ts +++ b/src/lib/onboard/docker-gpu-patch.test.ts @@ -677,7 +677,7 @@ describe("docker-gpu-patch", () => { errorPhaseDebouncePolls: 1, }, ), - ).toThrow(/rollback to backup container failed/); + ).toThrow(/rollback failed; pre-patch sandbox was NOT restored/); expect(dockerStart).not.toHaveBeenCalled(); }); @@ -716,7 +716,7 @@ describe("docker-gpu-patch", () => { errorPhaseDebouncePolls: 1, }, ), - ).toThrow(/rollback to backup container failed/); + ).toThrow(/rollback failed; pre-patch sandbox was NOT restored/); expect(dockerStart).toHaveBeenCalledWith( "openshell-alpha", diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index 916c7ade01..704a543777 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -1004,7 +1004,7 @@ export function recreateOpenShellDockerSandboxWithGpu( throw new Error( rolledBack ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." - : "OpenShell supervisor did not reconnect to the GPU-enabled container; rollback to backup container failed.", + : "OpenShell supervisor did not reconnect to the GPU-enabled container and rollback failed; pre-patch sandbox was NOT restored.", ); } diff --git a/src/lib/onboard/docker-gpu-sandbox-create.ts b/src/lib/onboard/docker-gpu-sandbox-create.ts index c7a7d3c133..452836a867 100644 --- a/src/lib/onboard/docker-gpu-sandbox-create.ts +++ b/src/lib/onboard/docker-gpu-sandbox-create.ts @@ -158,7 +158,7 @@ export function createDockerGpuSandboxCreatePatch( } return finalizeOutcome.rolledBack ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." - : "OpenShell supervisor did not reconnect to the GPU-enabled container; rollback to backup container failed."; + : "OpenShell supervisor did not reconnect to the GPU-enabled container and rollback failed; pre-patch sandbox was NOT restored."; })(); printDockerGpuPatchFailureAndExit( options.sandboxName, diff --git a/test/clean-runtime-shell-env-shim.test.ts b/test/clean-runtime-shell-env-shim.test.ts index 031633c822..bba5c67ca0 100644 --- a/test/clean-runtime-shell-env-shim.test.ts +++ b/test/clean-runtime-shell-env-shim.test.ts @@ -16,6 +16,7 @@ const CLEAN_SCRIPT = path.join( "clean_runtime_shell_env_shim.py", ); const SHIM_TEXT = '[ -f /tmp/nemoclaw-proxy-env.sh ] && . /tmp/nemoclaw-proxy-env.sh'; +const CURRENT_UID = process.getuid?.() ?? 0; function runScript(args: { rcPath: string; shim: string; uid: number }): { status: number | null; @@ -50,8 +51,7 @@ describe("clean_runtime_shell_env_shim.py", () => { const before = `export FOO=1\n# Source runtime proxy config\n${SHIM_TEXT}\nexport BAR=2\n`; fs.writeFileSync(rcPath, before, { mode: 0o644 }); - const stat = fs.statSync(rcPath); - const result = runScript({ rcPath, shim: SHIM_TEXT, uid: stat.uid }); + const result = runScript({ rcPath, shim: SHIM_TEXT, uid: CURRENT_UID }); expect(result.status).toBe(0); const after = fs.readFileSync(rcPath, "utf-8"); @@ -68,15 +68,14 @@ describe("clean_runtime_shell_env_shim.py", () => { const before = `# Source runtime proxy config\n${SHIM_TEXT}\nexport REAL_USER_LINE=keep\n`; fs.writeFileSync(rcPath, before, { mode: 0o644 }); - const stat = fs.statSync(rcPath); - // Mismatched uid: pretend we are running as uid=99999 against a file + // Mismatched uid: pretend we are running as a foreign uid against a file // owned by the test runner. Real container repro uses uid=1000 vs root. - const foreignUid = stat.uid + 99999; + const foreignUid = CURRENT_UID + 99999; const result = runScript({ rcPath, shim: SHIM_TEXT, uid: foreignUid }); expect(result.status).toBe(0); expect(result.stderr).toContain("[SECURITY] skipping rc cleanup"); - expect(result.stderr).toContain(`file uid=${stat.uid}`); + expect(result.stderr).toContain(`file uid=${CURRENT_UID}`); expect(result.stderr).toContain(`uid=${foreignUid}`); const after = fs.readFileSync(rcPath, "utf-8"); @@ -88,13 +87,10 @@ describe("clean_runtime_shell_env_shim.py", () => { const before = "export FOO=1\nexport BAR=2\n"; fs.writeFileSync(rcPath, before, { mode: 0o644 }); - const stat = fs.statSync(rcPath); - const beforeMtime = stat.mtimeMs; - const result = runScript({ rcPath, shim: SHIM_TEXT, uid: stat.uid }); + const result = runScript({ rcPath, shim: SHIM_TEXT, uid: CURRENT_UID }); expect(result.status).toBe(0); expect(fs.readFileSync(rcPath, "utf-8")).toBe(before); - expect(fs.statSync(rcPath).mtimeMs).toBe(beforeMtime); }); it("refuses a symlinked rc file and exits 1", () => { @@ -113,8 +109,7 @@ describe("clean_runtime_shell_env_shim.py", () => { const before = `export FOO=1\n${SHIM_TEXT}\nexport BAR=2\n`; fs.writeFileSync(rcPath, before, { mode: 0o644 }); - const stat = fs.statSync(rcPath); - const result = runScript({ rcPath, shim: SHIM_TEXT, uid: stat.uid }); + const result = runScript({ rcPath, shim: SHIM_TEXT, uid: CURRENT_UID }); expect(result.status).toBe(0); expect(fs.readFileSync(rcPath, "utf-8")).toBe("export FOO=1\nexport BAR=2\n"); diff --git a/test/service-env.test.ts b/test/service-env.test.ts index 6ac6f4c15b..555b055364 100644 --- a/test/service-env.test.ts +++ b/test/service-env.test.ts @@ -20,6 +20,14 @@ import { join } from "node:path"; import { resolveOpenshell } from "../dist/lib/adapters/openshell/resolve"; const NEMOCLAW_START_SCRIPT = join(import.meta.dirname, "../scripts/nemoclaw-start.sh"); +const RC_CLEAN_SCRIPT = join( + import.meta.dirname, + "../scripts/lib/clean_runtime_shell_env_shim.py", +); + +function rcShimWrapperHeader(): string { + return `export NEMOCLAW_RC_CLEAN_SCRIPT=${JSON.stringify(RC_CLEAN_SCRIPT)}`; +} function extractRuntimeShellEnvSnippet() { const src = readFileSync(NEMOCLAW_START_SCRIPT, "utf-8"); @@ -573,6 +581,7 @@ describe("service environment", () => { `_SANDBOX_HOME=${JSON.stringify(fakeHome)}`, `_RUNTIME_SHELL_ENV_FILE=${JSON.stringify(proxyEnvPath)}`, '_RUNTIME_SHELL_ENV_SHIM="[ -f ${_RUNTIME_SHELL_ENV_FILE} ] && . ${_RUNTIME_SHELL_ENV_FILE}"', + rcShimWrapperHeader(), extractRuntimeShellEnvShimSnippet(), "ensure_runtime_shell_env_shim", ].join("\n"); @@ -627,6 +636,7 @@ describe("service environment", () => { '_RUNTIME_SHELL_ENV_SHIM="[ -f ${_RUNTIME_SHELL_ENV_FILE} ] && . ${_RUNTIME_SHELL_ENV_FILE}"', 'legacy_tmp="${_SANDBOX_HOME}/.bashrc.nemoclaw-clean.$$"', `ln -s ${JSON.stringify(sensitivePath)} "$legacy_tmp"`, + rcShimWrapperHeader(), extractRuntimeShellEnvShimSnippet(), "ensure_runtime_shell_env_shim", ].join("\n"); @@ -677,6 +687,7 @@ describe("service environment", () => { '_RUNTIME_SHELL_ENV_SHIM="[ -f ${_RUNTIME_SHELL_ENV_FILE} ] && . ${_RUNTIME_SHELL_ENV_FILE}"', 'chown() { echo "unexpected chown $*" >&2; exit 42; }', 'chmod() { echo "unexpected chmod $*" >&2; exit 43; }', + rcShimWrapperHeader(), extractRuntimeShellEnvShimSnippet(), ].join("\n"); writeFileSync(tmpFile, wrapper, { mode: 0o700 }); @@ -716,6 +727,7 @@ describe("service environment", () => { `_SANDBOX_HOME=${JSON.stringify(fakeHome)}`, `_RUNTIME_SHELL_ENV_FILE=${JSON.stringify(proxyEnvPath)}`, '_RUNTIME_SHELL_ENV_SHIM="[ -f ${_RUNTIME_SHELL_ENV_FILE} ] && . ${_RUNTIME_SHELL_ENV_FILE}"', + rcShimWrapperHeader(), extractRuntimeShellEnvShimSnippet(), "ensure_runtime_shell_env_shim", ].join("\n"); @@ -772,6 +784,7 @@ describe("service environment", () => { `_SANDBOX_HOME=${JSON.stringify(fakeHome)}`, `_RUNTIME_SHELL_ENV_FILE=${JSON.stringify(proxyEnvPath)}`, '_RUNTIME_SHELL_ENV_SHIM="[ -f ${_RUNTIME_SHELL_ENV_FILE} ] && . ${_RUNTIME_SHELL_ENV_FILE}"', + rcShimWrapperHeader(), extractRuntimeShellEnvShimSnippet(), "ensure_runtime_shell_env_shim", ].join("\n"); From 6647bf25e4db2081dc13dff303c21838c92b1d9e Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 09:20:32 +0000 Subject: [PATCH 08/11] fix(onboard): cover GPU patch composed create-flow and #4713 startup invariant, trim docker-gpu-patch test hotspot Signed-off-by: Tinson Lai --- scripts/lib/clean_runtime_shell_env_shim.py | 21 +- .../onboard/docker-gpu-patch-rollback.test.ts | 197 +++++++++++++++ src/lib/onboard/docker-gpu-patch.test.ts | 139 ----------- .../onboard/docker-gpu-sandbox-create.test.ts | 228 ++++++++++++++++++ src/lib/onboard/docker-gpu-sandbox-create.ts | 40 ++- test/nemoclaw-start.test.ts | 1 - test/sandbox-provisioning.test.ts | 1 + test/service-env.test.ts | 77 ++++++ 8 files changed, 553 insertions(+), 151 deletions(-) create mode 100644 src/lib/onboard/docker-gpu-patch-rollback.test.ts create mode 100644 src/lib/onboard/docker-gpu-sandbox-create.test.ts diff --git a/scripts/lib/clean_runtime_shell_env_shim.py b/scripts/lib/clean_runtime_shell_env_shim.py index ed0e6dbb12..f6adfc9e49 100644 --- a/scripts/lib/clean_runtime_shell_env_shim.py +++ b/scripts/lib/clean_runtime_shell_env_shim.py @@ -153,11 +153,22 @@ def main(argv): # When the rc file is not owned by us (and we are not root) we cannot # safely rewrite it: fchmod would raise EPERM without CAP_FOWNER, and - # the in-place reopen via /proc/self/fd would fail anyway. The legacy - # shim line we would have removed is functionally inert (it sources - # /tmp/nemoclaw-proxy-env.sh only if that file exists), so leaving it - # in place is safer than crashing the container under errexit. A later - # root-mode boot can finish the cleanup. + # the in-place reopen via /proc/self/fd would fail anyway. + # + # Threat model: the legacy shim line we would have removed is still an + # active trust-boundary hook. It sources /tmp/nemoclaw-proxy-env.sh on + # every shell start and pulls in the proxy and gateway-token exports + # from that file. That file is written exclusively via + # `emit_sandbox_sourced_file` in scripts/lib/sandbox-init.sh, which + # forces mode 444 (and root ownership when the entrypoint runs as + # root) before placing the file. The startup sequence validates that + # invariant via `validate_tmp_permissions` before launching services. + # The composed test in test/service-env.test.ts proves the file stays + # at mode 444 through this skip path. As long as the proxy-env file + # remains non-user-writable, the leftover shim does not widen the + # sandbox's trust boundary; crashing the container under errexit + # (which the original code did) was the strictly worse outcome. A + # later root-mode boot can finish the cleanup. if uid != 0 and st.st_uid != uid: print( f"[SECURITY] skipping rc cleanup for {rc_path}: not owned by uid={uid} " diff --git a/src/lib/onboard/docker-gpu-patch-rollback.test.ts b/src/lib/onboard/docker-gpu-patch-rollback.test.ts new file mode 100644 index 0000000000..5e330942c1 --- /dev/null +++ b/src/lib/onboard/docker-gpu-patch-rollback.test.ts @@ -0,0 +1,197 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it, vi } from "vitest"; + +import { + type DockerContainerInspect, + recreateOpenShellDockerSandboxWithGpu, +} from "../../../dist/lib/onboard/docker-gpu-patch"; + +function inspectFixture(): DockerContainerInspect { + return { + Id: "old-container-id", + Name: "/openshell-alpha", + Config: { + Image: "openshell/sandbox:abc", + Env: [ + "A=1", + "OPENSHELL_ENDPOINT=http://host.openshell.internal:8080/", + "OPENSHELL_TEST=1", + "OPENSHELL_SANDBOX_COMMAND=sleep infinity", + "NVIDIA_VISIBLE_DEVICES=void", + ], + Labels: { + "openshell.ai/managed-by": "openshell", + "openshell.ai/sandbox-name": "alpha", + "openshell.ai/sandbox-id": "sandbox-id", + }, + Entrypoint: ["/opt/openshell/bin/openshell-sandbox"], + Cmd: [], + User: "0", + WorkingDir: "/workspace", + Hostname: "alpha-host", + Tty: true, + }, + HostConfig: { + Binds: ["/host:/container:rw"], + NetworkMode: "openshell-docker", + RestartPolicy: { Name: "unless-stopped" }, + CapAdd: ["SYS_ADMIN", "NET_ADMIN"], + SecurityOpt: ["apparmor=unconfined"], + ExtraHosts: ["host.openshell.internal:172.17.0.1"], + Memory: 8 * 1024 * 1024 * 1024, + NanoCpus: 2_500_000_000, + }, + NetworkSettings: { + Networks: { + "openshell-docker": { + IPAddress: "172.18.0.2", + Gateway: "172.18.0.1", + Aliases: ["openshell-alpha"], + }, + }, + }, + }; +} + +describe("recreateOpenShellDockerSandboxWithGpu rollback path", () => { + it("rolls back to the backup container when supervisor reconnect fails", () => { + const dockerCapture = vi.fn((args: readonly string[]) => { + if (args[0] === "ps") return "old-container-id\n"; + if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); + if (args[0] === "info") return ""; + return ""; + }); + const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); + const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); + const runCaptureOpenshell = vi.fn(() => ""); + + expect(() => + recreateOpenShellDockerSandboxWithGpu( + { sandboxName: "alpha", timeoutSecs: 1 }, + { + dockerCapture, + dockerRun, + dockerRunDetached, + dockerRename, + dockerStop, + dockerStart, + dockerRm, + runOpenshell, + runCaptureOpenshell, + sleep: vi.fn(), + now: () => new Date("2026-05-12T00:00:00Z"), + errorPhaseDebouncePolls: 1, + }, + ), + ).toThrow(/pre-patch sandbox restored/); + + const restoreRename = dockerRename.mock.calls.find( + (call) => + String(call[0]).includes("nemoclaw-gpu-backup") && call[1] === "openshell-alpha", + ); + expect(restoreRename).toBeDefined(); + expect(dockerStart).toHaveBeenCalledWith( + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + expect( + dockerRm.mock.calls.some((call) => String(call[0]).includes("nemoclaw-gpu-backup")), + ).toBe(false); + }); + + it("reports rollback failure when restoring the backup container fails", () => { + const dockerCapture = vi.fn((args: readonly string[]) => { + if (args[0] === "ps") return "old-container-id\n"; + if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); + if (args[0] === "info") return ""; + return ""; + }); + const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); + const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); + const dockerRename = vi.fn((oldName: string) => { + if (String(oldName).includes("nemoclaw-gpu-backup")) { + return { status: 1, stderr: "no such container" }; + } + return { status: 0 }; + }); + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 0 })); + const dockerRm = vi.fn(() => ({ status: 0 })); + const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); + const runCaptureOpenshell = vi.fn(() => ""); + + expect(() => + recreateOpenShellDockerSandboxWithGpu( + { sandboxName: "alpha", timeoutSecs: 1 }, + { + dockerCapture, + dockerRun, + dockerRunDetached, + dockerRename, + dockerStop, + dockerStart, + dockerRm, + runOpenshell, + runCaptureOpenshell, + sleep: vi.fn(), + now: () => new Date("2026-05-12T00:00:00Z"), + errorPhaseDebouncePolls: 1, + }, + ), + ).toThrow(/rollback failed; pre-patch sandbox was NOT restored/); + + expect(dockerStart).not.toHaveBeenCalled(); + }); + + it("reports rollback failure when restarting the backup container fails", () => { + const dockerCapture = vi.fn((args: readonly string[]) => { + if (args[0] === "ps") return "old-container-id\n"; + if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); + if (args[0] === "info") return ""; + return ""; + }); + const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); + const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); + const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); + const dockerStop = vi.fn(() => ({ status: 0 })); + const dockerStart = vi.fn(() => ({ status: 1, stderr: "container start failed" })); + const dockerRm = vi.fn((_name: string) => ({ status: 0 })); + const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); + const runCaptureOpenshell = vi.fn(() => ""); + + expect(() => + recreateOpenShellDockerSandboxWithGpu( + { sandboxName: "alpha", timeoutSecs: 1 }, + { + dockerCapture, + dockerRun, + dockerRunDetached, + dockerRename, + dockerStop, + dockerStart, + dockerRm, + runOpenshell, + runCaptureOpenshell, + sleep: vi.fn(), + now: () => new Date("2026-05-12T00:00:00Z"), + errorPhaseDebouncePolls: 1, + }, + ), + ).toThrow(/rollback failed; pre-patch sandbox was NOT restored/); + + expect(dockerStart).toHaveBeenCalledWith( + "openshell-alpha", + expect.objectContaining({ ignoreError: true }), + ); + expect( + dockerRm.mock.calls.some((call) => String(call[0]).includes("nemoclaw-gpu-backup")), + ).toBe(false); + }); +}); diff --git a/src/lib/onboard/docker-gpu-patch.test.ts b/src/lib/onboard/docker-gpu-patch.test.ts index 34ca46fdde..cfc21b552c 100644 --- a/src/lib/onboard/docker-gpu-patch.test.ts +++ b/src/lib/onboard/docker-gpu-patch.test.ts @@ -588,145 +588,6 @@ describe("docker-gpu-patch", () => { ); }); - it("rolls back to the backup container when supervisor reconnect fails", () => { - const dockerCapture = vi.fn((args: readonly string[]) => { - if (args[0] === "ps") return "old-container-id\n"; - if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); - if (args[0] === "info") return ""; - return ""; - }); - const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); - const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); - const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); - const dockerStop = vi.fn(() => ({ status: 0 })); - const dockerStart = vi.fn(() => ({ status: 0 })); - const dockerRm = vi.fn((_name: string) => ({ status: 0 })); - const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); - const runCaptureOpenshell = vi.fn(() => ""); - - expect(() => - recreateOpenShellDockerSandboxWithGpu( - { sandboxName: "alpha", timeoutSecs: 1 }, - { - dockerCapture, - dockerRun, - dockerRunDetached, - dockerRename, - dockerStop, - dockerStart, - dockerRm, - runOpenshell, - runCaptureOpenshell, - sleep: vi.fn(), - now: () => new Date("2026-05-12T00:00:00Z"), - errorPhaseDebouncePolls: 1, - }, - ), - ).toThrow(/pre-patch sandbox restored/); - - const restoreRename = dockerRename.mock.calls.find( - (call) => - String(call[0]).includes("nemoclaw-gpu-backup") && call[1] === "openshell-alpha", - ); - expect(restoreRename).toBeDefined(); - expect(dockerStart).toHaveBeenCalledWith( - "openshell-alpha", - expect.objectContaining({ ignoreError: true }), - ); - expect( - dockerRm.mock.calls.some((call) => String(call[0]).includes("nemoclaw-gpu-backup")), - ).toBe(false); - }); - - it("reports rollback failure when restoring the backup container fails", () => { - const dockerCapture = vi.fn((args: readonly string[]) => { - if (args[0] === "ps") return "old-container-id\n"; - if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); - if (args[0] === "info") return ""; - return ""; - }); - const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); - const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); - const dockerRename = vi.fn((oldName: string) => { - if (String(oldName).includes("nemoclaw-gpu-backup")) { - return { status: 1, stderr: "no such container" }; - } - return { status: 0 }; - }); - const dockerStop = vi.fn(() => ({ status: 0 })); - const dockerStart = vi.fn(() => ({ status: 0 })); - const dockerRm = vi.fn(() => ({ status: 0 })); - const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); - const runCaptureOpenshell = vi.fn(() => ""); - - expect(() => - recreateOpenShellDockerSandboxWithGpu( - { sandboxName: "alpha", timeoutSecs: 1 }, - { - dockerCapture, - dockerRun, - dockerRunDetached, - dockerRename, - dockerStop, - dockerStart, - dockerRm, - runOpenshell, - runCaptureOpenshell, - sleep: vi.fn(), - now: () => new Date("2026-05-12T00:00:00Z"), - errorPhaseDebouncePolls: 1, - }, - ), - ).toThrow(/rollback failed; pre-patch sandbox was NOT restored/); - - expect(dockerStart).not.toHaveBeenCalled(); - }); - - it("reports rollback failure when restarting the backup container fails", () => { - const dockerCapture = vi.fn((args: readonly string[]) => { - if (args[0] === "ps") return "old-container-id\n"; - if (args[0] === "inspect") return JSON.stringify([inspectFixture()]); - if (args[0] === "info") return ""; - return ""; - }); - const dockerRun = vi.fn(() => ({ status: 0, stdout: "probe-id\n" })); - const dockerRunDetached = vi.fn(() => ({ status: 0, stdout: "new-container-id\n" })); - const dockerRename = vi.fn((_old: string, _next: string) => ({ status: 0 })); - const dockerStop = vi.fn(() => ({ status: 0 })); - const dockerStart = vi.fn(() => ({ status: 1, stderr: "container start failed" })); - const dockerRm = vi.fn((_name: string) => ({ status: 0 })); - const runOpenshell = vi.fn(() => ({ status: 1, stderr: "supervisor unreachable" })); - const runCaptureOpenshell = vi.fn(() => ""); - - expect(() => - recreateOpenShellDockerSandboxWithGpu( - { sandboxName: "alpha", timeoutSecs: 1 }, - { - dockerCapture, - dockerRun, - dockerRunDetached, - dockerRename, - dockerStop, - dockerStart, - dockerRm, - runOpenshell, - runCaptureOpenshell, - sleep: vi.fn(), - now: () => new Date("2026-05-12T00:00:00Z"), - errorPhaseDebouncePolls: 1, - }, - ), - ).toThrow(/rollback failed; pre-patch sandbox was NOT restored/); - - expect(dockerStart).toHaveBeenCalledWith( - "openshell-alpha", - expect.objectContaining({ ignoreError: true }), - ); - expect( - dockerRm.mock.calls.some((call) => String(call[0]).includes("nemoclaw-gpu-backup")), - ).toBe(false); - }); - it("can recreate during sandbox create before supervisor exec is allowed", () => { const dockerCapture = vi.fn((args: readonly string[]) => { if (args[0] === "ps") return "old-container-id\n"; diff --git a/src/lib/onboard/docker-gpu-sandbox-create.test.ts b/src/lib/onboard/docker-gpu-sandbox-create.test.ts new file mode 100644 index 0000000000..281dbdbda7 --- /dev/null +++ b/src/lib/onboard/docker-gpu-sandbox-create.test.ts @@ -0,0 +1,228 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import type { + DockerGpuPatchFailureContext, + DockerGpuPatchResult, +} from "../../../dist/lib/onboard/docker-gpu-patch"; +import { createDockerGpuSandboxCreatePatch } from "../../../dist/lib/onboard/docker-gpu-sandbox-create"; + +function deferredCreateResult(): DockerGpuPatchResult { + return { + applied: true, + oldContainerId: "old-container-id", + newContainerId: "new-container-id", + originalName: "openshell-alpha", + backupContainerName: "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + mode: { + kind: "gpus", + label: "--gpus all", + device: "all", + args: ["--gpus", "all"], + }, + backupRemoved: false, + }; +} + +function makeDeps() { + return { + runOpenshell: vi.fn(() => ({ status: 0 })), + runCaptureOpenshell: vi.fn(() => ""), + sleep: vi.fn(), + dockerCapture: vi.fn(() => ""), + }; +} + +describe("createDockerGpuSandboxCreatePatch composed flow", () => { + beforeEach(() => { + vi.spyOn(console, "log").mockImplementation(() => {}); + vi.spyOn(console, "error").mockImplementation(() => {}); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it("defers backup removal until waitForSupervisorReconnectIfNeeded sees supervisorReady=true", () => { + const deps = makeDeps(); + const result = deferredCreateResult(); + const recreatePatch = vi.fn(() => result); + const waitForSupervisor = vi.fn(() => true); + const finalizeBackup = vi.fn(() => ({ backupRemoved: true, rolledBack: false })); + const onPatchFailureExit = vi.fn(); + const findContainerIds = vi.fn(() => ["existing-container"]); + + const patch = createDockerGpuSandboxCreatePatch({ + enabled: true, + sandboxName: "alpha", + timeoutSecs: 60, + deps, + overrides: { + findContainerIds, + recreatePatch, + waitForSupervisor, + finalizeBackup, + onPatchFailureExit, + }, + }); + + patch.maybeApplyDuringCreate(); + expect(recreatePatch).toHaveBeenCalledWith( + expect.objectContaining({ waitForSupervisor: false }), + expect.objectContaining({ runCaptureOpenshell: deps.runCaptureOpenshell }), + ); + // Critical invariant: the patch helper must NOT remove the backup during + // create (recreatePatch was called with waitForSupervisor: false; the + // result still carries backupRemoved=false). + expect(finalizeBackup).not.toHaveBeenCalled(); + + patch.waitForSupervisorReconnectIfNeeded(); + expect(waitForSupervisor).toHaveBeenCalledTimes(1); + expect(finalizeBackup).toHaveBeenCalledTimes(1); + expect(finalizeBackup).toHaveBeenCalledWith( + { result, supervisorReady: true }, + deps, + ); + expect(onPatchFailureExit).not.toHaveBeenCalled(); + }); + + it("rolls back to the backup container and surfaces rolledBack=true diagnostics when supervisorReady=false", () => { + const deps = makeDeps(); + const result = deferredCreateResult(); + const recreatePatch = vi.fn(() => result); + const waitForSupervisor = vi.fn(() => false); + const finalizeBackup = vi.fn(() => ({ backupRemoved: false, rolledBack: true })); + const onPatchFailureExit = vi.fn(); + const findContainerIds = vi.fn(() => ["existing-container"]); + + const patch = createDockerGpuSandboxCreatePatch({ + enabled: true, + sandboxName: "alpha", + timeoutSecs: 60, + deps, + overrides: { + findContainerIds, + recreatePatch, + waitForSupervisor, + finalizeBackup, + onPatchFailureExit, + }, + }); + + patch.maybeApplyDuringCreate(); + patch.waitForSupervisorReconnectIfNeeded(); + + expect(finalizeBackup).toHaveBeenCalledWith( + { result, supervisorReady: false }, + deps, + ); + expect(onPatchFailureExit).toHaveBeenCalledTimes(1); + const [sandboxName, error, exitDeps] = onPatchFailureExit.mock.calls[0]; + expect(sandboxName).toBe("alpha"); + expect((error as Error).message).toMatch(/pre-patch sandbox restored/); + const context = (exitDeps as { context: DockerGpuPatchFailureContext }).context; + expect(context.rolledBack).toBe(true); + expect(context.newContainerId).toBe("new-container-id"); + expect(context.backupContainerName).toBe(result.backupContainerName); + }); + + it("reports rolledBack=false in diagnostics when rollback itself fails", () => { + const deps = makeDeps(); + const result = deferredCreateResult(); + const recreatePatch = vi.fn(() => result); + const waitForSupervisor = vi.fn(() => false); + const finalizeBackup = vi.fn(() => ({ backupRemoved: false, rolledBack: false })); + const onPatchFailureExit = vi.fn(); + const findContainerIds = vi.fn(() => ["existing-container"]); + + const patch = createDockerGpuSandboxCreatePatch({ + enabled: true, + sandboxName: "alpha", + timeoutSecs: 60, + deps, + overrides: { + findContainerIds, + recreatePatch, + waitForSupervisor, + finalizeBackup, + onPatchFailureExit, + }, + }); + + patch.maybeApplyDuringCreate(); + patch.waitForSupervisorReconnectIfNeeded(); + + expect(onPatchFailureExit).toHaveBeenCalledTimes(1); + const [, error, exitDeps] = onPatchFailureExit.mock.calls[0]; + expect((error as Error).message).toMatch(/rollback failed; pre-patch sandbox was NOT restored/); + const context = (exitDeps as { context: DockerGpuPatchFailureContext }).context; + expect(context.rolledBack).toBe(false); + }); + + it("skips both apply and supervisor wait when no OpenShell container is found", () => { + const deps = makeDeps(); + const recreatePatch = vi.fn(); + const waitForSupervisor = vi.fn(); + const finalizeBackup = vi.fn(); + const onPatchFailureExit = vi.fn(); + const findContainerIds = vi.fn(() => []); + + const patch = createDockerGpuSandboxCreatePatch({ + enabled: true, + sandboxName: "alpha", + timeoutSecs: 60, + deps, + overrides: { + findContainerIds, + recreatePatch, + waitForSupervisor, + finalizeBackup, + onPatchFailureExit, + }, + }); + + patch.maybeApplyDuringCreate(); + patch.waitForSupervisorReconnectIfNeeded(); + + expect(recreatePatch).not.toHaveBeenCalled(); + expect(waitForSupervisor).not.toHaveBeenCalled(); + expect(finalizeBackup).not.toHaveBeenCalled(); + expect(onPatchFailureExit).not.toHaveBeenCalled(); + }); + + it("records patchError when recreate throws and exitOnPatchError reports it via printDockerGpuPatchFailureAndExit", () => { + const deps = makeDeps(); + const recreatePatch = vi.fn(() => { + throw new Error("docker rename failed"); + }); + const waitForSupervisor = vi.fn(); + const finalizeBackup = vi.fn(); + const onPatchFailureExit = vi.fn(); + const findContainerIds = vi.fn(() => ["existing-container"]); + + const patch = createDockerGpuSandboxCreatePatch({ + enabled: true, + sandboxName: "alpha", + timeoutSecs: 60, + deps, + overrides: { + findContainerIds, + recreatePatch, + waitForSupervisor, + finalizeBackup, + onPatchFailureExit, + }, + }); + + patch.maybeApplyDuringCreate(); + expect(patch.createFailureMessage()).toMatch(/Docker GPU patch failed/); + patch.exitOnPatchError(); + expect(onPatchFailureExit).toHaveBeenCalledTimes(1); + // Supervisor wait must be skipped because needsSupervisorWait stayed false. + patch.waitForSupervisorReconnectIfNeeded(); + expect(waitForSupervisor).not.toHaveBeenCalled(); + expect(finalizeBackup).not.toHaveBeenCalled(); + }); +}); diff --git a/src/lib/onboard/docker-gpu-sandbox-create.ts b/src/lib/onboard/docker-gpu-sandbox-create.ts index 452836a867..44bd1fba91 100644 --- a/src/lib/onboard/docker-gpu-sandbox-create.ts +++ b/src/lib/onboard/docker-gpu-sandbox-create.ts @@ -28,6 +28,12 @@ type DockerGpuSandboxCreateDeps = Pick< "runOpenshell" | "runCaptureOpenshell" | "sleep" | "dockerCapture" >; +type RecreatePatchFn = typeof recreateOpenShellDockerSandboxWithGpu; +type WaitSupervisorFn = typeof waitForOpenShellSupervisorReconnect; +type FindContainerIdsFn = typeof findOpenShellDockerSandboxContainerIds; +type FinalizeBackupFn = typeof finalizeDockerGpuPatchBackup; +type PatchFailureExitFn = typeof printDockerGpuPatchFailureAndExit; + type DockerGpuSandboxCreatePatchOptions = { enabled: boolean; sandboxName: string; @@ -36,6 +42,19 @@ type DockerGpuSandboxCreatePatchOptions = { timeoutSecs: number; backend?: DockerGpuPatchBackend; deps: DockerGpuSandboxCreateDeps; + /** + * Test seams. The production composition uses the canonical + * `docker-gpu-patch`/`docker-gpu-patch-finalize` exports; tests substitute + * lightweight mocks to drive the deferred-finalize sequence without + * standing up the full Docker recreate plumbing. + */ + overrides?: { + findContainerIds?: FindContainerIdsFn; + recreatePatch?: RecreatePatchFn; + waitForSupervisor?: WaitSupervisorFn; + finalizeBackup?: FinalizeBackupFn; + onPatchFailureExit?: PatchFailureExitFn; + }; }; type DockerGpuSandboxConfig = { @@ -81,6 +100,15 @@ export function createDockerGpuSandboxCreatePatch( let patchError: unknown = null; let needsSupervisorWait = false; + const findContainerIds = + options.overrides?.findContainerIds ?? findOpenShellDockerSandboxContainerIds; + const recreatePatch = options.overrides?.recreatePatch ?? recreateOpenShellDockerSandboxWithGpu; + const waitForSupervisor = + options.overrides?.waitForSupervisor ?? waitForOpenShellSupervisorReconnect; + const finalizeBackup = options.overrides?.finalizeBackup ?? finalizeDockerGpuPatchBackup; + const onPatchFailureExit = + options.overrides?.onPatchFailureExit ?? printDockerGpuPatchFailureAndExit; + const applyOptions = { sandboxName: options.sandboxName, gpuDevice: options.gpuDevice, @@ -92,13 +120,13 @@ export function createDockerGpuSandboxCreatePatch( return { maybeApplyDuringCreate() { if (!options.enabled || result || patchError) return; - const containerIds = findOpenShellDockerSandboxContainerIds(options.sandboxName); + const containerIds = findContainerIds(options.sandboxName); if (containerIds.length === 0) return; console.log( " OpenShell Docker container detected; recreating it with NVIDIA GPU access before readiness wait...", ); try { - result = recreateOpenShellDockerSandboxWithGpu( + result = recreatePatch( { ...applyOptions, waitForSupervisor: false }, { runCaptureOpenshell: options.deps.runCaptureOpenshell, sleep: options.deps.sleep }, ); @@ -116,7 +144,7 @@ export function createDockerGpuSandboxCreatePatch( exitOnPatchError() { if (!patchError) return; - printDockerGpuPatchFailureAndExit(options.sandboxName, patchError, { + onPatchFailureExit(options.sandboxName, patchError, { runCaptureOpenshell: options.deps.runCaptureOpenshell, dockerCapture: options.deps.dockerCapture, }); @@ -135,7 +163,7 @@ export function createDockerGpuSandboxCreatePatch( console.log( ` Waiting for OpenShell supervisor to reconnect to the GPU-enabled container (up to ${supervisorReconnectTimeoutSecs}s)...`, ); - const supervisorReady = waitForOpenShellSupervisorReconnect( + const supervisorReady = waitForSupervisor( options.sandboxName, supervisorReconnectTimeoutSecs, { @@ -149,7 +177,7 @@ export function createDockerGpuSandboxCreatePatch( }, ); const finalizeOutcome = result - ? finalizeDockerGpuPatchBackup({ result, supervisorReady }, options.deps) + ? finalizeBackup({ result, supervisorReady }, options.deps) : null; if (supervisorReady) return; const failureMessage = (() => { @@ -160,7 +188,7 @@ export function createDockerGpuSandboxCreatePatch( ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." : "OpenShell supervisor did not reconnect to the GPU-enabled container and rollback failed; pre-patch sandbox was NOT restored."; })(); - printDockerGpuPatchFailureAndExit( + onPatchFailureExit( options.sandboxName, new Error(failureMessage), { diff --git a/test/nemoclaw-start.test.ts b/test/nemoclaw-start.test.ts index 5559e39945..d00f232406 100644 --- a/test/nemoclaw-start.test.ts +++ b/test/nemoclaw-start.test.ts @@ -4906,4 +4906,3 @@ describe("direct-root entrypoint composition under CAP_DAC_OVERRIDE drop", () => } }); }); - diff --git a/test/sandbox-provisioning.test.ts b/test/sandbox-provisioning.test.ts index e513d909a7..eeac9ab3ce 100644 --- a/test/sandbox-provisioning.test.ts +++ b/test/sandbox-provisioning.test.ts @@ -779,6 +779,7 @@ describe("sandbox provisioning: copied OpenClaw helper permissions (#2861)", () path.join(localBin, "nemoclaw-codex-acp"), path.join(localLib, "sandbox-init.sh"), path.join(localLib, "openclaw_device_approval_policy.py"), + path.join(localLib, "clean_runtime_shell_env_shim.py"), path.join(localLib, "generate-openclaw-config.mts"), path.join(localLib, "openclaw-build-messaging-plugins.py"), path.join(localLib, "seed-wechat-accounts.py"), diff --git a/test/service-env.test.ts b/test/service-env.test.ts index 555b055364..2730df3599 100644 --- a/test/service-env.test.ts +++ b/test/service-env.test.ts @@ -816,6 +816,83 @@ describe("service environment", () => { } }); + // Reproduces the #4713 deployment failure shape end-to-end in the startup + // sequence: write_runtime_shell_env emits the proxy env file with mode 444, + // ensure_runtime_shell_env_shim then sees a foreign-owned .bashrc and must + // exit 0 (otherwise the entrypoint would terminate the container with + // exit code 1 as reported). The composed assertion proves the legacy + // trust-boundary file remains non-user-writable across the skip path. + it("composed startup leaves the proxy env file at mode 444 when the rc cleanup skips a foreign-owned rc file (#4713)", () => { + const fakeDataDir = mkdtempSync(join(tmpdir(), "nemoclaw-rc-skip-composed-")); + const fakeHome = mkdtempSync(join(tmpdir(), "nemoclaw-rc-skip-home-")); + const proxyEnvPath = join(fakeDataDir, "proxy-env.sh"); + const rcPath = join(fakeHome, ".bashrc"); + const tmpFile = join(tmpdir(), `nemoclaw-rc-skip-composed-${process.pid}.sh`); + try { + const shimLine = `[ -f ${proxyEnvPath} ] && . ${proxyEnvPath}`; + const originalBashrc = [ + "# user-managed bashrc owned by a foreign uid (e.g. root)", + "# Source runtime proxy config", + shimLine, + "export PATH=/usr/local/bin:$PATH", + "", + ].join("\n"); + writeFileSync(rcPath, originalBashrc, { mode: 0o644 }); + + const persistBlock = extractRuntimeShellEnvSnippet() + .trimEnd() + .replaceAll("/tmp/nemoclaw-proxy-env.sh", proxyEnvPath); + // Foreign uid that does not match the test-runner's actual file owner. + // Overriding `id -u` for the bash function-level shim invocation is + // the cheapest way to drive the "uid != owner" branch without root. + const foreignUid = (process.getuid?.() ?? 1000) + 99999; + const wrapper = [ + "#!/usr/bin/env bash", + "set -euo pipefail", + sandboxInitSource, + 'PROXY_HOST="10.200.0.1"', + 'PROXY_PORT="3128"', + '_PROXY_URL="http://${PROXY_HOST}:${PROXY_PORT}"', + '_NO_PROXY_VAL="localhost,127.0.0.1,::1,${PROXY_HOST}"', + "_TOOL_REDIRECTS=()", + `_AXIOS_FIX_SCRIPT="/nonexistent/axios-proxy-fix.js"`, + `_SANDBOX_HOME=${JSON.stringify(fakeHome)}`, + `_RUNTIME_SHELL_ENV_FILE=${JSON.stringify(proxyEnvPath)}`, + `_RUNTIME_SHELL_ENV_SHIM="[ -f \${_RUNTIME_SHELL_ENV_FILE} ] && . \${_RUNTIME_SHELL_ENV_FILE}"`, + rcShimWrapperHeader(), + // Override `id -u` BEFORE the entrypoint snippets are sourced so the + // function-shadow is in place when both write_runtime_shell_env and + // ensure_runtime_shell_env_shim consult `$(id -u)`. + `id() { case "\${1:-}" in -u) echo ${foreignUid};; *) command id "$@";; esac; }`, + "set +u", + persistBlock, + extractRuntimeShellEnvShimSnippet(), + "validate_tmp_permissions " + JSON.stringify(proxyEnvPath), + ].join("\n"); + writeFileSync(tmpFile, wrapper, { mode: 0o700 }); + const result = execFileSync("bash", [tmpFile], { encoding: "utf-8" }); + + expect(result).not.toContain("[SECURITY] " + proxyEnvPath + " has unsafe permissions"); + + const finalMode = (lstatSync(proxyEnvPath).mode & 0o777).toString(8); + expect(finalMode).toBe("444"); + + const rcAfter = readFileSync(rcPath, "utf-8"); + expect(rcAfter).toBe(originalBashrc); + } finally { + try { + unlinkSync(tmpFile); + } catch { + /* ignore */ + } + try { + execFileSync("rm", ["-rf", fakeDataDir, fakeHome]); + } catch { + /* ignore */ + } + } + }); + it("entrypoint overwrites proxy-env.sh cleanly on repeated invocations", () => { const fakeDataDir = join(tmpdir(), `nemoclaw-idempotent-test-${process.pid}`); execFileSync("mkdir", ["-p", fakeDataDir]); From 4748d844a8df1808bfe833fe6b8691699726f684 Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 09:54:19 +0000 Subject: [PATCH 09/11] fix(onboard): harden GPU patch finalisation and rc-cleanup helper against advisor findings Signed-off-by: Tinson Lai --- scripts/lib/clean_runtime_shell_env_shim.py | 17 +++++ scripts/nemoclaw-start.sh | 20 +++++- .../onboard/docker-gpu-patch-finalize.test.ts | 16 +++++ src/lib/onboard/docker-gpu-patch-finalize.ts | 64 +++++++++++++++-- src/lib/onboard/docker-gpu-patch.ts | 72 +++++++------------ src/lib/onboard/docker-gpu-sandbox-create.ts | 9 ++- .../docker-gpu-supervisor-reconnect.ts | 23 ++++++ 7 files changed, 167 insertions(+), 54 deletions(-) diff --git a/scripts/lib/clean_runtime_shell_env_shim.py b/scripts/lib/clean_runtime_shell_env_shim.py index f6adfc9e49..6950ee1d19 100644 --- a/scripts/lib/clean_runtime_shell_env_shim.py +++ b/scripts/lib/clean_runtime_shell_env_shim.py @@ -20,6 +20,23 @@ Invocation (from nemoclaw-start.sh): python3 clean_runtime_shell_env_shim.py + +Source-of-truth: this script is a backwards-compatibility skip path. The +invalid state it tolerates is "legacy base image planted a runtime shim into +an rc file owned by a different uid than the entrypoint currently runs as". +The preferred source boundary is the base image build: newer base images +either own the rc file as the entrypoint user or do not plant the shim at +all. The reason the source cannot be fixed in this PR is that previously +shipped sandboxes already have the mismatched-owner rc files on disk; +crashing the entrypoint with exit code 1 on them (the pre-fix behaviour +that surfaced as #4713) is strictly worse than logging and skipping. +Regression test: test/clean-runtime-shell-env-shim.test.ts (direct fixture +asserts skip path) and test/service-env.test.ts (composed startup invariant +asserting /tmp/nemoclaw-proxy-env.sh stays mode 444). Removal condition: +when no supported NemoClaw release ships with a base image that plants the +legacy shim AND every reachable sandbox has been rebuilt off a newer base +image, drop the mismatched-owner branch and have the script exit 1 on EPERM +again. """ import errno diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh index 630e487332..1032ef971f 100755 --- a/scripts/nemoclaw-start.sh +++ b/scripts/nemoclaw-start.sh @@ -2451,9 +2451,25 @@ GUARDENVEOF ensure_runtime_shell_env_shim() { local failed=0 local rc_file - local clean_script="${NEMOCLAW_RC_CLEAN_SCRIPT:-/usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py}" + # Resolution order is deliberately fixed: the immutable installed helper at + # /usr/local/lib/nemoclaw/ ALWAYS wins when present. That path is set up + # by the Dockerfile, chmod 644, root-owned (or build-time owned), and lives + # under a system directory the sandbox user cannot write to. We refuse to + # honour any environment-supplied override when that file is in place so a + # malicious envvar cannot swap in arbitrary Python. + # + # The NEMOCLAW_RC_CLEAN_SCRIPT override is consulted ONLY when the installed + # helper is missing — i.e. running the unit-test wrappers against the + # repository tree, where the script lives at scripts/lib/ instead. + # The final fallback resolves the script relative to nemoclaw-start.sh so + # `bash scripts/nemoclaw-start.sh` works out-of-the-box for ad-hoc dev runs. + local clean_script="/usr/local/lib/nemoclaw/clean_runtime_shell_env_shim.py" if [ ! -f "$clean_script" ]; then - clean_script="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/clean_runtime_shell_env_shim.py" + if [ -n "${NEMOCLAW_RC_CLEAN_SCRIPT:-}" ] && [ -f "${NEMOCLAW_RC_CLEAN_SCRIPT}" ]; then + clean_script="${NEMOCLAW_RC_CLEAN_SCRIPT}" + else + clean_script="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/lib/clean_runtime_shell_env_shim.py" + fi fi for rc_file in "${_SANDBOX_HOME}/.bashrc" "${_SANDBOX_HOME}/.profile"; do diff --git a/src/lib/onboard/docker-gpu-patch-finalize.test.ts b/src/lib/onboard/docker-gpu-patch-finalize.test.ts index e3e088bd61..d21edb3092 100644 --- a/src/lib/onboard/docker-gpu-patch-finalize.test.ts +++ b/src/lib/onboard/docker-gpu-patch-finalize.test.ts @@ -93,4 +93,20 @@ describe("finalizeDockerGpuPatchBackup", () => { expect(outcome).toEqual({ backupRemoved: true, rolledBack: false }); expect(dockerRm).not.toHaveBeenCalled(); }); + + it("reports backupRemoved=false when supervisor reconnect succeeded but docker rm of the backup failed", () => { + const dockerRm = vi.fn((_name: string) => ({ + status: 1, + stderr: "Error response from daemon: container is in use", + })); + const outcome = finalizeDockerGpuPatchBackup( + { result: deferredCreateResult(), supervisorReady: true }, + { dockerRm }, + ); + expect(outcome).toEqual({ backupRemoved: false, rolledBack: false }); + expect(dockerRm).toHaveBeenCalledWith( + "openshell-alpha-nemoclaw-gpu-backup-1780491860342", + expect.objectContaining({ ignoreError: true }), + ); + }); }); diff --git a/src/lib/onboard/docker-gpu-patch-finalize.ts b/src/lib/onboard/docker-gpu-patch-finalize.ts index 1a09823697..c4f49ca9ba 100644 --- a/src/lib/onboard/docker-gpu-patch-finalize.ts +++ b/src/lib/onboard/docker-gpu-patch-finalize.ts @@ -1,5 +1,27 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +// +// Source-of-truth: this module is a NemoClaw-side workaround. The invalid +// state it recovers from is "OpenShell Docker-driver GPU patch left the +// sandbox in a deleted-backup / failed-new state when the post-recreate +// supervisor reconnect could not confirm the GPU container". The preferred +// source boundary for the fix is OpenShell: a Docker-driver sandbox create +// that natively accepts NVIDIA GPU access would remove the need for the +// post-create container recreation NemoClaw performs here. Until OpenShell +// supports that natively, NemoClaw recreates the container with GPU access +// and uses this module to either confirm the new container or restore the +// pre-patch backup. Regression coverage: +// * src/lib/onboard/docker-gpu-patch-finalize.test.ts — direct unit tests +// for finalize success / rollback / no-op / rollback failure outcomes. +// * src/lib/onboard/docker-gpu-patch-rollback.test.ts — composed +// recreate-with-rollback scenarios. +// * src/lib/onboard/docker-gpu-sandbox-create.test.ts — composed create +// flow driving maybeApplyDuringCreate → waitForSupervisorReconnect → +// finalizeBackup. +// Removal condition: when OpenShell supports native Docker-driver GPU +// creation/reconnect, drop the NemoClaw post-create container recreation +// and delete this module along with its callers in docker-gpu-patch.ts and +// docker-gpu-sandbox-create.ts. import { dockerRename as defaultDockerRename, @@ -91,8 +113,16 @@ export function finalizeDockerGpuPatchBackup( return { backupRemoved: true, rolledBack: false }; } if (options.supervisorReady) { - resolved.dockerRm(options.result.backupContainerName, containerOpts); - return { backupRemoved: true, rolledBack: false }; + // Backup removal is best-effort: the supervisor probe already confirmed + // the new GPU container is reachable, so the backup is no longer needed + // even if `docker rm` cannot delete it (e.g. concurrent admin action, + // daemon timeout). Reflect the actual rm status in the outcome so + // diagnostics can flag a leaked backup container. + const rmResult = resolved.dockerRm( + options.result.backupContainerName, + containerOpts, + ); + return { backupRemoved: isZeroStatus(rmResult), rolledBack: false }; } const rolledBack = rollbackToBackupContainer( { @@ -105,9 +135,33 @@ export function finalizeDockerGpuPatchBackup( return { backupRemoved: false, rolledBack }; } -export function rollbackPatchToBackup( +export type SupervisorReconnectOutcome = + | { execReady: true } + | { execReady: false; rolledBack: boolean; error: Error }; + +export function reconcileSupervisorReconnect( + execReady: boolean, refs: { newContainerId: string; backupContainerName: string; originalName: string }, deps: DockerGpuPatchDeps, -): boolean { - return rollbackToBackupContainer(refs, resolveRollbackDeps(deps)); +): SupervisorReconnectOutcome { + const resolved = resolveRollbackDeps(deps); + const containerOpts = { + ignoreError: true, + suppressOutput: true, + timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, + }; + if (execReady) { + resolved.dockerRm(refs.backupContainerName, containerOpts); + return { execReady: true }; + } + const rolledBack = rollbackToBackupContainer(refs, resolved); + return { + execReady: false, + rolledBack, + error: new Error( + rolledBack + ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." + : "OpenShell supervisor did not reconnect to the GPU-enabled container and rollback failed; pre-patch sandbox was NOT restored.", + ), + }; } diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index 704a543777..890f1ac84c 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -14,7 +14,7 @@ import { dockerRunDetached, dockerStop, } from "../adapters/docker"; -import { rollbackPatchToBackup } from "./docker-gpu-patch-finalize"; +import { reconcileSupervisorReconnect } from "./docker-gpu-patch-finalize"; import { type DockerGpuSupervisorReconnectDeps, DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_DEBOUNCE_ENV, @@ -971,58 +971,38 @@ export function recreateOpenShellDockerSandboxWithGpu( } context.newContainerId = newContainerId; - // When the caller defers the supervisor reconnect wait, leave the backup - // container in place so the caller can either remove it via - // `finalizeDockerGpuPatchBackup` with `supervisorReady: true` once its own - // probe confirms, or roll back to it on failure. Removing the backup - // here would strand the user with a deleted-backup / failed-new sandbox - // if the deferred reconnect then fails. - if (options.waitForSupervisor === false) { - return { - applied: true, - oldContainerId, - newContainerId, - originalName, - backupContainerName, - mode: selection.mode, - backupRemoved: false, - }; - } + const selectedMode = selection.mode; + const buildPatchResult = (backupRemoved: boolean): DockerGpuPatchResult => ({ + applied: true, + oldContainerId, + newContainerId, + originalName, + backupContainerName, + mode: selectedMode, + backupRemoved, + }); + + // Deferred: caller will run the supervisor wait and call + // `finalizeDockerGpuPatchBackup` (success → remove the backup, failure → + // roll back to it). Removing the backup here would strand the user with + // a deleted-backup / failed-new sandbox if the deferred reconnect fails. + if (options.waitForSupervisor === false) return buildPatchResult(false); const execReady = waitForOpenShellSupervisorReconnect( options.sandboxName, options.timeoutSecs ?? DOCKER_GPU_PATCH_WAIT_SECS, deps, ); - - if (!execReady) { - const rolledBack = rollbackPatchToBackup( - { newContainerId, backupContainerName, originalName }, - deps, - ); - context.rolledBack = rolledBack; - throw new Error( - rolledBack - ? "OpenShell supervisor did not reconnect to the GPU-enabled container; pre-patch sandbox restored." - : "OpenShell supervisor did not reconnect to the GPU-enabled container and rollback failed; pre-patch sandbox was NOT restored.", - ); + const reconcile = reconcileSupervisorReconnect( + execReady, + { newContainerId, backupContainerName, originalName }, + deps, + ); + if (!reconcile.execReady) { + context.rolledBack = reconcile.rolledBack; + throw reconcile.error; } - - d.dockerRm(backupContainerName, { - ignoreError: true, - suppressOutput: true, - timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, - }); - - return { - applied: true, - oldContainerId, - newContainerId, - originalName, - backupContainerName, - mode: selection.mode, - backupRemoved: true, - }; + return buildPatchResult(true); } catch (error) { const err = error instanceof Error ? error : new Error(String(error)); throw decoratePatchError(err, context); diff --git a/src/lib/onboard/docker-gpu-sandbox-create.ts b/src/lib/onboard/docker-gpu-sandbox-create.ts index 44bd1fba91..2ae979c2b2 100644 --- a/src/lib/onboard/docker-gpu-sandbox-create.ts +++ b/src/lib/onboard/docker-gpu-sandbox-create.ts @@ -32,7 +32,14 @@ type RecreatePatchFn = typeof recreateOpenShellDockerSandboxWithGpu; type WaitSupervisorFn = typeof waitForOpenShellSupervisorReconnect; type FindContainerIdsFn = typeof findOpenShellDockerSandboxContainerIds; type FinalizeBackupFn = typeof finalizeDockerGpuPatchBackup; -type PatchFailureExitFn = typeof printDockerGpuPatchFailureAndExit; +// Loosen the override return type from `never` to `void` so tests can pass a +// plain `vi.fn()` mock. Production wires `printDockerGpuPatchFailureAndExit` +// which has return type `never`; that is assignable to `void`. +type PatchFailureExitFn = ( + sandboxName: string, + error: unknown, + deps: Parameters[2], +) => void; type DockerGpuSandboxCreatePatchOptions = { enabled: boolean; diff --git a/src/lib/onboard/docker-gpu-supervisor-reconnect.ts b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts index 7817f71d80..61ceb15d5a 100644 --- a/src/lib/onboard/docker-gpu-supervisor-reconnect.ts +++ b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts @@ -34,6 +34,29 @@ const DOCKER_GPU_SUPERVISOR_RECONNECT_MIN_SECS = 900; // whose sandbox list cache divergence was observed at ~12s in the original // repro. Hosts that genuinely crashed on startup hit the rollback path in // applyDockerGpuPatch rather than waiting out the full window. +// +// Design choice for #4664: that issue's second suggested fix proposes that +// when the patched Docker container reports Status=running + Health=starting, +// reconnect should keep retrying rather than declaring fatal. We deliberately +// chose the alternative path — generic Error-phase debounce wide enough to +// absorb the observed sandbox-list cache divergence window, plus a +// guaranteed rollback to the pre-patch backup container when reconnect still +// cannot confirm. The reasons: +// 1. The patched container's Health depends on the OpenShell supervisor +// script, which is exactly the thing the reconnect wait is observing +// via `openshell sandbox list`. Branching on Docker Health here would +// either reduplicate the same signal or trust an orthogonal one +// (`docker inspect`) that can lag the supervisor's view by several +// seconds — defeating the determinism of the existing classifier. +// 2. The rollback path (docker-gpu-patch-finalize.ts) guarantees the user +// keeps the pre-patch CPU sandbox when reconnect cannot confirm. Under +// the debounce-plus-rollback alternative, slow hosts get extra time AND +// catastrophic failures get a recoverable sandbox instead of a deleted +// backup. +// If a future repro shows Status=running + Health=starting genuinely failing +// reconnect after this 30s window, switch the strategy to a Health-aware +// retry — but only after extracting Docker health probing into a separate +// observation channel rather than overloading this one. const DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_PHASE_DEFAULT_DEBOUNCE_POLLS = 15; export const DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV = From 756d64c27d39ee118748e9f85e0e83d1637213cb Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 11:18:03 +0000 Subject: [PATCH 10/11] fix(onboard): track docker rm status through reconnect success path and drop issue refs from comments Signed-off-by: Tinson Lai --- scripts/lib/clean_runtime_shell_env_shim.py | 19 +++++------ src/lib/onboard/docker-gpu-patch-finalize.ts | 11 ++++-- src/lib/onboard/docker-gpu-patch.ts | 2 +- .../docker-gpu-supervisor-reconnect.ts | 34 +++++++------------ test/clean-runtime-shell-env-shim.test.ts | 7 ++-- test/service-env.test.ts | 14 ++++---- 6 files changed, 39 insertions(+), 48 deletions(-) diff --git a/scripts/lib/clean_runtime_shell_env_shim.py b/scripts/lib/clean_runtime_shell_env_shim.py index 6950ee1d19..37ed113e95 100644 --- a/scripts/lib/clean_runtime_shell_env_shim.py +++ b/scripts/lib/clean_runtime_shell_env_shim.py @@ -26,17 +26,14 @@ an rc file owned by a different uid than the entrypoint currently runs as". The preferred source boundary is the base image build: newer base images either own the rc file as the entrypoint user or do not plant the shim at -all. The reason the source cannot be fixed in this PR is that previously -shipped sandboxes already have the mismatched-owner rc files on disk; -crashing the entrypoint with exit code 1 on them (the pre-fix behaviour -that surfaced as #4713) is strictly worse than logging and skipping. -Regression test: test/clean-runtime-shell-env-shim.test.ts (direct fixture -asserts skip path) and test/service-env.test.ts (composed startup invariant -asserting /tmp/nemoclaw-proxy-env.sh stays mode 444). Removal condition: -when no supported NemoClaw release ships with a base image that plants the -legacy shim AND every reachable sandbox has been rebuilt off a newer base -image, drop the mismatched-owner branch and have the script exit 1 on EPERM -again. +all. Previously shipped sandboxes already have the mismatched-owner rc +files on disk; crashing the entrypoint with exit code 1 on them is strictly +worse than logging and skipping. Regression tests cover the direct fixture +skip path and the composed startup invariant asserting +/tmp/nemoclaw-proxy-env.sh stays mode 444. Removal condition: when no +supported release ships a base image that plants the legacy shim AND every +reachable sandbox has been rebuilt off a newer base image, drop the +mismatched-owner branch and have the script exit 1 on EPERM again. """ import errno diff --git a/src/lib/onboard/docker-gpu-patch-finalize.ts b/src/lib/onboard/docker-gpu-patch-finalize.ts index c4f49ca9ba..45fc1761d6 100644 --- a/src/lib/onboard/docker-gpu-patch-finalize.ts +++ b/src/lib/onboard/docker-gpu-patch-finalize.ts @@ -136,7 +136,7 @@ export function finalizeDockerGpuPatchBackup( } export type SupervisorReconnectOutcome = - | { execReady: true } + | { execReady: true; backupRemoved: boolean } | { execReady: false; rolledBack: boolean; error: Error }; export function reconcileSupervisorReconnect( @@ -151,8 +151,13 @@ export function reconcileSupervisorReconnect( timeout: DOCKER_GPU_PATCH_TIMEOUT_MS, }; if (execReady) { - resolved.dockerRm(refs.backupContainerName, containerOpts); - return { execReady: true }; + // Backup removal is best-effort here too: the supervisor probe already + // confirmed the new container is reachable, so a failed rm leaves a + // leaked backup container but the user-visible sandbox is healthy. + // Surface the actual rm status so callers can fold it into diagnostics + // alongside the deferred-finalize path in `finalizeDockerGpuPatchBackup`. + const rmResult = resolved.dockerRm(refs.backupContainerName, containerOpts); + return { execReady: true, backupRemoved: isZeroStatus(rmResult) }; } const rolledBack = rollbackToBackupContainer(refs, resolved); return { diff --git a/src/lib/onboard/docker-gpu-patch.ts b/src/lib/onboard/docker-gpu-patch.ts index 890f1ac84c..7667447b28 100644 --- a/src/lib/onboard/docker-gpu-patch.ts +++ b/src/lib/onboard/docker-gpu-patch.ts @@ -1002,7 +1002,7 @@ export function recreateOpenShellDockerSandboxWithGpu( context.rolledBack = reconcile.rolledBack; throw reconcile.error; } - return buildPatchResult(true); + return buildPatchResult(reconcile.backupRemoved); } catch (error) { const err = error instanceof Error ? error : new Error(String(error)); throw decoratePatchError(err, context); diff --git a/src/lib/onboard/docker-gpu-supervisor-reconnect.ts b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts index 61ceb15d5a..ec69d4c42f 100644 --- a/src/lib/onboard/docker-gpu-supervisor-reconnect.ts +++ b/src/lib/onboard/docker-gpu-supervisor-reconnect.ts @@ -35,28 +35,18 @@ const DOCKER_GPU_SUPERVISOR_RECONNECT_MIN_SECS = 900; // repro. Hosts that genuinely crashed on startup hit the rollback path in // applyDockerGpuPatch rather than waiting out the full window. // -// Design choice for #4664: that issue's second suggested fix proposes that -// when the patched Docker container reports Status=running + Health=starting, -// reconnect should keep retrying rather than declaring fatal. We deliberately -// chose the alternative path — generic Error-phase debounce wide enough to -// absorb the observed sandbox-list cache divergence window, plus a -// guaranteed rollback to the pre-patch backup container when reconnect still -// cannot confirm. The reasons: -// 1. The patched container's Health depends on the OpenShell supervisor -// script, which is exactly the thing the reconnect wait is observing -// via `openshell sandbox list`. Branching on Docker Health here would -// either reduplicate the same signal or trust an orthogonal one -// (`docker inspect`) that can lag the supervisor's view by several -// seconds — defeating the determinism of the existing classifier. -// 2. The rollback path (docker-gpu-patch-finalize.ts) guarantees the user -// keeps the pre-patch CPU sandbox when reconnect cannot confirm. Under -// the debounce-plus-rollback alternative, slow hosts get extra time AND -// catastrophic failures get a recoverable sandbox instead of a deleted -// backup. -// If a future repro shows Status=running + Health=starting genuinely failing -// reconnect after this 30s window, switch the strategy to a Health-aware -// retry — but only after extracting Docker health probing into a separate -// observation channel rather than overloading this one. +// Alternative considered: branching on Docker State.Status + Health.Status +// to keep retrying when the patched container reports Status=running plus +// Health=starting. Rejected because the patched container's Health depends +// on the OpenShell supervisor script — the same signal this wait observes +// via `openshell sandbox list` — so Docker Health is either redundant or +// lags by several seconds. The debounce-plus-rollback path also guarantees +// the user keeps the pre-patch CPU sandbox on reconnect failure, which a +// Health-aware retry alone would not provide. If a future repro shows +// Status=running + Health=starting genuinely failing reconnect after this +// 30s window, switch to a Health-aware retry, but extract Docker health +// probing into a separate observation channel first rather than overloading +// this one. const DOCKER_GPU_SUPERVISOR_RECONNECT_ERROR_PHASE_DEFAULT_DEBOUNCE_POLLS = 15; export const DOCKER_GPU_SUPERVISOR_RECONNECT_TIMEOUT_ENV = diff --git a/test/clean-runtime-shell-env-shim.test.ts b/test/clean-runtime-shell-env-shim.test.ts index bba5c67ca0..7f51f9e8d6 100644 --- a/test/clean-runtime-shell-env-shim.test.ts +++ b/test/clean-runtime-shell-env-shim.test.ts @@ -58,10 +58,9 @@ describe("clean_runtime_shell_env_shim.py", () => { expect(after).toBe("export FOO=1\nexport BAR=2\n"); }); - it("leaves the rc file untouched and exits 0 when uid does not own it (reproduces the #4713 deployment failure shape)", () => { - // The patched GPU sandbox container exits with code 1 after Docker GPU - // patching when the entrypoint runs as a non-root uid against an rc file - // owned by a different uid (e.g. root). Before this fix, the cleanup + it("leaves the rc file untouched and exits 0 when the entrypoint uid does not own it", () => { + // When the entrypoint runs as a non-root uid against an rc file owned by + // a different uid (e.g. root-owned legacy .bashrc), the pre-fix cleanup // raised EPERM under errexit and killed the container. The ownership // guard now logs and exits 0 instead. const rcPath = path.join(tmpDir, ".bashrc"); diff --git a/test/service-env.test.ts b/test/service-env.test.ts index 2730df3599..92a5cf0635 100644 --- a/test/service-env.test.ts +++ b/test/service-env.test.ts @@ -816,13 +816,13 @@ describe("service environment", () => { } }); - // Reproduces the #4713 deployment failure shape end-to-end in the startup - // sequence: write_runtime_shell_env emits the proxy env file with mode 444, - // ensure_runtime_shell_env_shim then sees a foreign-owned .bashrc and must - // exit 0 (otherwise the entrypoint would terminate the container with - // exit code 1 as reported). The composed assertion proves the legacy - // trust-boundary file remains non-user-writable across the skip path. - it("composed startup leaves the proxy env file at mode 444 when the rc cleanup skips a foreign-owned rc file (#4713)", () => { + // Composed startup invariant: write_runtime_shell_env emits the proxy + // env file with mode 444, ensure_runtime_shell_env_shim then sees a + // foreign-owned .bashrc and must exit 0 (otherwise the entrypoint would + // terminate the container with exit code 1). The composed assertion + // proves the legacy trust-boundary file remains non-user-writable + // across the skip path. + it("composed startup leaves the proxy env file at mode 444 when the rc cleanup skips a foreign-owned rc file", () => { const fakeDataDir = mkdtempSync(join(tmpdir(), "nemoclaw-rc-skip-composed-")); const fakeHome = mkdtempSync(join(tmpdir(), "nemoclaw-rc-skip-home-")); const proxyEnvPath = join(fakeDataDir, "proxy-env.sh"); From cff0334b89ed93d9683686d7f8d66318b46b79cf Mon Sep 17 00:00:00 2001 From: Tinson Lai Date: Fri, 5 Jun 2026 11:56:58 +0000 Subject: [PATCH 11/11] fix(sandbox): stage clean_runtime_shell_env_shim.py into sandbox build context Signed-off-by: Tinson Lai --- src/lib/sandbox/build-context.ts | 4 ++++ test/sandbox-build-context.test.ts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/lib/sandbox/build-context.ts b/src/lib/sandbox/build-context.ts index 6398f3ce96..16f6f99ba0 100644 --- a/src/lib/sandbox/build-context.ts +++ b/src/lib/sandbox/build-context.ts @@ -132,6 +132,10 @@ function stageOptimizedSandboxBuildContext( path.join(rootDir, "scripts", "lib", "openclaw_device_approval_policy.py"), path.join(stagedScriptsDir, "lib", "openclaw_device_approval_policy.py"), ); + fs.copyFileSync( + path.join(rootDir, "scripts", "lib", "clean_runtime_shell_env_shim.py"), + path.join(stagedScriptsDir, "lib", "clean_runtime_shell_env_shim.py"), + ); // OpenClaw config generator extracted in #2449 (fixed in #2565) fs.copyFileSync( path.join(rootDir, "scripts", "generate-openclaw-config.mts"), diff --git a/test/sandbox-build-context.test.ts b/test/sandbox-build-context.test.ts index 7299314a96..af8e8b3a0b 100644 --- a/test/sandbox-build-context.test.ts +++ b/test/sandbox-build-context.test.ts @@ -74,6 +74,7 @@ describe("sandbox build context staging", () => { writeFixture(path.join("scripts", "codex-acp-wrapper.sh")); writeFixture(path.join("scripts", "lib", "sandbox-init.sh")); writeFixture(path.join("scripts", "lib", "openclaw_device_approval_policy.py")); + writeFixture(path.join("scripts", "lib", "clean_runtime_shell_env_shim.py")); writeFixture(path.join("scripts", "generate-openclaw-config.mts")); writeFixture(path.join("scripts", "openclaw-build-messaging-plugins.py")); writeFixture(path.join("scripts", "seed-wechat-accounts.py")); @@ -255,6 +256,9 @@ describe("sandbox build context staging", () => { expect( fs.existsSync(path.join(buildCtx, "scripts", "lib", "openclaw_device_approval_policy.py")), ).toBe(true); + expect( + fs.existsSync(path.join(buildCtx, "scripts", "lib", "clean_runtime_shell_env_shim.py")), + ).toBe(true); expect(fs.existsSync(path.join(buildCtx, "scripts", "patch-openclaw-tool-catalog.js"))).toBe( true, );