From 3e98b634fa8e614ec3e8c9c9ed84be7fd209bc50 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 09:03:21 -0700 Subject: [PATCH 1/9] docs(e2e): choose Vitest fixtures as scenario runner Signed-off-by: Carlos Villela --- test/e2e-scenario/docs/MIGRATION.md | 31 +++++++------ test/e2e-scenario/docs/README.md | 71 +++++++++++++++++++++++------ 2 files changed, 75 insertions(+), 27 deletions(-) diff --git a/test/e2e-scenario/docs/MIGRATION.md b/test/e2e-scenario/docs/MIGRATION.md index 2bbeeafb20..a8e26b63c7 100644 --- a/test/e2e-scenario/docs/MIGRATION.md +++ b/test/e2e-scenario/docs/MIGRATION.md @@ -20,10 +20,12 @@ The scenario E2E migration is in a hybrid phase: - legacy `test/e2e/test-*.sh` scripts still provide most live nightly and platform coverage. -This hybrid shape is not the target end state. #3588 should converge on a -single scenario runner. Until that runner owns live execution, resolver, -assertion, evidence, and redaction behavior, treat YAML and bash runner updates -as bridge work rather than durable architecture. +This hybrid shape is not the target end state. #3588 and #4941 should converge +on one live execution path: **Vitest as the scenario runner, extended by +NemoClaw fixtures and typed domain helpers**. Until that path owns live +execution, resolver behavior, assertions, evidence, cleanup, and redaction, +treat YAML and bash runner updates as bridge work rather than durable +architecture. Do not assume legacy scripts are deletion-ready just because a scenario or suite name exists. The final reconciliation phase must show either evidence-complete @@ -34,19 +36,21 @@ removed. The final scenario framework should have one execution path: -- typed scenario definitions compile to the runner plan consumed by CI and local - runs; -- the runner owns setup, onboarding, runtime actions, expected-state probes, - assertion execution, expected-failure matching, evidence artifacts, and - secret redaction; +- Vitest owns live execution, filtering, reporters, timeouts, fixture lifecycle, + skip handling, and CI integration; +- NemoClaw fixtures own setup, onboarding, runtime actions, expected-state + probes, assertion helpers, expected-failure matching, evidence artifacts, + cleanup, and secret redaction; +- typed scenario definitions and matrix helpers describe stable scenario IDs and + supported combinations without becoming a second execution framework; - reusable assertions prefer TypeScript probes and typed clients; - shell scripts remain only for host, sandbox, process, or platform boundaries where shell is the thing being tested or the lowest-risk adapter; -- shell execution is wrapped by the runner so environment scoping, timeout, +- shell execution is wrapped by fixtures so environment scoping, timeout, redaction, artifact capture, and argument validation are consistent. When a bridge PR adds behavior to the current YAML/bash runner, preserve the -requirement it proves, but port that requirement into the single-runner path +requirement it proves, but port that requirement into the Vitest fixture path before removing legacy runner pieces. Do not deepen the bash runner as a second long-term source of truth. @@ -57,6 +61,7 @@ Use these GitHub issues for status and follow-up work: | Issue | Purpose | | --- | --- | | #3588 | Parent architecture epic for layered single-runner scenario E2E | +| #4941 | Decision issue for using Vitest fixtures as the scenario execution model | | #4347–#4356 | Domain-specific audit-coverage phases | | #4357 | Final audit reconciliation, placeholder cleanup, and deletion-readiness review | | #4378 | Friendly `setup_scenarios` aliases for layered test plans | @@ -88,7 +93,7 @@ When moving behavior from a legacy E2E script into the scenario framework: canonical scenario ID. 4. Add or update current YAML metadata only when the existing bridge runner must keep resolving the scenario during the migration. -5. Add the reusable assertion or probe in the single-runner direction whenever +5. Add the reusable assertion or probe in the Vitest fixture direction whenever possible instead of adding new bash-runner-only behavior. 6. Add reusable suite or assertion helpers instead of copying entire legacy scripts. @@ -119,7 +124,7 @@ npx vitest run --project e2e-scenario-framework --silent=false --reporter=defaul ## Cleanup rules - Prefer new scenario-matrix coverage over new legacy-style `test-*.sh` scripts. -- Prefer single-runner behavior over new YAML/bash-runner behavior; if a bridge +- Prefer Vitest fixture behavior over new YAML/bash-runner behavior; if a bridge change is unavoidable, document the porting requirement in the owning issue or PR. - Do not reintroduce the removed workflow-level parity report unless maintainers diff --git a/test/e2e-scenario/docs/README.md b/test/e2e-scenario/docs/README.md index 7f0f1c7246..5d27dd161e 100644 --- a/test/e2e-scenario/docs/README.md +++ b/test/e2e-scenario/docs/README.md @@ -8,11 +8,15 @@ It combines typed scenario builders, product-facing setup manifests, YAML runtime metadata, and reusable shell suites while the older live E2E scripts continue to run in parallel. -This hybrid model is transitional. The target architecture for #3588 is a -single scenario runner that owns scenario resolution, orchestration, evidence -collection, redaction, and assertion dispatch. Shell scripts should be kept to -the smallest practical set of system-boundary probes or command fixtures, not a -second planning or assertion-control runtime. +This hybrid model is transitional. The target architecture for #3588 and #4941 +is **Vitest as the single scenario execution runner**, extended by NemoClaw +fixtures and typed domain helpers. Vitest owns test discovery, filtering, +timeouts, reporters, fixture lifecycle, skips, and CI integration. NemoClaw owns +scenario vocabulary, setup/onboarding helpers, product clients, evidence +collection, redaction, cleanup, and assertion helpers. + +Shell scripts should be kept to the smallest practical set of system-boundary +probes or command fixtures, not a second planning or assertion-control runtime. ## Current sources of truth @@ -34,11 +38,16 @@ maintainer-approved reason. ## Target runner model -Future scenario coverage should move toward one runner with these properties: +Future scenario coverage should move toward one Vitest-based runner with these +properties: -- the runner compiles one typed plan for each scenario and treats that plan as - the source of truth for setup, onboarding, expected state, suites, assertions, - evidence paths, and expected failures; +- Vitest is the execution surface for live scenarios and owns lifecycle, + filtering, reporting, timeouts, and fixture scopes; +- NemoClaw fixtures expose scenario-level helpers for setup, onboarding, host + CLI access, gateway checks, sandbox checks, provider fixtures, evidence + artifacts, redaction, and cleanup; +- typed scenario data and matrix helpers describe stable scenario IDs and + supported combinations without becoming a second runner; - product-facing manifests remain declarative setup inputs, not executable test programs; - assertion modules prefer TypeScript probes and typed client helpers; @@ -48,7 +57,7 @@ Future scenario coverage should move toward one runner with these properties: environment, timeout, redaction, artifact capture, and command/argument validation; - bridge work that expands the YAML/bash runner must also identify how that - behavior will move into the single runner before legacy runner paths are + behavior will move into Vitest fixtures before legacy runner paths are removed. The #4347-#4357 audit-phase issues should be read as acceptance coverage @@ -79,8 +88,40 @@ The current YAML shell runner expresses this through: The typed scenario registry expresses the same intent as deterministic code and is used by the scenario workflow matrix and dry-run plan artifacts. The target -single runner should collapse these parallel expressions into one executable -plan model. +Vitest fixture model should collapse these parallel expressions into one live +execution path. + +## Fixture-first scenario shape + +Final-state live scenarios should read like regular Vitest tests that depend on +NemoClaw fixtures: + +```ts +import { test } from "../framework/e2e-test.ts"; + +test("ubuntu repo cloud OpenClaw", async ({ + repo, + openclaw, + gateway, + sandbox, + inference, +}) => { + await repo.installCurrent(); + + const instance = await openclaw.onboard({ + agent: "openclaw", + provider: "nvidia", + }); + + await gateway.expectHealthy(instance); + await sandbox.expectRunning(instance); + await inference.expectLocalChat(instance, { prompt: "Say ok.", expect: /ok/i }); +}); +``` + +The test body should express product behavior. Fixture implementations should +hide redacted process spawning, artifact paths, cleanup registration, secret +gating, and retry/flake classification. ## How to run @@ -148,14 +189,16 @@ test/e2e-scenario/ `macos-e2e.yaml`, `wsl-e2e.yaml`, `ollama-proxy-e2e.yaml`, and `regression-e2e.yaml` still run legacy live E2E scripts during the migration. - `vitest.config.ts` contains the `e2e-scenario-framework` project for framework - and metadata tests. + and metadata tests. The live scenario target should be a separate opt-in + Vitest project so ordinary `npm test` remains fast and local-friendly. ## Migration tracking Migration status is tracked outside the repository in GitHub issues and PRs, not in repo-local checklists. The parent architecture issue is #3588. Active audit-coverage work is tracked by the #4347–#4357 issue set, with focused -follow-ups such as #4378 for specific drift fixes. +follow-ups such as #4378 for specific drift fixes. The execution-model decision +is tracked in #4941. The old workflow-level parity report has been removed. Use scenario framework tests, the coverage report, PR review, and the audit issues to decide what to From 9e355f0118488819ee0df557d0cd8deea2052fb7 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 09:05:45 -0700 Subject: [PATCH 2/9] test(e2e): add opt-in live scenario project Signed-off-by: Carlos Villela --- .../e2e-live-project-config.test.ts | 45 +++++++++++++++++++ .../framework/live-project-gate.ts | 7 +++ vitest.config.ts | 36 ++++++++++----- 3 files changed, 77 insertions(+), 11 deletions(-) create mode 100644 test/e2e-scenario/framework-tests/e2e-live-project-config.test.ts create mode 100644 test/e2e-scenario/framework/live-project-gate.ts diff --git a/test/e2e-scenario/framework-tests/e2e-live-project-config.test.ts b/test/e2e-scenario/framework-tests/e2e-live-project-config.test.ts new file mode 100644 index 0000000000..9cb42ca5fe --- /dev/null +++ b/test/e2e-scenario/framework-tests/e2e-live-project-config.test.ts @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { shouldRunLiveE2EScenarios } from "../framework/live-project-gate.ts"; +import config from "../../../vitest.config.ts"; + +interface ProjectConfig { + test?: { + name?: string; + include?: string[]; + }; +} + +interface RootConfig { + test?: { + projects?: ProjectConfig[]; + }; +} + +function liveProject(): ProjectConfig { + const projects = (config as RootConfig).test?.projects ?? []; + const project = projects.find((entry) => entry.test?.name === "e2e-scenarios-live"); + if (!project) { + throw new Error("missing e2e-scenarios-live Vitest project"); + } + return project; +} + +describe("live E2E scenario Vitest project", () => { + it("is present but excludes live tests by default", () => { + const project = liveProject(); + expect(project.test?.include).toEqual([]); + }); + + it("is enabled only by the explicit live scenario opt-in env var", () => { + expect(shouldRunLiveE2EScenarios({})).toBe(false); + expect(shouldRunLiveE2EScenarios({ NEMOCLAW_RUN_E2E_SCENARIOS: "0" })).toBe(false); + expect(shouldRunLiveE2EScenarios({ NEMOCLAW_RUN_E2E_SCENARIOS: "yes" })).toBe(false); + expect(shouldRunLiveE2EScenarios({ NEMOCLAW_RUN_E2E_SCENARIOS: "1" })).toBe(true); + expect(shouldRunLiveE2EScenarios({ NEMOCLAW_RUN_E2E_SCENARIOS: "true" })).toBe(true); + expect(shouldRunLiveE2EScenarios({ NEMOCLAW_RUN_E2E_SCENARIOS: " TRUE " })).toBe(true); + }); +}); diff --git a/test/e2e-scenario/framework/live-project-gate.ts b/test/e2e-scenario/framework/live-project-gate.ts new file mode 100644 index 0000000000..b84e7ebc57 --- /dev/null +++ b/test/e2e-scenario/framework/live-project-gate.ts @@ -0,0 +1,7 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +export function shouldRunLiveE2EScenarios(env: NodeJS.ProcessEnv = process.env): boolean { + const value = env.NEMOCLAW_RUN_E2E_SCENARIOS?.trim().toLowerCase(); + return value === "1" || value === "true"; +} diff --git a/vitest.config.ts b/vitest.config.ts index 0f40c0b542..62ca7cbb89 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -3,10 +3,18 @@ import { defineConfig } from "vitest/config"; +import { shouldRunLiveE2EScenarios } from "./test/e2e-scenario/framework/live-project-gate.ts"; import { testTimeout } from "./test/helpers/timeouts"; const isGithubActions = process.env.GITHUB_ACTIONS === "true"; const isCi = isGithubActions || process.env.CI === "true" || process.env.CI === "1"; +const LIVE_E2E_PROJECT_TIMEOUT_MS = 30 * 60 * 1000; +const runInstallerIntegration = + process.env.CI === "true" || + process.env.CI === "1" || + process.env.NEMOCLAW_RUN_INSTALLER_TESTS === "1"; +const runLiveE2EScenarios = shouldRunLiveE2EScenarios(); +const runBranchValidationE2E = !!process.env.BREV_API_KEY || !!process.env.BREV_API_TOKEN; export default defineConfig({ test: { @@ -36,17 +44,15 @@ export default defineConfig({ { test: { name: "installer-integration", - include: [ - "test/install-preflight.test.ts", - "test/install-openshell-version-check.test.ts", - ], + include: runInstallerIntegration + ? [ + "test/install-preflight.test.ts", + "test/install-openshell-version-check.test.ts", + ] + : [], // Slow tests that spawn real bash install.sh processes. // Run in CI or explicitly: npx vitest run --project installer-integration // Excluded from pre-commit/pre-push to avoid flaky timeouts. - enabled: - process.env.CI === "true" || - process.env.CI === "1" || - process.env.NEMOCLAW_RUN_INSTALLER_TESTS === "1", }, }, { @@ -62,10 +68,20 @@ export default defineConfig({ include: ["test/e2e-scenario/framework-tests/**/*.test.ts"], }, }, + { + test: { + name: "e2e-scenarios-live", + testTimeout: testTimeout(LIVE_E2E_PROJECT_TIMEOUT_MS), + include: runLiveE2EScenarios ? ["test/e2e-scenario/live/**/*.test.ts"] : [], + // Live scenario tests are opt-in because they install, onboard, and + // mutate real NemoClaw/OpenShell state. Run explicitly with: + // NEMOCLAW_RUN_E2E_SCENARIOS=1 npx vitest run --project e2e-scenarios-live + }, + }, { test: { name: "e2e-branch-validation", - include: ["test/e2e/brev-e2e.test.ts"], + include: runBranchValidationE2E ? ["test/e2e/brev-e2e.test.ts"] : [], // Branch validation E2E: rsyncs the branch over a Brev instance // provisioned from the published NemoClaw launchable image and // runs the selected test suites. Only run when explicitly @@ -76,11 +92,9 @@ export default defineConfig({ // is essential for debugging Brev provisioning timing and the // overall suite runs in a single `describe` block, so there's no // test chatter to suppress anyway. - silent: false, // Gate on the new long-lived API key secret. Historically this was // BREV_API_TOKEN (short-lived refresh token); renamed in the // nightly-enable PR to match the new `brev login --api-key` flow. - enabled: !!process.env.BREV_API_KEY || !!process.env.BREV_API_TOKEN, }, }, ], From a11bc2ae468f6e30a19ad964b2572b976bc6478e Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 09:09:09 -0700 Subject: [PATCH 3/9] test(e2e): add scenario fixture context Signed-off-by: Carlos Villela --- .../e2e-fixture-context.test.ts | 89 +++++++++++++ test/e2e-scenario/framework/artifacts.ts | 53 ++++++++ test/e2e-scenario/framework/cleanup.ts | 53 ++++++++ test/e2e-scenario/framework/e2e-test.ts | 48 +++++++ test/e2e-scenario/framework/secrets.ts | 53 ++++++++ test/e2e-scenario/framework/shell-probe.ts | 125 ++++++++++++++++++ 6 files changed, 421 insertions(+) create mode 100644 test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts create mode 100644 test/e2e-scenario/framework/artifacts.ts create mode 100644 test/e2e-scenario/framework/cleanup.ts create mode 100644 test/e2e-scenario/framework/e2e-test.ts create mode 100644 test/e2e-scenario/framework/secrets.ts create mode 100644 test/e2e-scenario/framework/shell-probe.ts diff --git a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts new file mode 100644 index 0000000000..b5e46acafe --- /dev/null +++ b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts @@ -0,0 +1,89 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { describe, expect, it } from "vitest"; + +import { ArtifactSink } from "../framework/artifacts.ts"; +import { CleanupRegistry } from "../framework/cleanup.ts"; +import { test as e2eTest } from "../framework/e2e-test.ts"; +import { SecretStore } from "../framework/secrets.ts"; + +describe("E2E fixture primitives", () => { + it("artifact sink writes under its root and rejects traversal", async () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-e2e-artifacts-")); + try { + const artifacts = new ArtifactSink(tmp); + await artifacts.ensureRoot(); + const written = await artifacts.writeText("nested/output.txt", "ok"); + expect(fs.readFileSync(written, "utf8")).toBe("ok"); + expect(() => artifacts.pathFor("../escape.txt")).toThrow(/escapes root/); + expect(() => artifacts.pathFor(path.join(tmp, "absolute.txt"))).toThrow(/must be relative/); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); + + it("cleanup registry runs callbacks in reverse order", async () => { + const cleanup = new CleanupRegistry(); + const order: string[] = []; + cleanup.add("first", () => { + order.push("first"); + }); + cleanup.add("second", () => { + order.push("second"); + }); + + const result = await cleanup.runAll(); + expect(order).toEqual(["second", "first"]); + expect(result).toEqual({ passed: ["second", "first"], failures: [] }); + }); + + it("secret store redacts sensitive env values and skips missing required secrets", () => { + const store = new SecretStore( + { NVIDIA_API_KEY: "nv-secret", PLAIN_VALUE: "visible" }, + (note?: string): never => { + throw new Error(note ?? "skipped"); + }, + ); + + expect(store.optional("PLAIN_VALUE")).toBe("visible"); + expect(store.redact("token=nv-secret plain=visible")).toBe("token=[REDACTED] plain=visible"); + expect(() => store.required("MISSING_SECRET")).toThrow(/missing required E2E secret/); + }); +}); + +e2eTest("fixture context captures redacted shell artifacts", async ({ + artifacts, + cleanup, + shellProbe, +}) => { + const marker = await artifacts.writeText("context.txt", "fixture-ready"); + cleanup.add("write cleanup marker", async () => { + await artifacts.writeText("cleanup-marker.txt", "done"); + }); + + const secret = "shell-probe-secret-value"; + const result = await shellProbe.run(process.execPath, { + args: [ + "-e", + "console.log(process.env.NEMOCLAW_TEST_TOKEN); console.error(process.argv[1]);", + secret, + ], + artifactName: "redaction-proof", + env: { NEMOCLAW_TEST_TOKEN: secret }, + redactionValues: [secret], + timeoutMs: 5_000, + }); + + expect(fs.readFileSync(marker, "utf8")).toBe("fixture-ready"); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("[REDACTED]"); + expect(result.stderr).toContain("[REDACTED]"); + expect(result.stdout).not.toContain(secret); + expect(result.stderr).not.toContain(secret); + expect(fs.readFileSync(result.artifacts.result, "utf8")).not.toContain(secret); +}); diff --git a/test/e2e-scenario/framework/artifacts.ts b/test/e2e-scenario/framework/artifacts.ts new file mode 100644 index 0000000000..99597d8e8a --- /dev/null +++ b/test/e2e-scenario/framework/artifacts.ts @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs/promises"; +import path from "node:path"; + +export class ArtifactSink { + readonly rootDir: string; + + constructor(rootDir: string) { + this.rootDir = path.resolve(rootDir); + } + + async ensureRoot(): Promise { + await fs.mkdir(this.rootDir, { recursive: true }); + } + + pathFor(relativePath: string): string { + if (!relativePath || path.isAbsolute(relativePath)) { + throw new Error(`artifact path must be relative: ${relativePath}`); + } + const resolved = path.resolve(this.rootDir, relativePath); + if (resolved !== this.rootDir && !resolved.startsWith(`${this.rootDir}${path.sep}`)) { + throw new Error(`artifact path escapes root: ${relativePath}`); + } + return resolved; + } + + async writeText(relativePath: string, text: string): Promise { + const target = this.pathFor(relativePath); + await fs.mkdir(path.dirname(target), { recursive: true }); + await fs.writeFile(target, text, "utf8"); + return target; + } + + async writeJson(relativePath: string, value: unknown): Promise { + return this.writeText(relativePath, `${JSON.stringify(value, null, 2)}\n`); + } +} + +export function slugifyArtifactName(name: string): string { + const slug = name + .trim() + .toLowerCase() + .replace(/[^a-z0-9._-]+/g, "-") + .replace(/^-+|-+$/g, ""); + return slug || "unnamed-test"; +} + +export function createArtifactSink(testName: string, rootDir = process.cwd()): ArtifactSink { + const baseDir = process.env.E2E_ARTIFACT_DIR ?? path.join(rootDir, ".e2e", "vitest"); + return new ArtifactSink(path.join(baseDir, slugifyArtifactName(testName))); +} diff --git a/test/e2e-scenario/framework/cleanup.ts b/test/e2e-scenario/framework/cleanup.ts new file mode 100644 index 0000000000..3f76b6394e --- /dev/null +++ b/test/e2e-scenario/framework/cleanup.ts @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +export interface CleanupFailure { + name: string; + message: string; +} + +export interface CleanupResult { + passed: string[]; + failures: CleanupFailure[]; +} + +type CleanupFn = () => Promise | void; + +interface CleanupEntry { + name: string; + run: CleanupFn; +} + +export class CleanupRegistry { + private readonly entries: CleanupEntry[] = []; + + add(name: string, run: CleanupFn): void { + if (!name.trim()) { + throw new Error("cleanup name is required"); + } + this.entries.push({ name, run }); + } + + async runAll(): Promise { + const result: CleanupResult = { passed: [], failures: [] }; + for (const entry of [...this.entries].reverse()) { + try { + await entry.run(); + result.passed.push(entry.name); + } catch (error) { + result.failures.push({ + name: entry.name, + message: error instanceof Error ? error.message : String(error), + }); + } + } + this.entries.length = 0; + return result; + } +} + +export function assertCleanupPassed(result: CleanupResult): void { + if (result.failures.length === 0) return; + const details = result.failures.map((failure) => `${failure.name}: ${failure.message}`).join("; "); + throw new Error(`E2E cleanup failed: ${details}`); +} diff --git a/test/e2e-scenario/framework/e2e-test.ts b/test/e2e-scenario/framework/e2e-test.ts new file mode 100644 index 0000000000..56d9e37b0e --- /dev/null +++ b/test/e2e-scenario/framework/e2e-test.ts @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { expect, test as base } from "vitest"; + +import { createArtifactSink, type ArtifactSink } from "./artifacts.ts"; +import { assertCleanupPassed, CleanupRegistry } from "./cleanup.ts"; +import { SecretStore } from "./secrets.ts"; +import { ShellProbe } from "./shell-probe.ts"; + +export interface E2EScenarioFixtures { + artifacts: ArtifactSink; + cleanup: CleanupRegistry; + secrets: SecretStore; + shellProbe: ShellProbe; +} + +export const test = base + .extend("artifacts", async ({ task }, { onCleanup }) => { + const artifacts = createArtifactSink(task.name); + await artifacts.ensureRoot(); + onCleanup(async () => { + await artifacts.writeJson("artifact-summary.json", { + test: task.name, + rootDir: artifacts.rootDir, + }); + }); + return artifacts; + }) + .extend("cleanup", async ({ artifacts }, { onCleanup }) => { + const cleanup = new CleanupRegistry(); + onCleanup(async () => { + const result = await cleanup.runAll(); + await artifacts.writeJson("cleanup.json", result); + assertCleanupPassed(result); + }); + return cleanup; + }) + .extend("secrets", async ({ skip }) => new SecretStore(process.env, skip)) + .extend("shellProbe", async ({ artifacts, secrets, signal }) => { + return new ShellProbe({ + artifacts, + redact: (text, extraValues) => secrets.redact(text, extraValues), + signal, + }); + }); + +export { expect }; diff --git a/test/e2e-scenario/framework/secrets.ts b/test/e2e-scenario/framework/secrets.ts new file mode 100644 index 0000000000..5d5f540420 --- /dev/null +++ b/test/e2e-scenario/framework/secrets.ts @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +const SENSITIVE_NAME_PATTERN = /(api[_-]?key|token|secret|password|credential)/i; + +export function redactText(text: string, secretValues: Iterable): string { + let redacted = text; + for (const value of secretValues) { + if (!value) continue; + redacted = redacted.split(value).join("[REDACTED]"); + } + return redacted; +} + +export class SecretStore { + private readonly env: NodeJS.ProcessEnv; + private readonly skip: (note?: string) => never; + + constructor(env: NodeJS.ProcessEnv, skip: (note?: string) => never) { + this.env = env; + this.skip = skip; + } + + optional(name: string): string | undefined { + const value = this.env[name]; + return value && value.length > 0 ? value : undefined; + } + + required(name: string): string { + const value = this.optional(name); + if (!value) { + this.skip(`missing required E2E secret: ${name}`); + } + return value; + } + + redactionValues(extraValues: string[] = []): string[] { + const values = new Set(); + for (const [name, value] of Object.entries(this.env)) { + if (value && SENSITIVE_NAME_PATTERN.test(name)) { + values.add(value); + } + } + for (const value of extraValues) { + if (value) values.add(value); + } + return [...values]; + } + + redact(text: string, extraValues: string[] = []): string { + return redactText(text, this.redactionValues(extraValues)); + } +} diff --git a/test/e2e-scenario/framework/shell-probe.ts b/test/e2e-scenario/framework/shell-probe.ts new file mode 100644 index 0000000000..60c70d41b7 --- /dev/null +++ b/test/e2e-scenario/framework/shell-probe.ts @@ -0,0 +1,125 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { spawn } from "node:child_process"; + +import type { ArtifactSink } from "./artifacts.ts"; +import { redactText } from "./secrets.ts"; + +export interface ShellProbeRunOptions { + args?: string[]; + cwd?: string; + env?: NodeJS.ProcessEnv; + timeoutMs?: number; + artifactName?: string; + redactionValues?: string[]; +} + +export interface ShellProbeResult { + command: string[]; + exitCode: number | null; + signal: NodeJS.Signals | null; + timedOut: boolean; + stdout: string; + stderr: string; + artifacts: { + stdout: string; + stderr: string; + result: string; + }; +} + +export interface ShellProbeDeps { + artifacts: ArtifactSink; + redact: (text: string, extraValues?: string[]) => string; + signal: AbortSignal; +} + +const DEFAULT_TIMEOUT_MS = 60_000; + +function safeArtifactBase(command: string, explicitName?: string): string { + const raw = explicitName ?? command; + const safe = raw + .trim() + .toLowerCase() + .replace(/[^a-z0-9._-]+/g, "-") + .replace(/^-+|-+$/g, ""); + return safe || "shell-probe"; +} + +export class ShellProbe { + private readonly artifacts: ArtifactSink; + private readonly redact: (text: string, extraValues?: string[]) => string; + private readonly signal: AbortSignal; + + constructor(deps: ShellProbeDeps) { + this.artifacts = deps.artifacts; + this.redact = deps.redact; + this.signal = deps.signal; + } + + async run(command: string, options: ShellProbeRunOptions = {}): Promise { + if (!command.trim()) { + throw new Error("shell probe command is required"); + } + + const args = options.args ?? []; + const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const child = spawn(command, args, { + cwd: options.cwd, + env: { ...process.env, ...(options.env ?? {}) }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + let timedOut = false; + + child.stdout.setEncoding("utf8"); + child.stderr.setEncoding("utf8"); + child.stdout.on("data", (chunk) => { + stdout += chunk; + }); + child.stderr.on("data", (chunk) => { + stderr += chunk; + }); + + const abort = () => { + child.kill("SIGTERM"); + }; + const timeout = setTimeout(() => { + timedOut = true; + child.kill("SIGTERM"); + }, timeoutMs); + this.signal.addEventListener("abort", abort, { once: true }); + + const { code, signal } = await new Promise<{ code: number | null; signal: NodeJS.Signals | null }>( + (resolve, reject) => { + child.on("error", reject); + child.on("close", (code, signal) => resolve({ code, signal })); + }, + ); + + clearTimeout(timeout); + this.signal.removeEventListener("abort", abort); + + const redactionValues = options.redactionValues ?? []; + const redactedStdout = this.redact(redactText(stdout, redactionValues)); + const redactedStderr = this.redact(redactText(stderr, redactionValues)); + const artifactBase = `shell/${safeArtifactBase(command, options.artifactName)}`; + const result: Omit = { + command: [command, ...args].map((part) => this.redact(redactText(part, redactionValues))), + exitCode: code, + signal, + timedOut, + stdout: redactedStdout, + stderr: redactedStderr, + }; + const artifacts = { + stdout: await this.artifacts.writeText(`${artifactBase}.stdout.txt`, redactedStdout), + stderr: await this.artifacts.writeText(`${artifactBase}.stderr.txt`, redactedStderr), + result: await this.artifacts.writeJson(`${artifactBase}.result.json`, result), + }; + return { ...result, artifacts }; + } +} From ea012f91850d63047c34e57d71a346af692e93d7 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 12:02:07 -0700 Subject: [PATCH 4/9] test(e2e): harden scenario fixture cleanup and probes Signed-off-by: Carlos Villela --- .../e2e-fixture-context.test.ts | 73 ++++++++++++++++++- test/e2e-scenario/framework/cleanup.ts | 12 ++- test/e2e-scenario/framework/e2e-test.ts | 6 +- test/e2e-scenario/framework/shell-probe.ts | 13 +++- 4 files changed, 96 insertions(+), 8 deletions(-) diff --git a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts index b5e46acafe..3a39de03f1 100644 --- a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts @@ -8,7 +8,7 @@ import path from "node:path"; import { describe, expect, it } from "vitest"; import { ArtifactSink } from "../framework/artifacts.ts"; -import { CleanupRegistry } from "../framework/cleanup.ts"; +import { assertCleanupPassed, CleanupRegistry } from "../framework/cleanup.ts"; import { test as e2eTest } from "../framework/e2e-test.ts"; import { SecretStore } from "../framework/secrets.ts"; @@ -42,6 +42,32 @@ describe("E2E fixture primitives", () => { expect(result).toEqual({ passed: ["second", "first"], failures: [] }); }); + it("cleanup registry redacts failures, continues, and clears callbacks", async () => { + const secret = "cleanup-secret-value"; + const cleanup = new CleanupRegistry((text) => text.split(secret).join("[REDACTED]")); + const order: string[] = []; + cleanup.add("first", () => { + order.push("first"); + }); + cleanup.add("second", () => { + order.push("second"); + throw new Error(`failed with ${secret}`); + }); + cleanup.add(`third-${secret}`, () => { + order.push("third"); + }); + + const result = await cleanup.runAll(); + expect(order).toEqual(["third", "second", "first"]); + expect(result).toEqual({ + passed: ["third-[REDACTED]", "first"], + failures: [{ name: "second", message: "failed with [REDACTED]" }], + }); + expect(() => assertCleanupPassed(result)).toThrow("failed with [REDACTED]"); + expect(() => assertCleanupPassed(result)).not.toThrow(secret); + expect(await cleanup.runAll()).toEqual({ passed: [], failures: [] }); + }); + it("secret store redacts sensitive env values and skips missing required secrets", () => { const store = new SecretStore( { NVIDIA_API_KEY: "nv-secret", PLAIN_VALUE: "visible" }, @@ -87,3 +113,48 @@ e2eTest("fixture context captures redacted shell artifacts", async ({ expect(result.stderr).not.toContain(secret); expect(fs.readFileSync(result.artifacts.result, "utf8")).not.toContain(secret); }); + +e2eTest("shell probe uses explicit env and escalates ignored timeouts", async ({ shellProbe }) => { + const parentSecretName = "NEMOCLAW_PARENT_SECRET_FOR_PROBE_TEST"; + const parentSecret = "parent-secret-value"; + const explicitSecret = "explicit-secret-value"; + const oldParentSecret = process.env[parentSecretName]; + process.env[parentSecretName] = parentSecret; + try { + const envResult = await shellProbe.run(process.execPath, { + args: [ + "-e", + `console.log(process.env.${parentSecretName} ?? "missing"); console.log(process.env.NEMOCLAW_TEST_TOKEN);`, + ], + artifactName: "minimal-env", + env: { NEMOCLAW_TEST_TOKEN: explicitSecret }, + redactionValues: [explicitSecret, parentSecret], + timeoutMs: 5_000, + }); + + expect(envResult.exitCode).toBe(0); + expect(envResult.stdout).toContain("missing"); + expect(envResult.stdout).toContain("[REDACTED]"); + expect(envResult.stdout).not.toContain(parentSecret); + expect(envResult.stdout).not.toContain(explicitSecret); + expect(fs.readFileSync(envResult.artifacts.result, "utf8")).not.toContain(explicitSecret); + } finally { + if (oldParentSecret === undefined) { + delete process.env[parentSecretName]; + } else { + process.env[parentSecretName] = oldParentSecret; + } + } + + const started = Date.now(); + const timeoutResult = await shellProbe.run(process.execPath, { + args: ["-e", "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);"], + artifactName: "timeout-escalation", + timeoutMs: 50, + killGraceMs: 50, + }); + + expect(Date.now() - started).toBeLessThan(2_000); + expect(timeoutResult.timedOut).toBe(true); + expect(timeoutResult.signal).toBe("SIGKILL"); +}); diff --git a/test/e2e-scenario/framework/cleanup.ts b/test/e2e-scenario/framework/cleanup.ts index 3f76b6394e..84427cf5fe 100644 --- a/test/e2e-scenario/framework/cleanup.ts +++ b/test/e2e-scenario/framework/cleanup.ts @@ -12,6 +12,7 @@ export interface CleanupResult { } type CleanupFn = () => Promise | void; +type RedactFn = (text: string) => string; interface CleanupEntry { name: string; @@ -20,6 +21,11 @@ interface CleanupEntry { export class CleanupRegistry { private readonly entries: CleanupEntry[] = []; + private readonly redact: RedactFn; + + constructor(redact: RedactFn = (text) => text) { + this.redact = redact; + } add(name: string, run: CleanupFn): void { if (!name.trim()) { @@ -33,11 +39,11 @@ export class CleanupRegistry { for (const entry of [...this.entries].reverse()) { try { await entry.run(); - result.passed.push(entry.name); + result.passed.push(this.redact(entry.name)); } catch (error) { result.failures.push({ - name: entry.name, - message: error instanceof Error ? error.message : String(error), + name: this.redact(entry.name), + message: this.redact(error instanceof Error ? error.message : String(error)), }); } } diff --git a/test/e2e-scenario/framework/e2e-test.ts b/test/e2e-scenario/framework/e2e-test.ts index 56d9e37b0e..d7f8e10745 100644 --- a/test/e2e-scenario/framework/e2e-test.ts +++ b/test/e2e-scenario/framework/e2e-test.ts @@ -27,8 +27,9 @@ export const test = base }); return artifacts; }) - .extend("cleanup", async ({ artifacts }, { onCleanup }) => { - const cleanup = new CleanupRegistry(); + .extend("secrets", async ({ skip }) => new SecretStore(process.env, skip)) + .extend("cleanup", async ({ artifacts, secrets }, { onCleanup }) => { + const cleanup = new CleanupRegistry((text) => secrets.redact(text)); onCleanup(async () => { const result = await cleanup.runAll(); await artifacts.writeJson("cleanup.json", result); @@ -36,7 +37,6 @@ export const test = base }); return cleanup; }) - .extend("secrets", async ({ skip }) => new SecretStore(process.env, skip)) .extend("shellProbe", async ({ artifacts, secrets, signal }) => { return new ShellProbe({ artifacts, diff --git a/test/e2e-scenario/framework/shell-probe.ts b/test/e2e-scenario/framework/shell-probe.ts index 60c70d41b7..72bfc90774 100644 --- a/test/e2e-scenario/framework/shell-probe.ts +++ b/test/e2e-scenario/framework/shell-probe.ts @@ -10,7 +10,9 @@ export interface ShellProbeRunOptions { args?: string[]; cwd?: string; env?: NodeJS.ProcessEnv; + inheritEnv?: boolean; timeoutMs?: number; + killGraceMs?: number; artifactName?: string; redactionValues?: string[]; } @@ -36,6 +38,7 @@ export interface ShellProbeDeps { } const DEFAULT_TIMEOUT_MS = 60_000; +const DEFAULT_KILL_GRACE_MS = 1_000; function safeArtifactBase(command: string, explicitName?: string): string { const raw = explicitName ?? command; @@ -65,9 +68,10 @@ export class ShellProbe { const args = options.args ?? []; const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const killGraceMs = options.killGraceMs ?? DEFAULT_KILL_GRACE_MS; const child = spawn(command, args, { cwd: options.cwd, - env: { ...process.env, ...(options.env ?? {}) }, + env: options.inheritEnv ? { ...process.env, ...(options.env ?? {}) } : { ...(options.env ?? {}) }, stdio: ["ignore", "pipe", "pipe"], }); @@ -87,9 +91,15 @@ export class ShellProbe { const abort = () => { child.kill("SIGTERM"); }; + let killTimer: NodeJS.Timeout | undefined; const timeout = setTimeout(() => { timedOut = true; child.kill("SIGTERM"); + killTimer = setTimeout(() => { + if (child.exitCode === null && child.signalCode === null) { + child.kill("SIGKILL"); + } + }, killGraceMs); }, timeoutMs); this.signal.addEventListener("abort", abort, { once: true }); @@ -101,6 +111,7 @@ export class ShellProbe { ); clearTimeout(timeout); + if (killTimer) clearTimeout(killTimer); this.signal.removeEventListener("abort", abort); const redactionValues = options.redactionValues ?? []; From b117d3cec01b9f791b5daa797c981c055ef89d6f Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 12:20:22 -0700 Subject: [PATCH 5/9] test(e2e): harden shell probe spawn errors Signed-off-by: Carlos Villela --- .../e2e-fixture-context.test.ts | 62 +++++++++++++++ test/e2e-scenario/framework/shell-probe.ts | 78 ++++++++++++++----- 2 files changed, 119 insertions(+), 21 deletions(-) diff --git a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts index 3a39de03f1..578ba46310 100644 --- a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts @@ -11,6 +11,7 @@ import { ArtifactSink } from "../framework/artifacts.ts"; import { assertCleanupPassed, CleanupRegistry } from "../framework/cleanup.ts"; import { test as e2eTest } from "../framework/e2e-test.ts"; import { SecretStore } from "../framework/secrets.ts"; +import { ShellProbe } from "../framework/shell-probe.ts"; describe("E2E fixture primitives", () => { it("artifact sink writes under its root and rejects traversal", async () => { @@ -80,6 +81,67 @@ describe("E2E fixture primitives", () => { expect(store.redact("token=nv-secret plain=visible")).toBe("token=[REDACTED] plain=visible"); expect(() => store.required("MISSING_SECRET")).toThrow(/missing required E2E secret/); }); + + it("shell probe cleans up and redacts missing command failures", async () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-e2e-shell-probe-")); + try { + const artifacts = new ArtifactSink(tmp); + await artifacts.ensureRoot(); + const secret = "spawn-secret-value"; + const controller = new AbortController(); + let abortAdds = 0; + let abortRemoves = 0; + const addEventListener = controller.signal.addEventListener.bind(controller.signal); + const removeEventListener = controller.signal.removeEventListener.bind(controller.signal); + const instrumentedAddEventListener = ( + type: string, + listener: EventListener | EventListenerObject, + options?: AddEventListenerOptions | boolean, + ) => { + if (type === "abort") abortAdds += 1; + return addEventListener(type, listener, options); + }; + const instrumentedRemoveEventListener = ( + type: string, + listener: EventListener | EventListenerObject, + options?: EventListenerOptions | boolean, + ) => { + if (type === "abort") abortRemoves += 1; + return removeEventListener(type, listener, options); + }; + controller.signal.addEventListener = instrumentedAddEventListener as typeof controller.signal.addEventListener; + controller.signal.removeEventListener = instrumentedRemoveEventListener as typeof controller.signal.removeEventListener; + const probe = new ShellProbe({ + artifacts, + redact: (text, extraValues = []) => + [secret, ...extraValues].reduce((redacted, value) => redacted.split(value).join("[REDACTED]"), text), + signal: controller.signal, + }); + + let thrown: unknown; + try { + await probe.run(`missing-command-${secret}`, { + args: [secret], + artifactName: "spawn-error", + redactionValues: [secret], + timeoutMs: 10_000, + }); + } catch (error) { + thrown = error; + } + + expect(thrown).toBeInstanceOf(Error); + const message = thrown instanceof Error ? thrown.message : String(thrown); + expect(message).toContain("[REDACTED]"); + expect(message).not.toContain(secret); + expect(abortAdds).toBe(1); + expect(abortRemoves).toBe(1); + expect(fs.readFileSync(artifacts.pathFor("shell/spawn-error.result.json"), "utf8")).not.toContain(secret); + expect(fs.readFileSync(artifacts.pathFor("shell/spawn-error.stderr.txt"), "utf8")).toContain("[REDACTED]"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); }); e2eTest("fixture context captures redacted shell artifacts", async ({ diff --git a/test/e2e-scenario/framework/shell-probe.ts b/test/e2e-scenario/framework/shell-probe.ts index 72bfc90774..aec4b5d58a 100644 --- a/test/e2e-scenario/framework/shell-probe.ts +++ b/test/e2e-scenario/framework/shell-probe.ts @@ -40,8 +40,7 @@ export interface ShellProbeDeps { const DEFAULT_TIMEOUT_MS = 60_000; const DEFAULT_KILL_GRACE_MS = 1_000; -function safeArtifactBase(command: string, explicitName?: string): string { - const raw = explicitName ?? command; +function safeArtifactBase(raw: string): string { const safe = raw .trim() .toLowerCase() @@ -50,6 +49,18 @@ function safeArtifactBase(command: string, explicitName?: string): string { return safe || "shell-probe"; } +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + +function redactedError(error: unknown, message: string): Error { + const next = new Error(message); + if (error instanceof Error) { + next.name = error.name; + } + return next; +} + export class ShellProbe { private readonly artifacts: ArtifactSink; private readonly redact: (text: string, extraValues?: string[]) => string; @@ -69,6 +80,15 @@ export class ShellProbe { const args = options.args ?? []; const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS; const killGraceMs = options.killGraceMs ?? DEFAULT_KILL_GRACE_MS; + const redactionValues = options.redactionValues ?? []; + const redactProbeText = (text: string) => this.redact(redactText(text, redactionValues)); + const redactedCommand = [command, ...args].map(redactProbeText); + const artifactBase = `shell/${safeArtifactBase(redactProbeText(options.artifactName ?? command))}`; + const writeArtifacts = async (result: Omit): Promise => ({ + stdout: await this.artifacts.writeText(`${artifactBase}.stdout.txt`, result.stdout), + stderr: await this.artifacts.writeText(`${artifactBase}.stderr.txt`, result.stderr), + result: await this.artifacts.writeJson(`${artifactBase}.result.json`, result), + }); const child = spawn(command, args, { cwd: options.cwd, env: options.inheritEnv ? { ...process.env, ...(options.env ?? {}) } : { ...(options.env ?? {}) }, @@ -103,34 +123,50 @@ export class ShellProbe { }, timeoutMs); this.signal.addEventListener("abort", abort, { once: true }); - const { code, signal } = await new Promise<{ code: number | null; signal: NodeJS.Signals | null }>( - (resolve, reject) => { + let childResult: { code: number | null; signal: NodeJS.Signals | null } | undefined; + let childError: unknown; + try { + childResult = await new Promise<{ code: number | null; signal: NodeJS.Signals | null }>((resolve, reject) => { child.on("error", reject); child.on("close", (code, signal) => resolve({ code, signal })); - }, - ); + }); + } catch (error) { + childError = error; + } finally { + clearTimeout(timeout); + if (killTimer) clearTimeout(killTimer); + this.signal.removeEventListener("abort", abort); + } - clearTimeout(timeout); - if (killTimer) clearTimeout(killTimer); - this.signal.removeEventListener("abort", abort); + const redactedStdout = redactProbeText(stdout); + if (childError) { + const redactedMessage = redactProbeText(errorMessage(childError)); + const redactedStderr = redactProbeText([stderr, redactedMessage].filter(Boolean).join("\n")); + await writeArtifacts({ + command: redactedCommand, + exitCode: null, + signal: null, + timedOut, + stdout: redactedStdout, + stderr: redactedStderr, + }); + throw redactedError(childError, redactedMessage); + } - const redactionValues = options.redactionValues ?? []; - const redactedStdout = this.redact(redactText(stdout, redactionValues)); - const redactedStderr = this.redact(redactText(stderr, redactionValues)); - const artifactBase = `shell/${safeArtifactBase(command, options.artifactName)}`; + if (!childResult) { + throw new Error("shell probe child process did not report a result"); + } + + const redactedStderr = redactProbeText(stderr); const result: Omit = { - command: [command, ...args].map((part) => this.redact(redactText(part, redactionValues))), - exitCode: code, - signal, + command: redactedCommand, + exitCode: childResult.code, + signal: childResult.signal, timedOut, stdout: redactedStdout, stderr: redactedStderr, }; - const artifacts = { - stdout: await this.artifacts.writeText(`${artifactBase}.stdout.txt`, redactedStdout), - stderr: await this.artifacts.writeText(`${artifactBase}.stderr.txt`, redactedStderr), - result: await this.artifacts.writeJson(`${artifactBase}.result.json`, result), - }; + const artifacts = await writeArtifacts(result); return { ...result, artifacts }; } } From 5c3cc53b2b6e0a2d1eb8bd9df38ad45c4968c7e5 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 12:35:40 -0700 Subject: [PATCH 6/9] test(e2e): use vitest fixture object api Signed-off-by: Carlos Villela --- test/e2e-scenario/framework/e2e-test.ts | 37 ++++++++++++++----------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/test/e2e-scenario/framework/e2e-test.ts b/test/e2e-scenario/framework/e2e-test.ts index d7f8e10745..b442187f8d 100644 --- a/test/e2e-scenario/framework/e2e-test.ts +++ b/test/e2e-scenario/framework/e2e-test.ts @@ -15,34 +15,39 @@ export interface E2EScenarioFixtures { shellProbe: ShellProbe; } -export const test = base - .extend("artifacts", async ({ task }, { onCleanup }) => { +export const test = base.extend({ + artifacts: async ({ task }, use) => { const artifacts = createArtifactSink(task.name); await artifacts.ensureRoot(); - onCleanup(async () => { + try { + await use(artifacts); + } finally { await artifacts.writeJson("artifact-summary.json", { test: task.name, rootDir: artifacts.rootDir, }); - }); - return artifacts; - }) - .extend("secrets", async ({ skip }) => new SecretStore(process.env, skip)) - .extend("cleanup", async ({ artifacts, secrets }, { onCleanup }) => { + } + }, + secrets: async ({ skip }, use) => { + await use(new SecretStore(process.env, skip)); + }, + cleanup: async ({ artifacts, secrets }, use) => { const cleanup = new CleanupRegistry((text) => secrets.redact(text)); - onCleanup(async () => { + try { + await use(cleanup); + } finally { const result = await cleanup.runAll(); await artifacts.writeJson("cleanup.json", result); assertCleanupPassed(result); - }); - return cleanup; - }) - .extend("shellProbe", async ({ artifacts, secrets, signal }) => { - return new ShellProbe({ + } + }, + shellProbe: async ({ artifacts, secrets, signal }, use) => { + await use(new ShellProbe({ artifacts, redact: (text, extraValues) => secrets.redact(text, extraValues), signal, - }); - }); + })); + }, +}); export { expect }; From 9f305784e37f9aa6eeaf810047bdee9ed38cbff7 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 12:49:40 -0700 Subject: [PATCH 7/9] test(e2e): add shell probe command guardrails Signed-off-by: Carlos Villela --- .../e2e-fixture-context.test.ts | 147 +++++++++++++----- test/e2e-scenario/framework/shell-probe.ts | 74 +++++++-- 2 files changed, 174 insertions(+), 47 deletions(-) diff --git a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts index 578ba46310..699a27b917 100644 --- a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts @@ -5,13 +5,13 @@ import fs from "node:fs"; import os from "node:os"; import path from "node:path"; -import { describe, expect, it } from "vitest"; +import { describe, expect, expectTypeOf, it } from "vitest"; import { ArtifactSink } from "../framework/artifacts.ts"; import { assertCleanupPassed, CleanupRegistry } from "../framework/cleanup.ts"; import { test as e2eTest } from "../framework/e2e-test.ts"; import { SecretStore } from "../framework/secrets.ts"; -import { ShellProbe } from "../framework/shell-probe.ts"; +import { ShellProbe, trustedShellCommand, type TrustedShellCommand } from "../framework/shell-probe.ts"; describe("E2E fixture primitives", () => { it("artifact sink writes under its root and rejects traversal", async () => { @@ -82,6 +82,23 @@ describe("E2E fixture primitives", () => { expect(() => store.required("MISSING_SECRET")).toThrow(/missing required E2E secret/); }); + it("shell probe requires trusted command descriptors", () => { + expectTypeOf[0]>().toEqualTypeOf(); + expect(() => + trustedShellCommand({ + command: "node", + reason: "", + }), + ).toThrow(/trusted command reason is required/); + expect(() => + trustedShellCommand({ + command: "node", + args: ["bad\0arg"], + reason: "validate arguments", + }), + ).toThrow(/argument cannot contain NUL bytes/); + }); + it("shell probe cleans up and redacts missing command failures", async () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-e2e-shell-probe-")); try { @@ -120,12 +137,18 @@ describe("E2E fixture primitives", () => { let thrown: unknown; try { - await probe.run(`missing-command-${secret}`, { - args: [secret], - artifactName: "spawn-error", - redactionValues: [secret], - timeoutMs: 10_000, - }); + await probe.run( + trustedShellCommand({ + command: `missing-command-${secret}`, + args: [secret], + reason: "exercise redacted spawn failure handling", + }), + { + artifactName: "spawn-error", + redactionValues: [secret], + timeoutMs: 10_000, + }, + ); } catch (error) { thrown = error; } @@ -142,6 +165,42 @@ describe("E2E fixture primitives", () => { fs.rmSync(tmp, { recursive: true, force: true }); } }); + + it("shell probe escalates abort-triggered termination", async () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-e2e-shell-probe-abort-")); + try { + const artifacts = new ArtifactSink(tmp); + await artifacts.ensureRoot(); + const controller = new AbortController(); + const probe = new ShellProbe({ + artifacts, + redact: (text) => text, + signal: controller.signal, + }); + + const started = Date.now(); + const run = probe.run( + trustedShellCommand({ + command: process.execPath, + args: ["-e", "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);"], + reason: "exercise abort escalation", + }), + { + artifactName: "abort-escalation", + timeoutMs: 10_000, + killGraceMs: 50, + }, + ); + setTimeout(() => controller.abort(), 50); + const result = await run; + + expect(Date.now() - started).toBeLessThan(2_000); + expect(result.timedOut).toBe(false); + expect(result.signal).toBe("SIGKILL"); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); }); e2eTest("fixture context captures redacted shell artifacts", async ({ @@ -155,17 +214,23 @@ e2eTest("fixture context captures redacted shell artifacts", async ({ }); const secret = "shell-probe-secret-value"; - const result = await shellProbe.run(process.execPath, { - args: [ - "-e", - "console.log(process.env.NEMOCLAW_TEST_TOKEN); console.error(process.argv[1]);", - secret, - ], - artifactName: "redaction-proof", - env: { NEMOCLAW_TEST_TOKEN: secret }, - redactionValues: [secret], - timeoutMs: 5_000, - }); + const result = await shellProbe.run( + trustedShellCommand({ + command: process.execPath, + args: [ + "-e", + "console.log(process.env.NEMOCLAW_TEST_TOKEN); console.error(process.argv[1]);", + secret, + ], + reason: "exercise fixture shell artifact redaction", + }), + { + artifactName: "redaction-proof", + env: { NEMOCLAW_TEST_TOKEN: secret }, + redactionValues: [secret], + timeoutMs: 5_000, + }, + ); expect(fs.readFileSync(marker, "utf8")).toBe("fixture-ready"); expect(result.exitCode).toBe(0); @@ -183,16 +248,22 @@ e2eTest("shell probe uses explicit env and escalates ignored timeouts", async ({ const oldParentSecret = process.env[parentSecretName]; process.env[parentSecretName] = parentSecret; try { - const envResult = await shellProbe.run(process.execPath, { - args: [ - "-e", - `console.log(process.env.${parentSecretName} ?? "missing"); console.log(process.env.NEMOCLAW_TEST_TOKEN);`, - ], - artifactName: "minimal-env", - env: { NEMOCLAW_TEST_TOKEN: explicitSecret }, - redactionValues: [explicitSecret, parentSecret], - timeoutMs: 5_000, - }); + const envResult = await shellProbe.run( + trustedShellCommand({ + command: process.execPath, + args: [ + "-e", + `console.log(process.env.${parentSecretName} ?? "missing"); console.log(process.env.NEMOCLAW_TEST_TOKEN);`, + ], + reason: "exercise explicit shell probe environment", + }), + { + artifactName: "minimal-env", + env: { NEMOCLAW_TEST_TOKEN: explicitSecret }, + redactionValues: [explicitSecret, parentSecret], + timeoutMs: 5_000, + }, + ); expect(envResult.exitCode).toBe(0); expect(envResult.stdout).toContain("missing"); @@ -209,12 +280,18 @@ e2eTest("shell probe uses explicit env and escalates ignored timeouts", async ({ } const started = Date.now(); - const timeoutResult = await shellProbe.run(process.execPath, { - args: ["-e", "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);"], - artifactName: "timeout-escalation", - timeoutMs: 50, - killGraceMs: 50, - }); + const timeoutResult = await shellProbe.run( + trustedShellCommand({ + command: process.execPath, + args: ["-e", "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);"], + reason: "exercise timeout escalation", + }), + { + artifactName: "timeout-escalation", + timeoutMs: 50, + killGraceMs: 50, + }, + ); expect(Date.now() - started).toBeLessThan(2_000); expect(timeoutResult.timedOut).toBe(true); diff --git a/test/e2e-scenario/framework/shell-probe.ts b/test/e2e-scenario/framework/shell-probe.ts index aec4b5d58a..c9896548f6 100644 --- a/test/e2e-scenario/framework/shell-probe.ts +++ b/test/e2e-scenario/framework/shell-probe.ts @@ -7,7 +7,6 @@ import type { ArtifactSink } from "./artifacts.ts"; import { redactText } from "./secrets.ts"; export interface ShellProbeRunOptions { - args?: string[]; cwd?: string; env?: NodeJS.ProcessEnv; inheritEnv?: boolean; @@ -17,6 +16,22 @@ export interface ShellProbeRunOptions { redactionValues?: string[]; } +const trustedShellCommandBrand: unique symbol = Symbol("TrustedShellCommand"); + +export interface TrustedShellCommand { + readonly command: string; + readonly args: readonly string[]; + readonly reason: string; + readonly [trustedShellCommandBrand]: true; +} + +export interface TrustedShellCommandInput { + command: string; + args?: string[]; + reason: string; + validate?: (command: string, args: readonly string[]) => void; +} + export interface ShellProbeResult { command: string[]; exitCode: number | null; @@ -61,6 +76,40 @@ function redactedError(error: unknown, message: string): Error { return next; } +function validateShellToken(value: string, label: string): string { + if (value.includes("\0")) { + throw new Error(`shell probe ${label} cannot contain NUL bytes`); + } + return value; +} + +/** + * Declares a shell command as trusted at the fixture/helper boundary. + * + * Build descriptors from constants or typed fixture helpers. Do not pass + * scenario, manifest, PR, or other untrusted values as the executable command. + * Put command-specific argument validation in `validate` when arguments include + * values derived from scenario data. + */ +export function trustedShellCommand(input: TrustedShellCommandInput): TrustedShellCommand { + const command = validateShellToken(input.command.trim(), "command"); + if (!command) { + throw new Error("shell probe command is required"); + } + const reason = input.reason.trim(); + if (!reason) { + throw new Error("shell probe trusted command reason is required"); + } + const args = (input.args ?? []).map((arg) => validateShellToken(arg, "argument")); + input.validate?.(command, args); + return { + command, + args, + reason, + [trustedShellCommandBrand]: true, + }; +} + export class ShellProbe { private readonly artifacts: ArtifactSink; private readonly redact: (text: string, extraValues?: string[]) => string; @@ -72,12 +121,9 @@ export class ShellProbe { this.signal = deps.signal; } - async run(command: string, options: ShellProbeRunOptions = {}): Promise { - if (!command.trim()) { - throw new Error("shell probe command is required"); - } - - const args = options.args ?? []; + async run(trustedCommand: TrustedShellCommand, options: ShellProbeRunOptions = {}): Promise { + const command = trustedCommand.command; + const args = [...trustedCommand.args]; const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS; const killGraceMs = options.killGraceMs ?? DEFAULT_KILL_GRACE_MS; const redactionValues = options.redactionValues ?? []; @@ -108,18 +154,22 @@ export class ShellProbe { stderr += chunk; }); - const abort = () => { - child.kill("SIGTERM"); - }; let killTimer: NodeJS.Timeout | undefined; - const timeout = setTimeout(() => { - timedOut = true; + const terminate = () => { child.kill("SIGTERM"); + if (killTimer) clearTimeout(killTimer); killTimer = setTimeout(() => { if (child.exitCode === null && child.signalCode === null) { child.kill("SIGKILL"); } }, killGraceMs); + }; + const abort = () => { + terminate(); + }; + const timeout = setTimeout(() => { + timedOut = true; + terminate(); }, timeoutMs); this.signal.addEventListener("abort", abort, { once: true }); From a013e040f739bb19a0792b05c01fcddbbbcffd74 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 13:03:16 -0700 Subject: [PATCH 8/9] test(e2e): handle pre-aborted shell probe signals Signed-off-by: Carlos Villela --- .../e2e-fixture-context.test.ts | 35 +++++++++++++++++++ test/e2e-scenario/framework/shell-probe.ts | 6 +++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts index 699a27b917..5ed28c0a17 100644 --- a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts @@ -201,6 +201,41 @@ describe("E2E fixture primitives", () => { fs.rmSync(tmp, { recursive: true, force: true }); } }); + + it("shell probe terminates pre-aborted signals immediately", async () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-e2e-shell-probe-pre-abort-")); + try { + const artifacts = new ArtifactSink(tmp); + await artifacts.ensureRoot(); + const controller = new AbortController(); + controller.abort(); + const probe = new ShellProbe({ + artifacts, + redact: (text) => text, + signal: controller.signal, + }); + + const started = Date.now(); + const result = await probe.run( + trustedShellCommand({ + command: process.execPath, + args: ["-e", "process.on('SIGTERM', () => {}); setInterval(() => {}, 1000);"], + reason: "exercise pre-aborted signal termination", + }), + { + artifactName: "pre-abort-escalation", + timeoutMs: 10_000, + killGraceMs: 50, + }, + ); + + expect(Date.now() - started).toBeLessThan(2_000); + expect(result.timedOut).toBe(false); + expect(result.signal).toBeTruthy(); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); }); e2eTest("fixture context captures redacted shell artifacts", async ({ diff --git a/test/e2e-scenario/framework/shell-probe.ts b/test/e2e-scenario/framework/shell-probe.ts index c9896548f6..13e6f78a6f 100644 --- a/test/e2e-scenario/framework/shell-probe.ts +++ b/test/e2e-scenario/framework/shell-probe.ts @@ -171,7 +171,11 @@ export class ShellProbe { timedOut = true; terminate(); }, timeoutMs); - this.signal.addEventListener("abort", abort, { once: true }); + if (this.signal.aborted) { + abort(); + } else { + this.signal.addEventListener("abort", abort, { once: true }); + } let childResult: { code: number | null; signal: NodeJS.Signals | null } | undefined; let childError: unknown; From 929efde562ba4fbfcde10e44cfa16db8faf58702 Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Mon, 8 Jun 2026 13:20:47 -0700 Subject: [PATCH 9/9] test(e2e): align fixture bridge with scenario infra Signed-off-by: Carlos Villela --- .../e2e-fixture-context.test.ts | 63 +++++++++++++++++++ .../framework-tests/e2e-probes.test.ts | 32 ++++++++++ test/e2e-scenario/framework/secrets.ts | 18 +++++- test/e2e-scenario/framework/shell-probe.ts | 38 +++++++++-- .../scenarios/probes/injection-blocked.ts | 6 +- .../scenarios/probes/network-policy.ts | 2 +- .../scenarios/probes/shields-config.ts | 18 +++--- test/e2e-scenario/scenarios/probes/util.ts | 17 ++++- 8 files changed, 173 insertions(+), 21 deletions(-) diff --git a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts index 5ed28c0a17..a18108220d 100644 --- a/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-fixture-context.test.ts @@ -13,6 +13,26 @@ import { test as e2eTest } from "../framework/e2e-test.ts"; import { SecretStore } from "../framework/secrets.ts"; import { ShellProbe, trustedShellCommand, type TrustedShellCommand } from "../framework/shell-probe.ts"; +const delay = (ms: number): Promise => new Promise((resolve) => setTimeout(resolve, ms)); + +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch (error) { + return (error as NodeJS.ErrnoException).code !== "ESRCH"; + } +} + +async function expectProcessToExit(pid: number, timeoutMs = 2_000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + if (!isProcessAlive(pid)) return; + await delay(25); + } + throw new Error(`process ${pid} was still alive after ${timeoutMs}ms`); +} + describe("E2E fixture primitives", () => { it("artifact sink writes under its root and rejects traversal", async () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-e2e-artifacts-")); @@ -70,6 +90,7 @@ describe("E2E fixture primitives", () => { }); it("secret store redacts sensitive env values and skips missing required secrets", () => { + const canonicalToken = `${"nv"}${"api"}-${"a".repeat(24)}`; const store = new SecretStore( { NVIDIA_API_KEY: "nv-secret", PLAIN_VALUE: "visible" }, (note?: string): never => { @@ -79,6 +100,8 @@ describe("E2E fixture primitives", () => { expect(store.optional("PLAIN_VALUE")).toBe("visible"); expect(store.redact("token=nv-secret plain=visible")).toBe("token=[REDACTED] plain=visible"); + expect(store.redact(`printed ${canonicalToken}`)).toContain(""); + expect(store.redact(`printed ${canonicalToken}`)).not.toContain(canonicalToken); expect(() => store.required("MISSING_SECRET")).toThrow(/missing required E2E secret/); }); @@ -236,6 +259,46 @@ describe("E2E fixture primitives", () => { fs.rmSync(tmp, { recursive: true, force: true }); } }); + + it("shell probe reaps timed-out command process groups", async () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-e2e-shell-probe-pgid-")); + let grandchildPid: number | undefined; + try { + const artifacts = new ArtifactSink(tmp); + await artifacts.ensureRoot(); + const controller = new AbortController(); + const probe = new ShellProbe({ + artifacts, + redact: (text) => text, + signal: controller.signal, + }); + const pidFile = path.join(tmp, "sleep.pid"); + + const result = await probe.run( + trustedShellCommand({ + command: "bash", + args: ["-c", 'sleep 30 & echo "$!" > "$1"; wait', "e2e-shell-probe", pidFile], + reason: "exercise process-group timeout cleanup", + }), + { + artifactName: "process-group-timeout", + timeoutMs: 200, + killGraceMs: 50, + }, + ); + + grandchildPid = Number(fs.readFileSync(pidFile, "utf8").trim()); + expect(Number.isInteger(grandchildPid)).toBe(true); + expect(result.timedOut).toBe(true); + expect(result.signal).toBeTruthy(); + await expectProcessToExit(grandchildPid); + } finally { + if (grandchildPid && isProcessAlive(grandchildPid)) { + process.kill(grandchildPid, "SIGKILL"); + } + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); }); e2eTest("fixture context captures redacted shell artifacts", async ({ diff --git a/test/e2e-scenario/framework-tests/e2e-probes.test.ts b/test/e2e-scenario/framework-tests/e2e-probes.test.ts index db90b47798..8b96ec4b2c 100644 --- a/test/e2e-scenario/framework-tests/e2e-probes.test.ts +++ b/test/e2e-scenario/framework-tests/e2e-probes.test.ts @@ -14,6 +14,7 @@ import { } from "../scenarios/probes/registry.ts"; import type { ProbeContext, ProbeOutcome } from "../scenarios/probes/types.ts"; import { registerBuiltinProbes } from "../scenarios/probes/builtin.ts"; +import { writeProbeEvidence } from "../scenarios/probes/util.ts"; const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../.."); @@ -85,6 +86,37 @@ describe("probe registry", () => { }); }); +describe("probe evidence writer", () => { + it("writes evidence under the context dir and ignores escape paths", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "probe-evidence-root-")); + const contextDir = path.join(tmp, "ctx"); + fs.mkdirSync(contextDir, { recursive: true }); + try { + const insidePath = path.join(contextDir, "nested", "evidence.json"); + const insideCtx: ProbeContext = { + contextDir, + evidencePath: insidePath, + contextEnv: {}, + sandboxName: null, + gatewayUrl: null, + repoRoot: REPO_ROOT, + }; + writeProbeEvidence(insideCtx, { ok: true }); + expect(JSON.parse(fs.readFileSync(insidePath, "utf8"))).toEqual({ ok: true }); + + const outsidePath = path.join(tmp, "escape.json"); + const escapingCtx: ProbeContext = { + ...insideCtx, + evidencePath: outsidePath, + }; + writeProbeEvidence(escapingCtx, { ok: false }); + expect(fs.existsSync(outsidePath)).toBe(false); + } finally { + fs.rmSync(tmp, { recursive: true, force: true }); + } + }); +}); + // ───────────────────────────────────────────────────────────────────────────── // diagnosticsProbe — uses a fake `nemoclaw` on PATH so this test runs // reproducibly without depending on a real nemoclaw install. diff --git a/test/e2e-scenario/framework/secrets.ts b/test/e2e-scenario/framework/secrets.ts index 5d5f540420..e6ab9daf99 100644 --- a/test/e2e-scenario/framework/secrets.ts +++ b/test/e2e-scenario/framework/secrets.ts @@ -1,15 +1,29 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +import { redactString } from "../scenarios/orchestrators/redaction.ts"; + const SENSITIVE_NAME_PATTERN = /(api[_-]?key|token|secret|password|credential)/i; +const EXPLICIT_SECRET_REDACTION = "[REDACTED]"; + +/** + * Bridge-only fixture secret helper. + * + * The Vitest fixture layer still needs a small SecretStore while the scenario + * runner migration is in flight; #4989 tracks consolidating it into shared E2E + * framework infra. Canonical secret-shaped token matching belongs to + * scenarios/orchestrators/redaction.ts. Keep explicit fixture secret-value + * replacement here and always layer the parity-tested framework redactor + * underneath it so this path does not become a second pattern source. + */ export function redactText(text: string, secretValues: Iterable): string { let redacted = text; for (const value of secretValues) { if (!value) continue; - redacted = redacted.split(value).join("[REDACTED]"); + redacted = redacted.split(value).join(EXPLICIT_SECRET_REDACTION); } - return redacted; + return redactString(redacted); } export class SecretStore { diff --git a/test/e2e-scenario/framework/shell-probe.ts b/test/e2e-scenario/framework/shell-probe.ts index 13e6f78a6f..540f9bd372 100644 --- a/test/e2e-scenario/framework/shell-probe.ts +++ b/test/e2e-scenario/framework/shell-probe.ts @@ -6,6 +6,16 @@ import { spawn } from "node:child_process"; import type { ArtifactSink } from "./artifacts.ts"; import { redactText } from "./secrets.ts"; +/** + * Bridge-only host shell probe for the Vitest fixture migration. + * + * The end state is a shared spawn/evidence helper consumed by both this + * fixture layer and scenarios/orchestrators; #4988 tracks that consolidation. + * Until it lands, this probe mirrors the hardened shell boundary: trusted + * descriptors, NUL-byte rejection, explicit env by default, canonical + * redaction, and detached process-group termination for timeout/abort cleanup. + */ + export interface ShellProbeRunOptions { cwd?: string; env?: NodeJS.ProcessEnv; @@ -137,9 +147,11 @@ export class ShellProbe { }); const child = spawn(command, args, { cwd: options.cwd, + detached: true, env: options.inheritEnv ? { ...process.env, ...(options.env ?? {}) } : { ...(options.env ?? {}) }, stdio: ["ignore", "pipe", "pipe"], }); + const pgid = child.pid; let stdout = ""; let stderr = ""; @@ -155,14 +167,30 @@ export class ShellProbe { }); let killTimer: NodeJS.Timeout | undefined; + let terminationStarted = false; + const signalProcessGroup = (signal: NodeJS.Signals) => { + if (typeof pgid === "number") { + try { + process.kill(-pgid, signal); + return; + } catch { + /* fall back to the leader below */ + } + } + try { + child.kill(signal); + } catch { + /* already gone */ + } + }; const terminate = () => { - child.kill("SIGTERM"); + terminationStarted = true; + signalProcessGroup("SIGTERM"); if (killTimer) clearTimeout(killTimer); killTimer = setTimeout(() => { - if (child.exitCode === null && child.signalCode === null) { - child.kill("SIGKILL"); - } + signalProcessGroup("SIGKILL"); }, killGraceMs); + killTimer.unref(); }; const abort = () => { terminate(); @@ -188,7 +216,7 @@ export class ShellProbe { childError = error; } finally { clearTimeout(timeout); - if (killTimer) clearTimeout(killTimer); + if (killTimer && !terminationStarted) clearTimeout(killTimer); this.signal.removeEventListener("abort", abort); } diff --git a/test/e2e-scenario/scenarios/probes/injection-blocked.ts b/test/e2e-scenario/scenarios/probes/injection-blocked.ts index d1acf8ab3d..e1d4f7d854 100644 --- a/test/e2e-scenario/scenarios/probes/injection-blocked.ts +++ b/test/e2e-scenario/scenarios/probes/injection-blocked.ts @@ -102,7 +102,7 @@ export const injectionBlockedProbe: ProbeFn = async (ctx: ProbeContext): Promise evidence.echoStderrTail = echoResult.stderr; if (echoResult.exitCode !== 0) { - writeProbeEvidence(ctx.evidencePath, evidence); + writeProbeEvidence(ctx, evidence); return { status: "failed", classifier: echoResult.signal === "SIGTERM" ? "gateway-transient" : undefined, @@ -112,7 +112,7 @@ export const injectionBlockedProbe: ProbeFn = async (ctx: ProbeContext): Promise evidence.payloadPreservedLiterally = echoResult.stdout.includes(payload); if (!evidence.payloadPreservedLiterally) { - writeProbeEvidence(ctx.evidencePath, evidence); + writeProbeEvidence(ctx, evidence); return { status: "failed", message: `injectionBlockedProbe: payload was not preserved literally; stdout tail: ${echoResult.stdout.slice(-300)}`, @@ -139,7 +139,7 @@ export const injectionBlockedProbe: ProbeFn = async (ctx: ProbeContext): Promise perCallSeconds: PER_CALL_SECONDS, }); - writeProbeEvidence(ctx.evidencePath, evidence); + writeProbeEvidence(ctx, evidence); if (!evidence.markerAbsent) { return { diff --git a/test/e2e-scenario/scenarios/probes/network-policy.ts b/test/e2e-scenario/scenarios/probes/network-policy.ts index c3bb50923c..33c51ac3cb 100644 --- a/test/e2e-scenario/scenarios/probes/network-policy.ts +++ b/test/e2e-scenario/scenarios/probes/network-policy.ts @@ -92,7 +92,7 @@ export const networkPolicyProbe: ProbeFn = async (ctx: ProbeContext): Promise